Kokkos Core Kernels Package  Version of the Day
Kokkos_ScatterView.hpp
Go to the documentation of this file.
1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 3.0
6 // Copyright (2020) National Technology & Engineering
7 // Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44 
50 
51 #ifndef KOKKOS_SCATTER_VIEW_HPP
52 #define KOKKOS_SCATTER_VIEW_HPP
53 
54 #include <Kokkos_Core.hpp>
55 #include <utility>
56 
57 namespace Kokkos {
58 namespace Experimental {
59 
/*
 * Tag types used as template arguments of ScatterView / ScatterValue.
 *
 * Reduction type list
 * - These correspond to a subset of the reducers in parallel_reduce.
 * - See the ScatterValue specializations for what each one does.
 */
struct ScatterSum {};   // combine contributions with +=
struct ScatterProd {};  // combine contributions with *=
struct ScatterMax {};   // keep the larger value
struct ScatterMin {};   // keep the smaller value

/* Duplication strategy tags. */
struct ScatterNonDuplicated {};
struct ScatterDuplicated {};

/* Contribution strategy tags: plain updates vs. Kokkos atomic updates. */
struct ScatterNonAtomic {};
struct ScatterAtomic {};
75 
76 } // namespace Experimental
77 } // namespace Kokkos
78 
79 namespace Kokkos {
80 namespace Impl {
81 namespace Experimental {
82 
/* Maps an execution space to its default duplication tag (member `type`).
   Only declared here; each enabled backend supplies a specialization. */
template <class ExecSpace>
struct DefaultDuplication;

/* Maps an (execution space, duplication tag) pair to its default
   contribution tag (member `type`). Only declared here; each enabled backend
   supplies the specializations. */
template <class ExecSpace, class Duplication>
struct DefaultContribution;
88 
#ifdef KOKKOS_ENABLE_SERIAL
// Kokkos::Serial defaults: no duplication, and non-atomic contributions
// whichever duplication mode is selected.
template <>
struct DefaultDuplication<Kokkos::Serial> {
  using type = Kokkos::Experimental::ScatterNonDuplicated;
};

template <>
struct DefaultContribution<Kokkos::Serial,
                           Kokkos::Experimental::ScatterDuplicated> {
  using type = Kokkos::Experimental::ScatterNonAtomic;
};
template <>
struct DefaultContribution<Kokkos::Serial,
                           Kokkos::Experimental::ScatterNonDuplicated> {
  using type = Kokkos::Experimental::ScatterNonAtomic;
};
#endif
106 
#ifdef KOKKOS_ENABLE_OPENMP
// Kokkos::OpenMP defaults: duplicate the data; duplicated storage takes
// non-atomic contributions, non-duplicated storage takes atomic ones.
template <>
struct DefaultDuplication<Kokkos::OpenMP> {
  using type = Kokkos::Experimental::ScatterDuplicated;
};
template <>
struct DefaultContribution<Kokkos::OpenMP,
                           Kokkos::Experimental::ScatterDuplicated> {
  using type = Kokkos::Experimental::ScatterNonAtomic;
};
template <>
struct DefaultContribution<Kokkos::OpenMP,
                           Kokkos::Experimental::ScatterNonDuplicated> {
  using type = Kokkos::Experimental::ScatterAtomic;
};
#endif
123 
#ifdef KOKKOS_ENABLE_OPENMPTARGET
// OpenMPTarget defaults: no duplication; non-duplicated storage takes atomic
// contributions, duplicated storage takes non-atomic ones.
template <>
struct DefaultDuplication<Kokkos::Experimental::OpenMPTarget> {
  using type = Kokkos::Experimental::ScatterNonDuplicated;
};
template <>
struct DefaultContribution<Kokkos::Experimental::OpenMPTarget,
                           Kokkos::Experimental::ScatterDuplicated> {
  using type = Kokkos::Experimental::ScatterNonAtomic;
};
template <>
struct DefaultContribution<Kokkos::Experimental::OpenMPTarget,
                           Kokkos::Experimental::ScatterNonDuplicated> {
  using type = Kokkos::Experimental::ScatterAtomic;
};
#endif
140 
#ifdef KOKKOS_ENABLE_HPX
// HPX defaults: duplicate the data; duplicated storage takes non-atomic
// contributions, non-duplicated storage takes atomic ones.
template <>
struct DefaultDuplication<Kokkos::Experimental::HPX> {
  using type = Kokkos::Experimental::ScatterDuplicated;
};
template <>
struct DefaultContribution<Kokkos::Experimental::HPX,
                           Kokkos::Experimental::ScatterDuplicated> {
  using type = Kokkos::Experimental::ScatterNonAtomic;
};
template <>
struct DefaultContribution<Kokkos::Experimental::HPX,
                           Kokkos::Experimental::ScatterNonDuplicated> {
  using type = Kokkos::Experimental::ScatterAtomic;
};
#endif
157 
#ifdef KOKKOS_ENABLE_THREADS
// Kokkos::Threads defaults: duplicate the data; duplicated storage takes
// non-atomic contributions, non-duplicated storage takes atomic ones.
template <>
struct DefaultDuplication<Kokkos::Threads> {
  using type = Kokkos::Experimental::ScatterDuplicated;
};
template <>
struct DefaultContribution<Kokkos::Threads,
                           Kokkos::Experimental::ScatterDuplicated> {
  using type = Kokkos::Experimental::ScatterNonAtomic;
};
template <>
struct DefaultContribution<Kokkos::Threads,
                           Kokkos::Experimental::ScatterNonDuplicated> {
  using type = Kokkos::Experimental::ScatterAtomic;
};
#endif
174 
#ifdef KOKKOS_ENABLE_CUDA
// Kokkos::Cuda defaults: no duplication, and atomic contributions whichever
// duplication mode is selected.
template <>
struct DefaultDuplication<Kokkos::Cuda> {
  using type = Kokkos::Experimental::ScatterNonDuplicated;
};
template <>
struct DefaultContribution<Kokkos::Cuda,
                           Kokkos::Experimental::ScatterDuplicated> {
  using type = Kokkos::Experimental::ScatterAtomic;
};
template <>
struct DefaultContribution<Kokkos::Cuda,
                           Kokkos::Experimental::ScatterNonDuplicated> {
  using type = Kokkos::Experimental::ScatterAtomic;
};
#endif
191 
#ifdef KOKKOS_ENABLE_HIP
// HIP defaults: no duplication, and atomic contributions whichever
// duplication mode is selected.
template <>
struct DefaultDuplication<Kokkos::Experimental::HIP> {
  using type = Kokkos::Experimental::ScatterNonDuplicated;
};
template <>
struct DefaultContribution<Kokkos::Experimental::HIP,
                           Kokkos::Experimental::ScatterDuplicated> {
  using type = Kokkos::Experimental::ScatterAtomic;
};
template <>
struct DefaultContribution<Kokkos::Experimental::HIP,
                           Kokkos::Experimental::ScatterNonDuplicated> {
  using type = Kokkos::Experimental::ScatterAtomic;
};
#endif
208 
#ifdef KOKKOS_ENABLE_SYCL
// SYCL defaults: no duplication, and atomic contributions whichever
// duplication mode is selected.
template <>
struct DefaultDuplication<Kokkos::Experimental::SYCL> {
  using type = Kokkos::Experimental::ScatterNonDuplicated;
};
template <>
struct DefaultContribution<Kokkos::Experimental::SYCL,
                           Kokkos::Experimental::ScatterDuplicated> {
  using type = Kokkos::Experimental::ScatterAtomic;
};
template <>
struct DefaultContribution<Kokkos::Experimental::SYCL,
                           Kokkos::Experimental::ScatterNonDuplicated> {
  using type = Kokkos::Experimental::ScatterAtomic;
};
#endif
225 
// FIXME: all ScatterValue specializations need an overhaul:
//  - should they be copyable at all?
//  - what should the internal handle type be?
//  - remove join()
//  - use update() consistently inside the operators
//
// Primary template; only the (Op, Contribution) specializations are defined.
template <class ValueType, class Op, class DeviceType, class Contribution>
struct ScatterValue;
234 
235 /* ScatterValue <Op=ScatterSum, Contribution=ScatterNonAtomic> is
236  the object returned by the access operator() of ScatterAccess. This class
237  inherits from the Sum<> reducer and it wraps join(dest, src) with convenient
238  operator+=, etc. Note the addition of update(ValueType const& rhs) and
239  reset() so that all reducers can have common functions See ReduceDuplicates
240  and ResetDuplicates ) */
241 template <typename ValueType, typename DeviceType>
242 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum, DeviceType,
243  Kokkos::Experimental::ScatterNonAtomic> {
244  ValueType& value;
245 
246  public:
247  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
248  : value(value_in) {}
249  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
250  : value(other.value) {}
251  KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) {
252  update(rhs);
253  }
254  KOKKOS_FORCEINLINE_FUNCTION void operator++() { update(1); }
255  KOKKOS_FORCEINLINE_FUNCTION void operator++(int) { update(1); }
256  KOKKOS_FORCEINLINE_FUNCTION void operator-=(ValueType const& rhs) {
257  update(ValueType(-rhs));
258  }
259  KOKKOS_FORCEINLINE_FUNCTION void operator--() { update(ValueType(-1)); }
260  KOKKOS_FORCEINLINE_FUNCTION void operator--(int) { update(ValueType(-1)); }
261  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
262  value += rhs;
263  }
264  KOKKOS_FORCEINLINE_FUNCTION void reset() {
265  value = reduction_identity<ValueType>::sum();
266  }
267 };
268 
269 /* ScatterValue <Op=ScatterSum, Contribution=ScatterAtomic> is the
270  object returned by the access operator() of ScatterAccess. This class inherits
271  from the Sum<> reducer, and similar to that returned by an Atomic View, it
272  wraps Kokkos::atomic_add with convenient operator+=, etc. This version also has
273  the update(rhs) and reset() functions. */
274 template <typename ValueType, typename DeviceType>
275 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum, DeviceType,
276  Kokkos::Experimental::ScatterAtomic> {
277  ValueType& value;
278 
279  public:
280  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
281  : value(value_in) {}
282 
283  KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) {
284  this->join(value, rhs);
285  }
286  KOKKOS_FORCEINLINE_FUNCTION void operator++() { this->join(value, 1); }
287  KOKKOS_FORCEINLINE_FUNCTION void operator++(int) { this->join(value, 1); }
288  KOKKOS_FORCEINLINE_FUNCTION void operator-=(ValueType const& rhs) {
289  this->join(value, ValueType(-rhs));
290  }
291  KOKKOS_FORCEINLINE_FUNCTION void operator--() {
292  this->join(value, ValueType(-1));
293  }
294  KOKKOS_FORCEINLINE_FUNCTION void operator--(int) {
295  this->join(value, ValueType(-1));
296  }
297 
298  KOKKOS_INLINE_FUNCTION
299  void join(ValueType& dest, const ValueType& src) const {
300  Kokkos::atomic_add(&dest, src);
301  }
302 
303  KOKKOS_INLINE_FUNCTION
304  void join(volatile ValueType& dest, const volatile ValueType& src) const {
305  Kokkos::atomic_add(&dest, src);
306  }
307 
308  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
309  this->join(value, rhs);
310  }
311 
312  KOKKOS_FORCEINLINE_FUNCTION void reset() {
313  value = reduction_identity<ValueType>::sum();
314  }
315 };
316 
317 /* ScatterValue <Op=ScatterProd, Contribution=ScatterNonAtomic> is
318  the object returned by the access operator() of ScatterAccess. This class
319  inherits from the Prod<> reducer, and it wraps join(dest, src) with
320  convenient operator*=, etc. Note the addition of update(ValueType const& rhs)
321  and reset() so that all reducers can have common functions See
322  ReduceDuplicates and ResetDuplicates ) */
323 template <typename ValueType, typename DeviceType>
324 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd, DeviceType,
325  Kokkos::Experimental::ScatterNonAtomic> {
326  ValueType& value;
327 
328  public:
329  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
330  : value(value_in) {}
331  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
332  : value(other.value) {}
333  KOKKOS_FORCEINLINE_FUNCTION void operator*=(ValueType const& rhs) {
334  value *= rhs;
335  }
336  KOKKOS_FORCEINLINE_FUNCTION void operator/=(ValueType const& rhs) {
337  value /= rhs;
338  }
339 
340  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
341  value *= rhs;
342  }
343  KOKKOS_FORCEINLINE_FUNCTION void reset() {
344  value = reduction_identity<ValueType>::prod();
345  }
346 };
347 
348 /* ScatterValue <Op=ScatterProd, Contribution=ScatterAtomic> is the
349  object returned by the access operator() of ScatterAccess. This class
350  inherits from the Prod<> reducer, and similar to that returned by an Atomic
351  View, it wraps and atomic_prod with convenient operator*=, etc. atomic_prod
352  uses the atomic_compare_exchange. This version also has the update(rhs)
353  and reset() functions. */
354 template <typename ValueType, typename DeviceType>
355 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd, DeviceType,
356  Kokkos::Experimental::ScatterAtomic> {
357  ValueType& value;
358 
359  public:
360  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
361  : value(value_in) {}
362  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
363  : value(other.value) {}
364 
365  KOKKOS_FORCEINLINE_FUNCTION void operator*=(ValueType const& rhs) {
366  Kokkos::atomic_mul(&value, rhs);
367  }
368  KOKKOS_FORCEINLINE_FUNCTION void operator/=(ValueType const& rhs) {
369  Kokkos::atomic_div(&value, rhs);
370  }
371 
372  KOKKOS_FORCEINLINE_FUNCTION
373  void atomic_prod(ValueType& dest, const ValueType& src) const {
374  bool success = false;
375  while (!success) {
376  ValueType dest_old = dest;
377  ValueType dest_new = dest_old * src;
378  dest_new =
379  Kokkos::atomic_compare_exchange<ValueType>(&dest, dest_old, dest_new);
380  success = ((dest_new - dest_old) / dest_old <= 1e-15);
381  }
382  }
383 
384  KOKKOS_INLINE_FUNCTION
385  void join(ValueType& dest, const ValueType& src) const {
386  atomic_prod(&dest, src);
387  }
388 
389  KOKKOS_INLINE_FUNCTION
390  void join(volatile ValueType& dest, const volatile ValueType& src) const {
391  atomic_prod(&dest, src);
392  }
393 
394  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
395  atomic_prod(&value, rhs);
396  }
397  KOKKOS_FORCEINLINE_FUNCTION void reset() {
398  value = reduction_identity<ValueType>::prod();
399  }
400 };
401 
402 /* ScatterValue <Op=ScatterMin, Contribution=ScatterNonAtomic> is
403  the object returned by the access operator() of ScatterAccess. This class
404  inherits from the Min<> reducer and it wraps join(dest, src) with convenient
405  update(rhs). Note the addition of update(ValueType const& rhs) and reset()
406  are so that all reducers can have a common update function See
407  ReduceDuplicates and ResetDuplicates ) */
408 template <typename ValueType, typename DeviceType>
409 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin, DeviceType,
410  Kokkos::Experimental::ScatterNonAtomic> {
411  ValueType& value;
412  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
413  : value(value_in) {}
414  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
415  : value(other.value) {}
416 
417  public:
418  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
419  value = rhs < value ? rhs : value;
420  }
421  KOKKOS_FORCEINLINE_FUNCTION void reset() {
422  value = reduction_identity<ValueType>::min();
423  }
424 };
425 
426 /* ScatterValue <Op=ScatterMin, Contribution=ScatterAtomic> is the
427  object returned by the access operator() of ScatterAccess. This class
428  inherits from the Min<> reducer, and similar to that returned by an Atomic
429  View, it wraps atomic_min with join(), etc. atomic_min uses the
430  atomic_compare_exchange. This version also has the update(rhs) and reset()
431  functions. */
432 template <typename ValueType, typename DeviceType>
433 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin, DeviceType,
434  Kokkos::Experimental::ScatterAtomic> {
435  ValueType& value;
436 
437  public:
438  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
439  : value(value_in) {}
440  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
441  : value(other.value) {}
442 
443  KOKKOS_FORCEINLINE_FUNCTION
444  void atomic_min(ValueType& dest, const ValueType& src) const {
445  bool success = false;
446  while (!success) {
447  ValueType dest_old = dest;
448  ValueType dest_new = (dest_old > src) ? src : dest_old;
449  dest_new =
450  Kokkos::atomic_compare_exchange<ValueType>(&dest, dest_old, dest_new);
451  success = ((dest_new - dest_old) / dest_old <= 1e-15);
452  }
453  }
454 
455  KOKKOS_INLINE_FUNCTION
456  void join(ValueType& dest, const ValueType& src) const {
457  atomic_min(dest, src);
458  }
459 
460  KOKKOS_INLINE_FUNCTION
461  void join(volatile ValueType& dest, const volatile ValueType& src) const {
462  atomic_min(dest, src);
463  }
464 
465  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
466  this->join(value, rhs);
467  }
468  KOKKOS_FORCEINLINE_FUNCTION void reset() {
469  value = reduction_identity<ValueType>::min();
470  }
471 };
472 
473 /* ScatterValue <Op=ScatterMax, Contribution=ScatterNonAtomic> is
474  the object returned by the access operator() of ScatterAccess. This class
475  inherits from the Max<> reducer and it wraps join(dest, src) with convenient
476  update(rhs). Note the addition of update(ValueType const& rhs) and reset()
477  are so that all reducers can have a common update function See
478  ReduceDuplicates and ResetDuplicates ) */
479 template <typename ValueType, typename DeviceType>
480 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax, DeviceType,
481  Kokkos::Experimental::ScatterNonAtomic> {
482  ValueType& value;
483 
484  public:
485  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
486  : value(value_in) {}
487  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
488  : value(other.value) {}
489  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
490  value = rhs > value ? rhs : value;
491  }
492  KOKKOS_FORCEINLINE_FUNCTION void reset() {
493  value = reduction_identity<ValueType>::max();
494  }
495 };
496 
497 /* ScatterValue <Op=ScatterMax, Contribution=ScatterAtomic> is the
498  object returned by the access operator() of ScatterAccess. This class
499  inherits from the Max<> reducer, and similar to that returned by an Atomic
500  View, it wraps atomic_max with join(), etc. atomic_max uses the
501  atomic_compare_exchange. This version also has the update(rhs) and reset()
502  functions. */
503 template <typename ValueType, typename DeviceType>
504 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax, DeviceType,
505  Kokkos::Experimental::ScatterAtomic> {
506  ValueType& value;
507 
508  public:
509  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
510  : value(value_in) {}
511  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
512  : value(other.value) {}
513 
514  KOKKOS_FORCEINLINE_FUNCTION
515  void atomic_max(ValueType& dest, const ValueType& src) const {
516  bool success = false;
517  while (!success) {
518  ValueType dest_old = dest;
519  ValueType dest_new = (dest_old < src) ? src : dest_old;
520  dest_new =
521  Kokkos::atomic_compare_exchange<ValueType>(&dest, dest_old, dest_new);
522  success = ((dest_new - dest_old) / dest_old <= 1e-15);
523  }
524  }
525 
526  KOKKOS_INLINE_FUNCTION
527  void join(ValueType& dest, const ValueType& src) const {
528  atomic_max(dest, src);
529  }
530 
531  KOKKOS_INLINE_FUNCTION
532  void join(volatile ValueType& dest, const volatile ValueType& src) const {
533  atomic_max(dest, src);
534  }
535 
536  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
537  this->join(value, rhs);
538  }
539  KOKKOS_FORCEINLINE_FUNCTION void reset() {
540  value = reduction_identity<ValueType>::max();
541  }
542 };
543 
/* DuplicatedDataType<T, Layout>::value_type is the View DataType obtained
   from T by adding one runtime dimension, which becomes the largest-stride
   dimension. For LayoutLeft, because of how DataType is spelled, every
   existing compile-time dimension has to be turned into a runtime dimension
   as well. Primary template; only the specializations are defined. */
template <class T, class Layout>
struct DuplicatedDataType;
551 
552 template <typename T>
553 struct DuplicatedDataType<T, Kokkos::LayoutRight> {
554  using value_type = T*; // For LayoutRight, add a star all the way on the left
555 };
556 
557 template <typename T, size_t N>
558 struct DuplicatedDataType<T[N], Kokkos::LayoutRight> {
559  using value_type =
560  typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type[N];
561 };
562 
563 template <typename T>
564 struct DuplicatedDataType<T[], Kokkos::LayoutRight> {
565  using value_type =
566  typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type[];
567 };
568 
569 template <typename T>
570 struct DuplicatedDataType<T*, Kokkos::LayoutRight> {
571  using value_type =
572  typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type*;
573 };
574 
575 template <typename T>
576 struct DuplicatedDataType<T, Kokkos::LayoutLeft> {
577  using value_type = T*;
578 };
579 
580 template <typename T, size_t N>
581 struct DuplicatedDataType<T[N], Kokkos::LayoutLeft> {
582  using value_type =
583  typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type*;
584 };
585 
586 template <typename T>
587 struct DuplicatedDataType<T[], Kokkos::LayoutLeft> {
588  using value_type =
589  typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type*;
590 };
591 
592 template <typename T>
593 struct DuplicatedDataType<T*, Kokkos::LayoutLeft> {
594  using value_type =
595  typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type*;
596 };
597 
/* Insert an integer argument pack into an array.
 *
 * Writes dim0 to array[pos] and each following argument to the next slot.
 *   array  destination buffer; the caller guarantees room for pos + number
 *          of arguments entries
 *   pos    index that receives the first argument
 * Each argument is converted to size_t explicitly, so passing signed or wider
 * integer types is a deliberate conversion rather than an implicit one.
 */
template <class T>
void args_to_array(size_t* array, int pos, T dim0) {
  array[pos] = static_cast<size_t>(dim0);
}
template <class T, class... Dims>
void args_to_array(size_t* array, int pos, T dim0, Dims... dims) {
  array[pos] = static_cast<size_t>(dim0);
  args_to_array(array, pos + 1, dims...);  // remaining arguments, next slot
}
609 
610 /* Slice is just responsible for stuffing the correct number of Kokkos::ALL
611  arguments on the correct side of the index in a call to subview() to get a
612  subview where the index specified is the largest-stride one. */
613 template <typename Layout, int rank, typename V, typename... Args>
614 struct Slice {
615  using next = Slice<Layout, rank - 1, V, Kokkos::Impl::ALL_t, Args...>;
616  using value_type = typename next::value_type;
617 
618  static value_type get(V const& src, const size_t i, Args... args) {
619  return next::get(src, i, Kokkos::ALL, args...);
620  }
621 };
622 
623 template <typename V, typename... Args>
624 struct Slice<Kokkos::LayoutRight, 1, V, Args...> {
625  using value_type =
626  typename Kokkos::Impl::ViewMapping<void, V, const size_t, Args...>::type;
627  static value_type get(V const& src, const size_t i, Args... args) {
628  return Kokkos::subview(src, i, args...);
629  }
630 };
631 
632 template <typename V, typename... Args>
633 struct Slice<Kokkos::LayoutLeft, 1, V, Args...> {
634  using value_type =
635  typename Kokkos::Impl::ViewMapping<void, V, Args..., const size_t>::type;
636  static value_type get(V const& src, const size_t i, Args... args) {
637  return Kokkos::subview(src, args..., i);
638  }
639 };
640 
641 template <typename ExecSpace, typename ValueType, typename Op>
642 struct ReduceDuplicates;
643 
644 template <typename ExecSpace, typename ValueType, typename Op>
645 struct ReduceDuplicatesBase {
646  using Derived = ReduceDuplicates<ExecSpace, ValueType, Op>;
647  ValueType const* src;
648  ValueType* dst;
649  size_t stride;
650  size_t start;
651  size_t n;
652  ReduceDuplicatesBase(ExecSpace const& exec_space, ValueType const* src_in,
653  ValueType* dest_in, size_t stride_in, size_t start_in,
654  size_t n_in, std::string const& name)
655  : src(src_in), dst(dest_in), stride(stride_in), start(start_in), n(n_in) {
656  parallel_for(
657  std::string("Kokkos::ScatterView::ReduceDuplicates [") + name + "]",
658  RangePolicy<ExecSpace, size_t>(exec_space, 0, stride),
659  static_cast<Derived const&>(*this));
660  }
661 };
662 
663 /* ReduceDuplicates -- Perform reduction on destination array using strided
664  * source Use ScatterValue<> specific to operation to wrap destination array so
665  * that the reduction operation can be accessed via the update(rhs) function */
666 template <typename ExecSpace, typename ValueType, typename Op>
667 struct ReduceDuplicates
668  : public ReduceDuplicatesBase<ExecSpace, ValueType, Op> {
669  using Base = ReduceDuplicatesBase<ExecSpace, ValueType, Op>;
670  ReduceDuplicates(ExecSpace const& exec_space, ValueType const* src_in,
671  ValueType* dst_in, size_t stride_in, size_t start_in,
672  size_t n_in, std::string const& name)
673  : Base(exec_space, src_in, dst_in, stride_in, start_in, n_in, name) {}
674  KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const {
675  for (size_t j = Base::start; j < Base::n; ++j) {
676  ScatterValue<ValueType, Op, ExecSpace,
677  Kokkos::Experimental::ScatterNonAtomic>
678  sv(Base::dst[i]);
679  sv.update(Base::src[i + Base::stride * j]);
680  }
681  }
682 };
683 
684 template <typename ExecSpace, typename ValueType, typename Op>
685 struct ResetDuplicates;
686 
687 template <typename ExecSpace, typename ValueType, typename Op>
688 struct ResetDuplicatesBase {
689  using Derived = ResetDuplicates<ExecSpace, ValueType, Op>;
690  ValueType* data;
691  ResetDuplicatesBase(ExecSpace const& exec_space, ValueType* data_in,
692  size_t size_in, std::string const& name)
693  : data(data_in) {
694  parallel_for(
695  std::string("Kokkos::ScatterView::ResetDuplicates [") + name + "]",
696  RangePolicy<ExecSpace, size_t>(exec_space, 0, size_in),
697  static_cast<Derived const&>(*this));
698  }
699 };
700 
701 /* ResetDuplicates -- Perform reset on destination array
702  * Use ScatterValue<> specific to operation to wrap destination array so that
703  * the reset operation can be accessed via the reset() function */
704 template <typename ExecSpace, typename ValueType, typename Op>
705 struct ResetDuplicates : public ResetDuplicatesBase<ExecSpace, ValueType, Op> {
706  using Base = ResetDuplicatesBase<ExecSpace, ValueType, Op>;
707  ResetDuplicates(ExecSpace const& exec_space, ValueType* data_in,
708  size_t size_in, std::string const& name)
709  : Base(exec_space, data_in, size_in, name) {}
710  KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const {
711  ScatterValue<ValueType, Op, ExecSpace,
712  Kokkos::Experimental::ScatterNonAtomic>
713  sv(Base::data[i]);
714  sv.reset();
715  }
716 };
717 
718 template <typename... P>
719 void check_scatter_view_allocation_properties_argument(
720  ViewCtorProp<P...> const&) {
721  static_assert(ViewCtorProp<P...>::has_execution_space &&
722  ViewCtorProp<P...>::has_label &&
723  ViewCtorProp<P...>::initialize,
724  "Allocation property must have an execution name as well as a "
725  "label, and must perform the view initialization");
726 }
727 
728 } // namespace Experimental
729 } // namespace Impl
730 } // namespace Kokkos
731 
732 namespace Kokkos {
733 namespace Experimental {
734 
735 template <typename DataType,
736  typename Layout = Kokkos::DefaultExecutionSpace::array_layout,
737  typename DeviceType = Kokkos::DefaultExecutionSpace,
738  typename Op = Kokkos::Experimental::ScatterSum,
739  typename Duplication = typename Kokkos::Impl::Experimental::
740  DefaultDuplication<typename DeviceType::execution_space>::type,
741  typename Contribution =
742  typename Kokkos::Impl::Experimental::DefaultContribution<
743  typename DeviceType::execution_space, Duplication>::type>
744 class ScatterView;
745 
/* ScatterAccess primary template (declaration only). This is the type
   returned by ScatterView::access(); OverrideContribution is the contribution
   strategy requested for that particular access. */
template <typename DataType, typename Op, typename DeviceType, typename Layout,
          typename Duplication, typename Contribution,
          typename OverrideContribution>
class ScatterAccess;
750 
751 // non-duplicated implementation
752 template <typename DataType, typename Op, typename DeviceType, typename Layout,
753  typename Contribution>
754 class ScatterView<DataType, Layout, DeviceType, Op, ScatterNonDuplicated,
755  Contribution> {
756  public:
757  using execution_space = typename DeviceType::execution_space;
758  using memory_space = typename DeviceType::memory_space;
759  using device_type = Kokkos::Device<execution_space, memory_space>;
760  using original_view_type = Kokkos::View<DataType, Layout, device_type>;
761  using original_value_type = typename original_view_type::value_type;
762  using original_reference_type = typename original_view_type::reference_type;
763  friend class ScatterAccess<DataType, Op, DeviceType, Layout,
764  ScatterNonDuplicated, Contribution,
765  ScatterNonAtomic>;
766  friend class ScatterAccess<DataType, Op, DeviceType, Layout,
767  ScatterNonDuplicated, Contribution, ScatterAtomic>;
768  template <class, class, class, class, class, class>
769  friend class ScatterView;
770 
771  ScatterView() = default;
772 
773  template <typename RT, typename... RP>
774  ScatterView(View<RT, RP...> const& original_view)
775  : internal_view(original_view) {}
776 
777  template <typename RT, typename... P, typename... RP>
778  ScatterView(execution_space const& /* exec_space */,
779  View<RT, RP...> const& original_view)
780  : internal_view(original_view) {}
781 
782  template <typename... Dims>
783  ScatterView(std::string const& name, Dims... dims)
784  : internal_view(name, dims...) {}
785 
786  // This overload allows specifying an execution space instance to be
787  // used by passing, e.g., Kokkos::view_alloc(exec_space, "label") as
788  // first argument.
789  template <typename... P, typename... Dims>
790  ScatterView(::Kokkos::Impl::ViewCtorProp<P...> const& arg_prop, Dims... dims)
791  : internal_view(arg_prop, dims...) {
792  using ::Kokkos::Impl::Experimental::
793  check_scatter_view_allocation_properties_argument;
794  check_scatter_view_allocation_properties_argument(arg_prop);
795  }
796 
797  template <typename OtherDataType, typename OtherDeviceType>
798  KOKKOS_FUNCTION ScatterView(
799  const ScatterView<OtherDataType, Layout, OtherDeviceType, Op,
800  ScatterNonDuplicated, Contribution>& other_view)
801  : internal_view(other_view.internal_view) {}
802 
803  template <typename OtherDataType, typename OtherDeviceType>
804  KOKKOS_FUNCTION void operator=(
805  const ScatterView<OtherDataType, Layout, OtherDeviceType, Op,
806  ScatterNonDuplicated, Contribution>& other_view) {
807  internal_view = other_view.internal_view;
808  }
809 
810  template <typename OverrideContribution = Contribution>
811  KOKKOS_FORCEINLINE_FUNCTION
812  ScatterAccess<DataType, Op, DeviceType, Layout, ScatterNonDuplicated,
813  Contribution, OverrideContribution>
814  access() const {
815  return ScatterAccess<DataType, Op, DeviceType, Layout, ScatterNonDuplicated,
816  Contribution, OverrideContribution>(*this);
817  }
818 
819  original_view_type subview() const { return internal_view; }
820 
821  KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
822  return internal_view.is_allocated();
823  }
824 
825  template <typename DT, typename... RP>
826  void contribute_into(View<DT, RP...> const& dest) const {
827  contribute_into(execution_space(), dest);
828  }
829 
830  template <typename DT, typename... RP>
831  void contribute_into(execution_space const& exec_space,
832  View<DT, RP...> const& dest) const {
833  using dest_type = View<DT, RP...>;
834  static_assert(std::is_same<typename dest_type::array_layout, Layout>::value,
835  "ScatterView contribute destination has different layout");
836  static_assert(
838  execution_space, typename dest_type::memory_space>::accessible,
839  "ScatterView contribute destination memory space not accessible");
840  if (dest.data() == internal_view.data()) return;
841  Kokkos::Impl::Experimental::ReduceDuplicates<execution_space,
842  original_value_type, Op>(
843  exec_space, internal_view.data(), dest.data(), 0, 0, 1,
844  internal_view.label());
845  }
846 
847  void reset(execution_space const& exec_space = execution_space()) {
848  Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
849  original_value_type, Op>(
850  exec_space, internal_view.data(), internal_view.size(),
851  internal_view.label());
852  }
853  template <typename DT, typename... RP>
854  void reset_except(View<DT, RP...> const& view) {
855  reset_except(execution_space(), view);
856  }
857 
858  template <typename DT, typename... RP>
859  void reset_except(const execution_space& exec_space,
860  View<DT, RP...> const& view) {
861  if (view.data() != internal_view.data()) reset(exec_space);
862  }
863 
864  void resize(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
865  const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
866  const size_t n6 = 0, const size_t n7 = 0) {
867  ::Kokkos::resize(internal_view, n0, n1, n2, n3, n4, n5, n6, n7);
868  }
869 
870  void realloc(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
871  const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
872  const size_t n6 = 0, const size_t n7 = 0) {
873  ::Kokkos::realloc(internal_view, n0, n1, n2, n3, n4, n5, n6, n7);
874  }
875 
876  protected:
877  template <typename... Args>
878  KOKKOS_FORCEINLINE_FUNCTION original_reference_type at(Args... args) const {
879  return internal_view(args...);
880  }
881 
882  private:
883  using internal_view_type = original_view_type;
884  internal_view_type internal_view;
885 };
886 
887 template <typename DataType, typename Op, typename DeviceType, typename Layout,
888  typename Contribution, typename OverrideContribution>
889 class ScatterAccess<DataType, Op, DeviceType, Layout, ScatterNonDuplicated,
890  Contribution, OverrideContribution> {
891  public:
892  using view_type = ScatterView<DataType, Layout, DeviceType, Op,
893  ScatterNonDuplicated, Contribution>;
894  using original_value_type = typename view_type::original_value_type;
895  using value_type = Kokkos::Impl::Experimental::ScatterValue<
896  original_value_type, Op, DeviceType, OverrideContribution>;
897 
898  KOKKOS_INLINE_FUNCTION
899  ScatterAccess() : view(view_type()) {}
900 
901  KOKKOS_INLINE_FUNCTION
902  ScatterAccess(view_type const& view_in) : view(view_in) {}
903  KOKKOS_DEFAULTED_FUNCTION
904  ~ScatterAccess() = default;
905 
906  template <typename... Args>
907  KOKKOS_FORCEINLINE_FUNCTION value_type operator()(Args... args) const {
908  return view.at(args...);
909  }
910 
911  template <typename Arg>
912  KOKKOS_FORCEINLINE_FUNCTION
913  typename std::enable_if<view_type::original_view_type::rank == 1 &&
914  std::is_integral<Arg>::value,
915  value_type>::type
916  operator[](Arg arg) const {
917  return view.at(arg);
918  }
919 
920  private:
921  view_type const& view;
922 };
923 
924 // duplicated implementation
925 // LayoutLeft and LayoutRight are different enough that we'll just specialize
926 // each
927 
928 template <typename DataType, typename Op, typename DeviceType,
929  typename Contribution>
930 class ScatterView<DataType, Kokkos::LayoutRight, DeviceType, Op,
931  ScatterDuplicated, Contribution> {
932  public:
933  using execution_space = typename DeviceType::execution_space;
934  using memory_space = typename DeviceType::memory_space;
935  using device_type = Kokkos::Device<execution_space, memory_space>;
936  using original_view_type =
938  using original_value_type = typename original_view_type::value_type;
939  using original_reference_type = typename original_view_type::reference_type;
940  friend class ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutRight,
941  ScatterDuplicated, Contribution, ScatterNonAtomic>;
942  friend class ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutRight,
943  ScatterDuplicated, Contribution, ScatterAtomic>;
944  template <class, class, class, class, class, class>
945  friend class ScatterView;
946 
947  using data_type_info =
948  typename Kokkos::Impl::Experimental::DuplicatedDataType<
949  DataType, Kokkos::LayoutRight>;
950  using internal_data_type = typename data_type_info::value_type;
951  using internal_view_type =
952  Kokkos::View<internal_data_type, Kokkos::LayoutRight, device_type>;
953 
954  ScatterView() = default;
955 
956  template <typename OtherDataType, typename OtherDeviceType>
957  KOKKOS_FUNCTION ScatterView(
958  const ScatterView<OtherDataType, Kokkos::LayoutRight, OtherDeviceType, Op,
959  ScatterDuplicated, Contribution>& other_view)
960  : unique_token(other_view.unique_token),
961  internal_view(other_view.internal_view) {}
962 
963  template <typename OtherDataType, typename OtherDeviceType>
964  KOKKOS_FUNCTION void operator=(
965  const ScatterView<OtherDataType, Kokkos::LayoutRight, OtherDeviceType, Op,
966  ScatterDuplicated, Contribution>& other_view) {
967  unique_token = other_view.unique_token;
968  internal_view = other_view.internal_view;
969  }
970 
971  template <typename RT, typename... RP>
972  ScatterView(View<RT, RP...> const& original_view)
973  : ScatterView(execution_space(), original_view) {}
974 
975  template <typename RT, typename... P, typename... RP>
976  ScatterView(execution_space const& exec_space,
977  View<RT, RP...> const& original_view)
978  : unique_token(),
979  internal_view(
980  view_alloc(WithoutInitializing,
981  std::string("duplicated_") + original_view.label(),
982  exec_space),
983  unique_token.size(),
984  original_view.rank_dynamic > 0 ? original_view.extent(0)
985  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
986  original_view.rank_dynamic > 1 ? original_view.extent(1)
987  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
988  original_view.rank_dynamic > 2 ? original_view.extent(2)
989  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
990  original_view.rank_dynamic > 3 ? original_view.extent(3)
991  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
992  original_view.rank_dynamic > 4 ? original_view.extent(4)
993  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
994  original_view.rank_dynamic > 5 ? original_view.extent(5)
995  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
996  original_view.rank_dynamic > 6 ? original_view.extent(6)
997  : KOKKOS_IMPL_CTOR_DEFAULT_ARG)
998 
999  {
1000  reset(exec_space);
1001  }
1002 
1003  template <typename... Dims>
1004  ScatterView(std::string const& name, Dims... dims)
1005  : ScatterView(view_alloc(execution_space(), name), dims...) {}
1006 
1007  // This overload allows specifying an execution space instance to be
1008  // used by passing, e.g., Kokkos::view_alloc(exec_space, "label") as
1009  // first argument.
1010  template <typename... P, typename... Dims>
1011  ScatterView(::Kokkos::Impl::ViewCtorProp<P...> const& arg_prop, Dims... dims)
1012  : internal_view(view_alloc(WithoutInitializing,
1013  static_cast<::Kokkos::Impl::ViewCtorProp<
1014  void, std::string> const&>(arg_prop)
1015  .value),
1016  unique_token.size(), dims...) {
1017  using ::Kokkos::Impl::Experimental::
1018  check_scatter_view_allocation_properties_argument;
1019  check_scatter_view_allocation_properties_argument(arg_prop);
1020 
1021  auto const exec_space =
1022  static_cast<::Kokkos::Impl::ViewCtorProp<void, execution_space> const&>(
1023  arg_prop)
1024  .value;
1025  reset(exec_space);
1026  }
1027 
1028  template <typename OverrideContribution = Contribution>
1029  KOKKOS_FORCEINLINE_FUNCTION
1030  ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutRight,
1031  ScatterDuplicated, Contribution, OverrideContribution>
1032  access() const {
1033  return ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutRight,
1034  ScatterDuplicated, Contribution, OverrideContribution>(
1035  *this);
1036  }
1037 
1038  typename Kokkos::Impl::Experimental::Slice<Kokkos::LayoutRight,
1039  internal_view_type::rank,
1040  internal_view_type>::value_type
1041  subview() const {
1042  return Kokkos::Impl::Experimental::Slice<
1043  Kokkos::LayoutRight, internal_view_type::Rank,
1044  internal_view_type>::get(internal_view, 0);
1045  }
1046 
1047  KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
1048  return internal_view.is_allocated();
1049  }
1050 
1051  template <typename DT, typename... RP>
1052  void contribute_into(View<DT, RP...> const& dest) const {
1053  contribute_into(execution_space(), dest);
1054  }
1055 
1056  template <typename DT, typename... RP>
1057  void contribute_into(execution_space const& exec_space,
1058  View<DT, RP...> const& dest) const {
1059  using dest_type = View<DT, RP...>;
1060  static_assert(std::is_same<typename dest_type::array_layout,
1061  Kokkos::LayoutRight>::value,
1062  "ScatterView deep_copy destination has different layout");
1063  static_assert(
1065  execution_space, typename dest_type::memory_space>::accessible,
1066  "ScatterView deep_copy destination memory space not accessible");
1067  bool is_equal = (dest.data() == internal_view.data());
1068  size_t start = is_equal ? 1 : 0;
1069  Kokkos::Impl::Experimental::ReduceDuplicates<execution_space,
1070  original_value_type, Op>(
1071  exec_space, internal_view.data(), dest.data(), internal_view.stride(0),
1072  start, internal_view.extent(0), internal_view.label());
1073  }
1074 
1075  void reset(execution_space const& exec_space = execution_space()) {
1076  Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
1077  original_value_type, Op>(
1078  exec_space, internal_view.data(), internal_view.size(),
1079  internal_view.label());
1080  }
1081 
1082  template <typename DT, typename... RP>
1083  void reset_except(View<DT, RP...> const& view) {
1084  reset_except(execution_space(), view);
1085  }
1086 
1087  template <typename DT, typename... RP>
1088  void reset_except(execution_space const& exec_space,
1089  View<DT, RP...> const& view) {
1090  if (view.data() != internal_view.data()) {
1091  reset(exec_space);
1092  return;
1093  }
1094  Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
1095  original_value_type, Op>(
1096  exec_space, internal_view.data() + view.size(),
1097  internal_view.size() - view.size(), internal_view.label());
1098  }
1099 
1100  void resize(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
1101  const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
1102  const size_t n6 = 0) {
1103  ::Kokkos::resize(internal_view, unique_token.size(), n0, n1, n2, n3, n4, n5,
1104  n6);
1105  }
1106 
1107  void realloc(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
1108  const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
1109  const size_t n6 = 0) {
1110  ::Kokkos::realloc(internal_view, unique_token.size(), n0, n1, n2, n3, n4,
1111  n5, n6);
1112  }
1113 
1114  protected:
1115  template <typename... Args>
1116  KOKKOS_FORCEINLINE_FUNCTION original_reference_type at(int rank,
1117  Args... args) const {
1118  return internal_view(rank, args...);
1119  }
1120 
1121  protected:
1122  using unique_token_type = Kokkos::Experimental::UniqueToken<
1123  execution_space, Kokkos::Experimental::UniqueTokenScope::Global>;
1124 
1125  unique_token_type unique_token;
1126  internal_view_type internal_view;
1127 };
1128 
1129 template <typename DataType, typename Op, typename DeviceType,
1130  typename Contribution>
1131 class ScatterView<DataType, Kokkos::LayoutLeft, DeviceType, Op,
1132  ScatterDuplicated, Contribution> {
1133  public:
1134  using execution_space = typename DeviceType::execution_space;
1135  using memory_space = typename DeviceType::memory_space;
1136  using device_type = Kokkos::Device<execution_space, memory_space>;
1137  using original_view_type =
1139  using original_value_type = typename original_view_type::value_type;
1140  using original_reference_type = typename original_view_type::reference_type;
1141  friend class ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutLeft,
1142  ScatterDuplicated, Contribution, ScatterNonAtomic>;
1143  friend class ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutLeft,
1144  ScatterDuplicated, Contribution, ScatterAtomic>;
1145  template <class, class, class, class, class, class>
1146  friend class ScatterView;
1147 
1148  using data_type_info =
1149  typename Kokkos::Impl::Experimental::DuplicatedDataType<
1150  DataType, Kokkos::LayoutLeft>;
1151  using internal_data_type = typename data_type_info::value_type;
1152  using internal_view_type =
1153  Kokkos::View<internal_data_type, Kokkos::LayoutLeft, device_type>;
1154 
1155  ScatterView() = default;
1156 
1157  template <typename RT, typename... RP>
1158  ScatterView(View<RT, RP...> const& original_view)
1159  : ScatterView(execution_space(), original_view) {}
1160 
1161  template <typename RT, typename... P, typename... RP>
1162  ScatterView(execution_space const& exec_space,
1163  View<RT, RP...> const& original_view)
1164  : unique_token() {
1165  size_t arg_N[8] = {original_view.rank > 0 ? original_view.extent(0)
1166  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1167  original_view.rank > 1 ? original_view.extent(1)
1168  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1169  original_view.rank > 2 ? original_view.extent(2)
1170  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1171  original_view.rank > 3 ? original_view.extent(3)
1172  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1173  original_view.rank > 4 ? original_view.extent(4)
1174  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1175  original_view.rank > 5 ? original_view.extent(5)
1176  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1177  original_view.rank > 6 ? original_view.extent(6)
1178  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1179  KOKKOS_IMPL_CTOR_DEFAULT_ARG};
1180  arg_N[internal_view_type::rank - 1] = unique_token.size();
1181  internal_view = internal_view_type(
1182  view_alloc(WithoutInitializing,
1183  std::string("duplicated_") + original_view.label(),
1184  exec_space),
1185  arg_N[0], arg_N[1], arg_N[2], arg_N[3], arg_N[4], arg_N[5], arg_N[6],
1186  arg_N[7]);
1187  reset(exec_space);
1188  }
1189 
1190  template <typename... Dims>
1191  ScatterView(std::string const& name, Dims... dims)
1192  : ScatterView(view_alloc(execution_space(), name), dims...) {}
1193 
1194  // This overload allows specifying an execution space instance to be
1195  // used by passing, e.g., Kokkos::view_alloc(exec_space, "label") as
1196  // first argument.
1197  template <typename... P, typename... Dims>
1198  ScatterView(::Kokkos::Impl::ViewCtorProp<P...> const& arg_prop,
1199  Dims... dims) {
1200  using ::Kokkos::Impl::Experimental::
1201  check_scatter_view_allocation_properties_argument;
1202  check_scatter_view_allocation_properties_argument(arg_prop);
1203 
1204  original_view_type original_view;
1205  size_t arg_N[8] = {original_view.rank > 0 ? original_view.static_extent(0)
1206  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1207  original_view.rank > 1 ? original_view.static_extent(1)
1208  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1209  original_view.rank > 2 ? original_view.static_extent(2)
1210  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1211  original_view.rank > 3 ? original_view.static_extent(3)
1212  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1213  original_view.rank > 4 ? original_view.static_extent(4)
1214  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1215  original_view.rank > 5 ? original_view.static_extent(5)
1216  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1217  original_view.rank > 6 ? original_view.static_extent(6)
1218  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1219  KOKKOS_IMPL_CTOR_DEFAULT_ARG};
1220  Kokkos::Impl::Experimental::args_to_array(arg_N, 0, dims...);
1221  arg_N[internal_view_type::rank - 1] = unique_token.size();
1222 
1223  auto const name =
1224  static_cast<::Kokkos::Impl::ViewCtorProp<void, std::string> const&>(
1225  arg_prop)
1226  .value;
1227  internal_view = internal_view_type(view_alloc(WithoutInitializing, name),
1228  arg_N[0], arg_N[1], arg_N[2], arg_N[3],
1229  arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
1230 
1231  auto const exec_space =
1232  static_cast<::Kokkos::Impl::ViewCtorProp<void, execution_space> const&>(
1233  arg_prop)
1234  .value;
1235  reset(exec_space);
1236  }
1237 
1238  template <typename OtherDataType, typename OtherDeviceType>
1239  KOKKOS_FUNCTION ScatterView(
1240  const ScatterView<OtherDataType, Kokkos::LayoutLeft, OtherDeviceType, Op,
1241  ScatterDuplicated, Contribution>& other_view)
1242  : unique_token(other_view.unique_token),
1243  internal_view(other_view.internal_view) {}
1244 
1245  template <typename OtherDataType, typename OtherDeviceType>
1246  KOKKOS_FUNCTION void operator=(
1247  const ScatterView<OtherDataType, Kokkos::LayoutLeft, OtherDeviceType, Op,
1248  ScatterDuplicated, Contribution>& other_view) {
1249  unique_token = other_view.unique_token;
1250  internal_view = other_view.internal_view;
1251  }
1252 
1253  template <typename OverrideContribution = Contribution>
1254  KOKKOS_FORCEINLINE_FUNCTION
1255  ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutLeft,
1256  ScatterDuplicated, Contribution, OverrideContribution>
1257  access() const {
1258  return ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutLeft,
1259  ScatterDuplicated, Contribution, OverrideContribution>(
1260  *this);
1261  }
1262 
1263  typename Kokkos::Impl::Experimental::Slice<Kokkos::LayoutLeft,
1264  internal_view_type::rank,
1265  internal_view_type>::value_type
1266  subview() const {
1267  return Kokkos::Impl::Experimental::Slice<
1268  Kokkos::LayoutLeft, internal_view_type::rank,
1269  internal_view_type>::get(internal_view, 0);
1270  }
1271 
1272  KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
1273  return internal_view.is_allocated();
1274  }
1275 
1276  template <typename... RP>
1277  void contribute_into(View<RP...> const& dest) const {
1278  contribute_into(execution_space(), dest);
1279  }
1280 
1281  template <typename... RP>
1282  void contribute_into(execution_space const& exec_space,
1283  View<RP...> const& dest) const {
1284  using dest_type = View<RP...>;
1285  static_assert(
1286  std::is_same<typename dest_type::value_type,
1287  typename original_view_type::non_const_value_type>::value,
1288  "ScatterView deep_copy destination has wrong value_type");
1289  static_assert(std::is_same<typename dest_type::array_layout,
1290  Kokkos::LayoutLeft>::value,
1291  "ScatterView deep_copy destination has different layout");
1292  static_assert(
1294  execution_space, typename dest_type::memory_space>::accessible,
1295  "ScatterView deep_copy destination memory space not accessible");
1296  auto extent = internal_view.extent(internal_view_type::rank - 1);
1297  bool is_equal = (dest.data() == internal_view.data());
1298  size_t start = is_equal ? 1 : 0;
1299  Kokkos::Impl::Experimental::ReduceDuplicates<execution_space,
1300  original_value_type, Op>(
1301  exec_space, internal_view.data(), dest.data(),
1302  internal_view.stride(internal_view_type::rank - 1), start, extent,
1303  internal_view.label());
1304  }
1305 
1306  void reset(execution_space const& exec_space = execution_space()) {
1307  Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
1308  original_value_type, Op>(
1309  exec_space, internal_view.data(), internal_view.size(),
1310  internal_view.label());
1311  }
1312 
1313  template <typename DT, typename... RP>
1314  void reset_except(View<DT, RP...> const& view) {
1315  reset_except(execution_space(), view);
1316  }
1317 
1318  template <typename DT, typename... RP>
1319  void reset_except(execution_space const& exec_space,
1320  View<DT, RP...> const& view) {
1321  if (view.data() != internal_view.data()) {
1322  reset(exec_space);
1323  return;
1324  }
1325  Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
1326  original_value_type, Op>(
1327  exec_space, internal_view.data() + view.size(),
1328  internal_view.size() - view.size(), internal_view.label());
1329  }
1330 
1331  void resize(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
1332  const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
1333  const size_t n6 = 0) {
1334  size_t arg_N[8] = {n0, n1, n2, n3, n4, n5, n6, 0};
1335  const int i = internal_view.rank - 1;
1336  arg_N[i] = unique_token.size();
1337 
1338  ::Kokkos::resize(internal_view, arg_N[0], arg_N[1], arg_N[2], arg_N[3],
1339  arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
1340  }
1341 
1342  void realloc(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
1343  const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
1344  const size_t n6 = 0) {
1345  size_t arg_N[8] = {n0, n1, n2, n3, n4, n5, n6, 0};
1346  const int i = internal_view.rank - 1;
1347  arg_N[i] = unique_token.size();
1348 
1349  ::Kokkos::realloc(internal_view, arg_N[0], arg_N[1], arg_N[2], arg_N[3],
1350  arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
1351  }
1352 
1353  protected:
1354  template <typename... Args>
1355  KOKKOS_FORCEINLINE_FUNCTION original_reference_type at(int thread_id,
1356  Args... args) const {
1357  return internal_view(args..., thread_id);
1358  }
1359 
1360  protected:
1361  using unique_token_type = Kokkos::Experimental::UniqueToken<
1362  execution_space, Kokkos::Experimental::UniqueTokenScope::Global>;
1363 
1364  unique_token_type unique_token;
1365  internal_view_type internal_view;
1366 };
1367 
1368 /* This object has to be separate in order to store the thread ID, which cannot
1369  be obtained until one is inside a parallel construct, and may be relatively
1370  expensive to obtain at every contribution
1371  (calls a non-inlined function, looks up a thread-local variable).
1372  Due to the expense, it is sensible to query it at most once per parallel
1373  iterate (ideally once per thread, but parallel_for doesn't expose that) and
1374  then store it in a stack variable.
1375  ScatterAccess serves as a non-const object on the stack which can store the
1376  thread ID */
1377 
1378 template <typename DataType, typename Op, typename DeviceType, typename Layout,
1379  typename Contribution, typename OverrideContribution>
1380 class ScatterAccess<DataType, Op, DeviceType, Layout, ScatterDuplicated,
1381  Contribution, OverrideContribution> {
1382  public:
1383  using view_type = ScatterView<DataType, Layout, DeviceType, Op,
1384  ScatterDuplicated, Contribution>;
1385  using original_value_type = typename view_type::original_value_type;
1386  using value_type = Kokkos::Impl::Experimental::ScatterValue<
1387  original_value_type, Op, DeviceType, OverrideContribution>;
1388 
1389  KOKKOS_FORCEINLINE_FUNCTION
1390  ScatterAccess(view_type const& view_in)
1391  : view(view_in), thread_id(view_in.unique_token.acquire()) {}
1392 
1393  KOKKOS_FORCEINLINE_FUNCTION
1394  ~ScatterAccess() {
1395  if (thread_id != ~thread_id_type(0)) view.unique_token.release(thread_id);
1396  }
1397 
1398  template <typename... Args>
1399  KOKKOS_FORCEINLINE_FUNCTION value_type operator()(Args... args) const {
1400  return view.at(thread_id, args...);
1401  }
1402 
1403  template <typename Arg>
1404  KOKKOS_FORCEINLINE_FUNCTION
1405  typename std::enable_if<view_type::original_view_type::rank == 1 &&
1406  std::is_integral<Arg>::value,
1407  value_type>::type
1408  operator[](Arg arg) const {
1409  return view.at(thread_id, arg);
1410  }
1411 
1412  private:
1413  view_type const& view;
1414 
1415  // simplify RAII by disallowing copies
1416  ScatterAccess(ScatterAccess const& other) = delete;
1417  ScatterAccess& operator=(ScatterAccess const& other) = delete;
1418  ScatterAccess& operator=(ScatterAccess&& other) = delete;
1419 
1420  public:
1421  // do need to allow moves though, for the common
1422  // auto b = a.access();
1423  // that assignments turns into a move constructor call
1424  KOKKOS_FORCEINLINE_FUNCTION
1425  ScatterAccess(ScatterAccess&& other)
1426  : view(other.view), thread_id(other.thread_id) {
1427  other.thread_id = ~thread_id_type(0);
1428  }
1429 
1430  private:
1431  using unique_token_type = typename view_type::unique_token_type;
1432  using thread_id_type = typename unique_token_type::size_type;
1433  thread_id_type thread_id;
1434 };
1435 
1436 template <typename Op = Kokkos::Experimental::ScatterSum,
1437  typename Duplication = void, typename Contribution = void,
1438  typename RT, typename... RP>
1439 ScatterView<
1440  RT, typename ViewTraits<RT, RP...>::array_layout,
1441  typename ViewTraits<RT, RP...>::device_type, Op,
1442  std::conditional_t<
1443  std::is_same<Duplication, void>::value,
1444  typename Kokkos::Impl::Experimental::DefaultDuplication<
1445  typename ViewTraits<RT, RP...>::execution_space>::type,
1446  Duplication>,
1447  std::conditional_t<
1448  std::is_same<Contribution, void>::value,
1449  typename Kokkos::Impl::Experimental::DefaultContribution<
1450  typename ViewTraits<RT, RP...>::execution_space,
1451  typename std::conditional_t<
1452  std::is_same<Duplication, void>::value,
1453  typename Kokkos::Impl::Experimental::DefaultDuplication<
1454  typename ViewTraits<RT, RP...>::execution_space>::type,
1455  Duplication>>::type,
1456  Contribution>>
1457 create_scatter_view(View<RT, RP...> const& original_view) {
1458  return original_view; // implicit ScatterView constructor call
1459 }
1460 
1461 template <typename Op, typename RT, typename... RP>
1462 ScatterView<
1463  RT, typename ViewTraits<RT, RP...>::array_layout,
1464  typename ViewTraits<RT, RP...>::device_type, Op,
1465  typename Kokkos::Impl::Experimental::DefaultDuplication<
1466  typename ViewTraits<RT, RP...>::execution_space>::type,
1467  typename Kokkos::Impl::Experimental::DefaultContribution<
1468  typename ViewTraits<RT, RP...>::execution_space,
1469  typename Kokkos::Impl::Experimental::DefaultDuplication<
1470  typename ViewTraits<RT, RP...>::execution_space>::type>::type>
1471 create_scatter_view(Op, View<RT, RP...> const& original_view) {
1472  return original_view; // implicit ScatterView constructor call
1473 }
1474 
1475 template <typename Op, typename Duplication, typename Contribution, typename RT,
1476  typename... RP>
1477 ScatterView<RT, typename ViewTraits<RT, RP...>::array_layout,
1478  typename ViewTraits<RT, RP...>::device_type, Op, Duplication,
1479  Contribution>
1480 create_scatter_view(Op, Duplication, Contribution,
1481  View<RT, RP...> const& original_view) {
1482  return original_view; // implicit ScatterView constructor call
1483 }
1484 
1485 } // namespace Experimental
1486 } // namespace Kokkos
1487 
1488 namespace Kokkos {
1489 namespace Experimental {
1490 
1491 template <typename DT1, typename DT2, typename LY, typename ES, typename OP,
1492  typename CT, typename DP, typename... VP>
1493 void contribute(
1494  typename ES::execution_space const& exec_space, View<DT1, VP...>& dest,
1495  Kokkos::Experimental::ScatterView<DT2, LY, ES, OP, CT, DP> const& src) {
1496  src.contribute_into(exec_space, dest);
1497 }
1498 
1499 template <typename DT1, typename DT2, typename LY, typename ES, typename OP,
1500  typename CT, typename DP, typename... VP>
1501 void contribute(
1502  View<DT1, VP...>& dest,
1503  Kokkos::Experimental::ScatterView<DT2, LY, ES, OP, CT, DP> const& src) {
1504  using execution_space = typename ES::execution_space;
1505  contribute(execution_space{}, dest, src);
1506 }
1507 
1508 } // namespace Experimental
1509 } // namespace Kokkos
1510 
1511 namespace Kokkos {
1512 
1513 template <typename DT, typename LY, typename ES, typename OP, typename CT,
1514  typename DP, typename... IS>
1515 void realloc(
1516  Kokkos::Experimental::ScatterView<DT, LY, ES, OP, CT, DP>& scatter_view,
1517  IS... is) {
1518  scatter_view.realloc(is...);
1519 }
1520 
1521 template <typename DT, typename LY, typename ES, typename OP, typename CT,
1522  typename DP, typename... IS>
1523 void resize(
1524  Kokkos::Experimental::ScatterView<DT, LY, ES, OP, CT, DP>& scatter_view,
1525  IS... is) {
1526  scatter_view.resize(is...);
1527 }
1528 
1529 } // namespace Kokkos
1530 
1531 #endif
View
Memory layout tag indicating left-to-right (Fortran scheme) striding of multi-indices.
Can AccessSpace access MemorySpace ?
Class to generate unique ids based on the required amount of concurrency.
View to an array of data.
Memory layout tag indicating right-to-left (C or lexicographical scheme) striding of multi-indices...
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename std::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=nullptr)
Execute functor in parallel according to the execution policy.
Definition: dummy.cpp:3