48 #ifndef KOKKOS_PARALLEL_HPP 49 #define KOKKOS_PARALLEL_HPP 52 #include <Kokkos_Core_fwd.hpp> 53 #include <Kokkos_View.hpp> 54 #include <Kokkos_ExecPolicy.hpp> 56 #include <impl/Kokkos_Tools.hpp> 57 #include <type_traits> 60 #include <impl/Kokkos_Tags.hpp> 61 #include <impl/Kokkos_Traits.hpp> 62 #include <impl/Kokkos_FunctorAnalysis.hpp> 63 #include <impl/Kokkos_FunctorAdapter.hpp> 65 #ifdef KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 75 template <
class T,
class =
void>
// Primary template of the execution_space detector: it derives from
// std::false_type and exposes not_a_type as its nested `type`.
76 struct is_detected_execution_space : std::false_type {
77 using type = not_a_type;
// Partial specialization selected when `typename T::execution_space` is
// well-formed (detected through void_t); `type` is that nested type.
// NOTE(review): the template header and base-class line of this
// specialization are not visible in this listing.
81 struct is_detected_execution_space<T, void_t<typename T::execution_space>>
83 using type =
typename T::execution_space;
// Shorthand alias for is_detected_execution_space<T>::type.
87 using detected_execution_space_t =
88 typename is_detected_execution_space<T>::type;
// Primary template of the device_type detector: it derives from
// std::false_type and exposes not_a_type as its nested `type`.
90 template <
class T,
class =
void>
91 struct is_detected_device_type : std::false_type {
92 using type = not_a_type;
// Partial specialization selected when `typename T::device_type` is
// well-formed (detected through void_t); `type` is that nested type.
// NOTE(review): the template header and base-class line of this
// specialization are not visible in this listing.
96 struct is_detected_device_type<T, void_t<typename T::device_type>>
98 using type =
typename T::device_type;
// Shorthand alias for is_detected_device_type<T>::type.
102 using detected_device_type_t =
typename is_detected_device_type<T>::type;
// Chooses the execution space for a (Functor, Policy) pair. The candidates
// visible below are considered in this order:
//   1. Policy::execution_space, when Policy declares one;
//   2. Functor::execution_space, when Functor declares one;
//   3. the execution_space of Functor::device_type, when Functor declares a
//      device_type;
//   4. Kokkos::DefaultExecutionSpace otherwise.
// NOTE(review): the nested std::conditional_t openers between the
// candidates (original lines 118 and 121) are not visible in this listing.
113 template <
class Functor,
class Policy>
114 struct FunctorPolicyExecutionSpace {
115 using execution_space = std::conditional_t<
116 is_detected_execution_space<Policy>::value,
117 detected_execution_space_t<Policy>,
119 is_detected_execution_space<Functor>::value,
120 detected_execution_space_t<Functor>,
122 is_detected_device_type<Functor>::value,
123 detected_execution_space_t<detected_device_type_t<Functor>>,
124 Kokkos::DefaultExecutionSpace>>>;
// Overload taking an execution policy, a functor and an optional label `str`
// (defaults to ""). The trailing enable_if parameter limits this overload to
// ExecPolicy types for which Kokkos::Impl::is_execution_policy is true.
// NOTE(review): the line carrying the function name (original line 157), the
// kpID declaration and the closure construction/execution are not visible in
// this listing.
156 template <
class ExecPolicy,
class FunctorType>
158 const ExecPolicy& policy,
const FunctorType& functor,
159 const std::string& str =
"",
160 typename std::enable_if<
161 Kokkos::Impl::is_execution_policy<ExecPolicy>::value>::type* =
// A copy of the caller's policy is what the tools begin/end hooks receive.
165 ExecPolicy inner_policy = policy;
166 Kokkos::Tools::Impl::begin_parallel_for(inner_policy, functor, str, kpID);
// Shared-allocation tracking is switched off and back on around the statement
// on original line 169, which is missing from this listing.
168 Kokkos::Impl::shared_allocation_tracking_disable();
170 Kokkos::Impl::shared_allocation_tracking_enable();
174 Kokkos::Tools::Impl::end_parallel_for(inner_policy, functor, str, kpID);
// Overload taking a work count instead of a policy. It builds a
// RangePolicy<execution_space> from (0, work_count) and dispatches the
// functor through Impl::ParallelFor. `str` is an optional label (default "")
// handed to the tools begin/end hooks.
// NOTE(review): the kpID declaration and whatever runs the closure (original
// lines 194-196) are not visible in this listing.
177 template <
class FunctorType>
178 inline void parallel_for(
const size_t work_count,
const FunctorType& functor,
179 const std::string& str =
"") {
// Policy is passed as void here, so only FunctorType is consulted when the
// execution space is chosen.
180 using execution_space =
181 typename Impl::FunctorPolicyExecutionSpace<FunctorType,
182 void>::execution_space;
183 using policy = RangePolicy<execution_space>;
187 policy execution_policy = policy(0, work_count);
189 Kokkos::Tools::Impl::begin_parallel_for(execution_policy, functor, str, kpID);
// Shared-allocation tracking is disabled only while the closure is
// constructed, then re-enabled.
191 Kokkos::Impl::shared_allocation_tracking_disable();
192 Impl::ParallelFor<FunctorType, policy> closure(functor, execution_policy);
193 Kokkos::Impl::shared_allocation_tracking_enable();
197 Kokkos::Tools::Impl::end_parallel_for(execution_policy, functor, str, kpID);
// Label-first overload: forwards to ::Kokkos::parallel_for(policy, functor,
// str). When KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES evaluates to true in the
// preprocessor, a "Start"/"End" message containing `str` is written to
// std::cout before and after the forwarded call.
// NOTE(review): the matching #endif lines are not visible in this listing.
200 template <
class ExecPolicy,
class FunctorType>
201 inline void parallel_for(
const std::string& str,
const ExecPolicy& policy,
202 const FunctorType& functor) {
203 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 205 std::cout <<
"KOKKOS_DEBUG Start parallel_for kernel: " << str << std::endl;
208 ::Kokkos::parallel_for(policy, functor, str);
210 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 212 std::cout <<
"KOKKOS_DEBUG End parallel_for kernel: " << str << std::endl;
219 #include <Kokkos_Parallel_Reduce.hpp> 387 template <
class ExecutionPolicy,
class FunctorType>
// parallel_scan overload taking an execution policy, a functor and an
// optional label `str` (defaults to ""). The trailing enable_if parameter
// limits this overload to ExecutionPolicy types for which
// Kokkos::Impl::is_execution_policy is true.
// NOTE(review): the kpID declaration, the second line of the closure
// constructor call (original line 400) and whatever runs the closure are not
// visible in this listing.
388 inline void parallel_scan(
389 const ExecutionPolicy& policy,
const FunctorType& functor,
390 const std::string& str =
"",
391 typename std::enable_if<
392 Kokkos::Impl::is_execution_policy<ExecutionPolicy>::value>::type* =
// A copy of the caller's policy is what the tools begin/end hooks receive.
395 ExecutionPolicy inner_policy = policy;
396 Kokkos::Tools::Impl::begin_parallel_scan(inner_policy, functor, str, kpID);
// Shared-allocation tracking is disabled only while the closure is
// constructed, then re-enabled.
398 Kokkos::Impl::shared_allocation_tracking_disable();
399 Impl::ParallelScan<FunctorType, ExecutionPolicy> closure(functor,
401 Kokkos::Impl::shared_allocation_tracking_enable();
405 Kokkos::Tools::Impl::end_parallel_scan(inner_policy, functor, str, kpID);
// parallel_scan overload taking a work count instead of a policy. A `policy`
// object is constructed from (0, work_count) and the functor is dispatched
// through Impl::ParallelScan. `str` is an optional label (default "") handed
// to the tools begin/end hooks.
// NOTE(review): the first line of the execution_space alias (original line
// 412), the definition of `policy`, the kpID declaration and whatever runs
// the closure are not visible in this listing.
408 template <
class FunctorType>
409 inline void parallel_scan(
const size_t work_count,
const FunctorType& functor,
410 const std::string& str =
"") {
411 using execution_space =
413 void>::execution_space;
418 policy execution_policy(0, work_count);
419 Kokkos::Tools::Impl::begin_parallel_scan(execution_policy, functor, str,
// Shared-allocation tracking is disabled only while the closure is
// constructed, then re-enabled.
421 Kokkos::Impl::shared_allocation_tracking_disable();
422 Impl::ParallelScan<FunctorType, policy> closure(functor, execution_policy);
423 Kokkos::Impl::shared_allocation_tracking_enable();
427 Kokkos::Tools::Impl::end_parallel_scan(execution_policy, functor, str, kpID);
// Label-first overload: forwards to ::Kokkos::parallel_scan(policy, functor,
// str). When KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES evaluates to true in the
// preprocessor, a "Start"/"End" message containing `str` is written to
// std::cout before and after the forwarded call.
// NOTE(review): the matching #endif lines are not visible in this listing.
430 template <
class ExecutionPolicy,
class FunctorType>
431 inline void parallel_scan(
const std::string& str,
const ExecutionPolicy& policy,
432 const FunctorType& functor) {
433 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 435 std::cout <<
"KOKKOS_DEBUG Start parallel_scan kernel: " << str << std::endl;
438 ::Kokkos::parallel_scan(policy, functor, str);
440 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 442 std::cout <<
"KOKKOS_DEBUG End parallel_scan kernel: " << str << std::endl;
// parallel_scan overload that also takes `return_value`, which is passed by
// non-const reference into the Impl::ParallelScanWithTotal closure. `str` is
// an optional label (default ""). The trailing enable_if parameter limits
// this overload to ExecutionPolicy types for which
// Kokkos::Impl::is_execution_policy is true.
// NOTE(review): the kpID declaration and whatever runs the closure are not
// visible in this listing.
447 template <
class ExecutionPolicy,
class FunctorType,
class ReturnType>
448 inline void parallel_scan(
449 const ExecutionPolicy& policy,
const FunctorType& functor,
450 ReturnType& return_value,
const std::string& str =
"",
451 typename std::enable_if<
452 Kokkos::Impl::is_execution_policy<ExecutionPolicy>::value>::type* =
// A copy of the caller's policy is used for the hooks and the closure.
455 ExecutionPolicy inner_policy = policy;
456 Kokkos::Tools::Impl::begin_parallel_scan(inner_policy, functor, str, kpID);
// Shared-allocation tracking is disabled only while the closure is
// constructed, then re-enabled.
458 Kokkos::Impl::shared_allocation_tracking_disable();
459 Impl::ParallelScanWithTotal<FunctorType, ExecutionPolicy, ReturnType> closure(
460 functor, inner_policy, return_value);
461 Kokkos::Impl::shared_allocation_tracking_enable();
465 Kokkos::Tools::Impl::end_parallel_scan(inner_policy, functor, str, kpID);
// Fences the execution space instance of the caller's policy before
// returning; presumably so return_value is complete on return — confirm.
467 policy.space().fence();
// parallel_scan overload taking a work count and a return value. A `policy`
// object is constructed from (0, work_count) and the functor is dispatched
// through Impl::ParallelScanWithTotal together with return_value. `str` is
// an optional label (default "").
// NOTE(review): the return_value parameter line (original line 472), the
// first line of the execution_space alias, the definition of `policy`, the
// kpID declaration and whatever runs the closure are not visible in this
// listing.
470 template <
class FunctorType,
class ReturnType>
471 inline void parallel_scan(
const size_t work_count,
const FunctorType& functor,
473 const std::string& str =
"") {
474 using execution_space =
476 void>::execution_space;
480 policy execution_policy(0, work_count);
482 Kokkos::Tools::Impl::begin_parallel_scan(execution_policy, functor, str,
// Shared-allocation tracking is disabled only while the closure is
// constructed, then re-enabled.
485 Kokkos::Impl::shared_allocation_tracking_disable();
486 Impl::ParallelScanWithTotal<FunctorType, policy, ReturnType> closure(
487 functor, execution_policy, return_value);
488 Kokkos::Impl::shared_allocation_tracking_enable();
492 Kokkos::Tools::Impl::end_parallel_scan(execution_policy, functor, str, kpID);
// Fences a default-constructed execution_space instance before returning;
// presumably so return_value is complete on return — confirm.
494 execution_space().fence();
// Label-first overload with a return value: forwards to
// ::Kokkos::parallel_scan(policy, functor, return_value, str). When
// KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES evaluates to true in the
// preprocessor, a "Start"/"End" message containing `str` is written to
// std::cout before and after the forwarded call.
// NOTE(review): the return_value parameter line (original line 500) and the
// matching #endif lines are not visible in this listing.
497 template <
class ExecutionPolicy,
class FunctorType,
class ReturnType>
498 inline void parallel_scan(
const std::string& str,
const ExecutionPolicy& policy,
499 const FunctorType& functor,
501 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 503 std::cout <<
"KOKKOS_DEBUG Start parallel_scan kernel: " << str << std::endl;
506 ::Kokkos::parallel_scan(policy, functor, return_value, str);
508 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 510 std::cout <<
"KOKKOS_DEBUG End parallel_scan kernel: " << str << std::endl;
// Primary template of the team shared-memory size query. The two bool
// parameters default to has_member_team_shmem_size<FunctorType>::value and
// has_member_shmem_size<FunctorType>::value; the partial specializations
// later in the file cover the cases where at least one of them is true.
// value() here leaves both of its parameters unnamed, i.e. it does not read
// them.
// NOTE(review): the body of value() (its return statement) is not visible in
// this listing.
523 template <
class FunctorType,
524 bool HasTeamShmemSize =
525 has_member_team_shmem_size<FunctorType>::value,
526 bool HasShmemSize = has_member_shmem_size<FunctorType>::value>
527 struct FunctorTeamShmemSize {
528 KOKKOS_INLINE_FUNCTION
static size_t value(
const FunctorType&,
int) {
// Specialization for HasTeamShmemSize == true, HasShmemSize == false:
// value() returns f.team_shmem_size(team_size).
533 template <
class FunctorType>
534 struct FunctorTeamShmemSize<FunctorType, true, false> {
535 static inline size_t value(
const FunctorType& f,
int team_size) {
536 return f.team_shmem_size(team_size);
// Specialization for HasTeamShmemSize == false, HasShmemSize == true:
// value() returns f.shmem_size(team_size).
540 template <
class FunctorType>
541 struct FunctorTeamShmemSize<FunctorType, false, true> {
542 static inline size_t value(
const FunctorType& f,
int team_size) {
543 return f.shmem_size(team_size);
// Specialization for a functor that defines both team_shmem_size and
// shmem_size. value() leaves both parameters unnamed, and the only visible
// part of its body is the start of a message about this combination.
// NOTE(review): the statement that uses the message (original line 549) and
// the rest of the message text are not visible in this listing, so how the
// case is reported cannot be confirmed from here.
546 template <
class FunctorType>
547 struct FunctorTeamShmemSize<FunctorType, true, true> {
548 static inline size_t value(
const FunctorType& ,
int ) {
550 "Functor with both team_shmem_size and shmem_size defined is " Implementation of the ParallelFor operator that has a partial specialization for the device...
Given a Functor and an Execution Policy, query an execution space.
Execution policy for work over a range of an integral type.
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename std::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=nullptr)
Execute functor in parallel according to the execution policy.