1 #ifndef HeterogeneousCore_AlpakaInterface_interface_workdivision_h 2 #define HeterogeneousCore_AlpakaInterface_interface_workdivision_h 6 #include <alpaka/alpaka.hpp> 22 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
25 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED 26 template <
typename TDim>
28 #endif // ALPAKA_ACC_GPU_CUDA_ENABLED 30 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED 31 template <
typename TDim>
// Partial specialisation of the requires_single_thread_per_block trait for the
// alpaka HIP accelerator type alpaka::AccGpuHipRt<TDim, Idx>.
// It derives from std::false_type, so the trait evaluates to false for this
// accelerator; code that tests requires_single_thread_per_block_v<TAcc> with
// `if constexpr` therefore does not take the single-thread-per-block branch.
// The surrounding text guards this specialisation with
// ALPAKA_ACC_GPU_HIP_ENABLED.
// NOTE(review): the primary template and the declaration of Idx are not
// visible in this excerpt - confirm against the full header.
32 struct requires_single_thread_per_block<
alpaka::AccGpuHipRt<TDim, Idx>> :
public std::false_type {};
33 #endif // ALPAKA_ACC_GPU_HIP_ENABLED 35 #ifdef ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED 36 template <
typename TDim>
// Partial specialisation of the requires_single_thread_per_block trait for the
// alpaka CPU threads accelerator type alpaka::AccCpuThreads<TDim, Idx>.
// It derives from std::false_type, so the trait evaluates to false for this
// accelerator; code that tests requires_single_thread_per_block_v<TAcc> with
// `if constexpr` therefore does not take the single-thread-per-block branch.
// The surrounding text guards this specialisation with
// ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED.
// NOTE(review): the primary template and the declaration of Idx are not
// visible in this excerpt - confirm against the full header.
37 struct requires_single_thread_per_block<
alpaka::AccCpuThreads<TDim, Idx>> :
public std::false_type {};
38 #endif // ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED 41 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
45 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
47 if constexpr (not requires_single_thread_per_block_v<TAcc>) {
61 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
64 using Dim = alpaka::Dim<TAcc>;
65 if constexpr (not requires_single_thread_per_block_v<TAcc>) {
146 template <
typename TAcc,
152 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
153 first_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[Dim] * elements_},
154 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[Dim] * elements_},
158 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
159 first_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[Dim] * elements_},
160 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[Dim] * elements_},
164 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
165 first_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[Dim] * elements_ +
first},
166 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[Dim] * elements_},
169 class const_iterator;
192 if constexpr (requires_single_thread_per_block_v<TAcc>) {
202 range_ =
std::min(first_ + elements_, extent_);
203 if (index_ < extent_)
221 return (index_ ==
other.index_) and (first_ ==
other.first_);
300 template <
typename TAcc,
312 template <
typename TAcc,
319 template <
typename TAcc,
326 template <
typename TAcc,
343 template <
typename TAcc,
387 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
390 using Dim = alpaka::Dim<TAcc>;
391 using Vec = alpaka::Vec<Dim, Idx>;
394 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)},
395 thread_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) * elements_},
396 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc) * elements_},
400 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)},
401 thread_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) * elements_},
402 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc) * elements_},
413 if ((thread_ < extent_).
all()) {
447 return (index_ ==
other.index_);
456 first_{alpaka::elementwise_min(
first,
loop->extent_)},
457 range_{alpaka::elementwise_min(
first +
loop->elements_,
loop->extent_)},
462 : loop_{
loop}, first_{loop_->extent_}, range_{loop_->extent_}, index_{loop_->extent_} {}
466 bool overflow =
false;
468 if (index_[
I] >= range_[
I]) {
469 index_[
I] = first_[
I];
481 if (not nth_elements_loop<N - 1>()) {
484 return do_elements_loops<
N - 1>();
491 bool overflow =
false;
492 first_[
I] += loop_->stride_[
I];
493 if (first_[
I] >= loop_->extent_[
I]) {
494 first_[
I] = loop_->thread_[
I];
497 index_[
I] = first_[
I];
498 range_[
I] =
std::min(first_[
I] + loop_->elements_[
I], loop_->extent_[
I]);
508 if (not nth_strided_loop<N - 1>()) {
511 return do_strided_loops<
N - 1>();
518 if constexpr (requires_single_thread_per_block_v<TAcc>) {
521 if (not do_elements_loops<Dim::value>()) {
529 if (not do_strided_loops<Dim::value>()) {
535 first_ = loop_->extent_;
536 range_ = loop_->extent_;
537 index_ = loop_->extent_;
564 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
569 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
623 template <
typename TAcc,
629 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
630 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
635 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
636 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
637 extent_{
divide_up_by(extent, alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[Dim])} {}
639 class const_iterator;
659 if (first_ < extent_)
739 template <
typename TAcc,
751 template <
typename TAcc,
758 template <
typename TAcc,
765 template <
typename TAcc,
781 template <
typename TAcc,
836 template <
typename TAcc,
842 : first_{
block * alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[Dim]},
843 local_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] *
844 alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
845 range_{local_ + alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]} {}
848 : first_{
block * alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[Dim]},
850 alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] *
851 alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim])},
852 range_{
std::min(extent - first_, local_ + alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim])} {}
854 class const_iterator;
872 if constexpr (requires_single_thread_per_block_v<TAcc>) {
953 template <
typename TAcc,
966 template <
typename TAcc,
973 template <
typename TAcc,
980 template <
typename TAcc,
996 template <
typename TAcc,
1038 template <
typename TAcc,
1044 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
1045 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
1049 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
1050 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
1053 class const_iterator;
1073 if (first_ < extent_)
1140 template <
typename TAcc,
1153 template <
typename TAcc,
1160 template <
typename TAcc,
1167 template <
typename TAcc,
1177 template <
typename TAcc,
1183 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
1184 thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_},
1185 stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_},
1189 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
1190 thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_},
1191 stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_},
1195 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
1196 thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_ +
first},
1197 stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_},
1200 class const_iterator;
1223 if constexpr (requires_single_thread_per_block_v<TAcc>) {
1226 if (index_ < range_)
1233 range_ =
std::min(first_ + elements_, extent_);
1234 if (index_ < extent_)
1252 return (index_ ==
other.index_) and (first_ ==
other.first_);
1278 template <
typename TAcc,
1291 template <
typename TAcc,
1298 template <
typename TAcc,
1305 template <
typename TAcc,
1319 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
1321 return alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) ==
Vec<alpaka::Dim<TAcc>>::zeros();
1331 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
1333 return alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc) ==
Vec<alpaka::Dim<TAcc>>::zeros();
1338 #endif // HeterogeneousCore_AlpakaInterface_interface_workdivision_h
constexpr uint32_t stride
alpaka::WorkDivMembers< TDim, Idx > WorkDiv
const std::complex< double > I
alpaka::Vec< TDim, Idx > Vec