1 #ifndef HeterogeneousCore_AlpakaInterface_interface_workdivision_h 2 #define HeterogeneousCore_AlpakaInterface_interface_workdivision_h 8 #include <alpaka/alpaka.hpp> 23 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
26 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED 27 template <
typename TDim>
29 #endif // ALPAKA_ACC_GPU_CUDA_ENABLED 31 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED 32 template <
typename TDim>
33 struct requires_single_thread_per_block<
alpaka::AccGpuHipRt<TDim, Idx>> :
public std::false_type {};
34 #endif // ALPAKA_ACC_GPU_HIP_ENABLED 36 #ifdef ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED 37 template <
typename TDim>
38 struct requires_single_thread_per_block<
alpaka::AccCpuThreads<TDim, Idx>> :
public std::false_type {};
39 #endif // ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED 42 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
46 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
48 if constexpr (not requires_single_thread_per_block_v<TAcc>) {
62 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
65 using Dim = alpaka::Dim<TAcc>;
66 if constexpr (not requires_single_thread_per_block_v<TAcc>) {
152 template <
typename TAcc,
158 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
159 first_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[Dim] * elements_},
160 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[Dim] * elements_},
164 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
165 first_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[Dim] * elements_},
166 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[Dim] * elements_},
170 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
171 first_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[Dim] * elements_ +
first},
172 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[Dim] * elements_},
175 class const_iterator;
198 if constexpr (requires_single_thread_per_block_v<TAcc>) {
208 range_ =
std::min(first_ + elements_, extent_);
209 if (index_ < extent_)
227 return (index_ ==
other.index_) and (first_ ==
other.first_);
308 template <
typename TAcc,
321 template <
typename TAcc,
334 template <
typename TAcc,
341 template <
typename TAcc,
348 template <
typename TAcc,
396 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
399 using Dim = alpaka::Dim<TAcc>;
400 using Vec = alpaka::Vec<Dim, Idx>;
403 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)},
404 thread_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) * elements_},
405 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc) * elements_},
409 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)},
410 thread_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) * elements_},
411 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc) * elements_},
422 if ((thread_ < extent_).
all()) {
456 return (index_ ==
other.index_);
460 return not(*
this ==
other);
467 first_{alpaka::elementwise_min(
first,
loop->extent_)},
468 range_{alpaka::elementwise_min(
first +
loop->elements_,
loop->extent_)},
473 : loop_{
loop}, first_{loop_->extent_}, range_{loop_->extent_}, index_{loop_->extent_} {}
477 bool overflow =
false;
479 if (index_[
I] >= range_[
I]) {
480 index_[
I] = first_[
I];
492 if (not nth_elements_loop<N - 1>()) {
495 return do_elements_loops<
N - 1>();
502 bool overflow =
false;
503 first_[
I] += loop_->stride_[
I];
504 if (first_[
I] >= loop_->extent_[
I]) {
505 first_[
I] = loop_->thread_[
I];
508 index_[
I] = first_[
I];
509 range_[
I] =
std::min(first_[
I] + loop_->elements_[
I], loop_->extent_[
I]);
519 if (not nth_strided_loop<N - 1>()) {
522 return do_strided_loops<
N - 1>();
529 if constexpr (requires_single_thread_per_block_v<TAcc>) {
532 if (not do_elements_loops<Dim::value>()) {
540 if (not do_strided_loops<Dim::value>()) {
546 first_ = loop_->extent_;
547 range_ = loop_->extent_;
548 index_ = loop_->extent_;
574 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
579 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
638 template <
typename TAcc,
644 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
645 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
650 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
651 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
652 extent_{
divide_up_by(extent, alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[Dim])} {}
654 class const_iterator;
674 if (first_ < extent_)
756 template <
typename TAcc,
769 template <
typename TAcc,
782 template <
typename TAcc,
789 template <
typename TAcc,
796 template <
typename TAcc,
856 template <
typename TAcc,
862 : first_{
block * alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[Dim]},
863 local_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] *
864 alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
865 range_{local_ + alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]} {}
868 : first_{
block * alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[Dim]},
870 alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] *
871 alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim])},
872 range_{
std::min(extent - first_, local_ + alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim])} {}
874 class const_iterator;
892 if constexpr (requires_single_thread_per_block_v<TAcc>) {
975 template <
typename TAcc,
988 template <
typename TAcc,
1002 template <
typename TAcc,
1009 template <
typename TAcc,
1016 template <
typename TAcc,
1063 template <
typename TAcc,
1069 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
1070 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
1074 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
1075 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
1078 class const_iterator;
1098 if (first_ < extent_)
1167 template <
typename TAcc,
1180 template <
typename TAcc,
1194 template <
typename TAcc,
1201 template <
typename TAcc,
1208 template <
typename TAcc,
1223 template <
typename TAcc,
1229 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
1230 thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_},
1231 stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_},
1235 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
1236 thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_},
1237 stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_},
1241 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
1242 thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_ +
first},
1243 stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_},
1246 class const_iterator;
1269 if constexpr (requires_single_thread_per_block_v<TAcc>) {
1272 if (index_ < range_)
1279 range_ =
std::min(first_ + elements_, extent_);
1280 if (index_ < extent_)
1298 return (index_ ==
other.index_) and (first_ ==
other.first_);
1326 template <
typename TAcc,
1339 template <
typename TAcc,
1353 template <
typename TAcc,
1360 template <
typename TAcc,
1367 template <
typename TAcc,
1381 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
1383 return alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) ==
Vec<alpaka::Dim<TAcc>>::zeros();
1393 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
1395 return alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc) ==
Vec<alpaka::Dim<TAcc>>::zeros();
1400 #endif // HeterogeneousCore_AlpakaInterface_interface_workdivision_h
constexpr uint32_t stride
alpaka::WorkDivMembers< TDim, Idx > WorkDiv
const std::complex< double > I
alpaka::Vec< TDim, Idx > Vec