1 #ifndef HeterogeneousCore_AlpakaInterface_interface_workdivision_h 2 #define HeterogeneousCore_AlpakaInterface_interface_workdivision_h 6 #include <alpaka/alpaka.hpp> 22 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
25 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED 26 template <
typename TDim>
28 #endif // ALPAKA_ACC_GPU_CUDA_ENABLED 30 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED 31 template <
typename TDim>
32 struct requires_single_thread_per_block<
alpaka::AccGpuHipRt<TDim, Idx>> :
public std::false_type {};
33 #endif // ALPAKA_ACC_GPU_HIP_ENABLED 35 #ifdef ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED 36 template <
typename TDim>
37 struct requires_single_thread_per_block<
alpaka::AccCpuThreads<TDim, Idx>> :
public std::false_type {};
38 #endif // ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED 41 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
45 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
47 if constexpr (not requires_single_thread_per_block_v<TAcc>) {
61 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
64 using Dim = alpaka::Dim<TAcc>;
65 if constexpr (not requires_single_thread_per_block_v<TAcc>) {
96 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
100 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]},
101 first_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
102 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
106 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]},
107 first_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
108 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
112 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]},
113 first_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_ +
first},
114 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
133 if constexpr (requires_single_thread_per_block_v<TAcc>) {
143 range_ =
std::min(first_ + elements_, extent_);
144 if (index_ < extent_)
162 return (index_ ==
other.index_) and (first_ ==
other.first_);
189 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
192 using Dim = alpaka::Dim<TAcc>;
193 using Vec = alpaka::Vec<Dim, Idx>;
196 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)},
197 thread_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) * elements_},
198 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc) * elements_},
202 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)},
203 thread_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) * elements_},
204 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc) * elements_},
237 first_{alpaka::elementwise_min(
first,
loop->extent_)},
238 range_{alpaka::elementwise_min(
first +
loop->elements_,
loop->extent_)},
243 : loop_{
loop}, first_{loop_->extent_}, range_{loop_->extent_}, index_{loop_->extent_} {}
247 bool overflow =
false;
249 if (index_[
I] >= range_[
I]) {
250 index_[
I] = first_[
I];
258 if constexpr (
N == 0) {
262 if (not nth_elements_loop<N - 1>()) {
265 return do_elements_loops<
N - 1>();
272 bool overflow =
false;
273 first_[
I] += loop_->stride_[
I];
274 if (first_[
I] >= loop_->extent_[
I]) {
275 first_[
I] = loop_->thread_[
I];
278 index_[
I] = first_[
I];
279 range_[
I] =
std::min(first_[
I] + loop_->elements_[
I], loop_->extent_[
I]);
285 if constexpr (
N == 0) {
289 if (not nth_strided_loop<N - 1>()) {
292 return do_strided_loops<
N - 1>();
299 if constexpr (requires_single_thread_per_block_v<TAcc>) {
302 if (not do_elements_loops<Dim::value>()) {
310 if (not do_strided_loops<Dim::value>()) {
316 first_ = loop_->extent_;
317 range_ = loop_->extent_;
318 index_ = loop_->extent_;
332 if ((thread_ < extent_).
all()) {
368 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
372 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]},
373 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0u]},
378 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]},
379 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0u]},
380 extent_{
divide_up_by(extent, alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u])} {}
395 if (first_ < extent_)
443 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
447 : first_{
block * alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u]},
448 local_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u] *
449 alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]},
450 range_{local_ + alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]} {}
453 : first_{
block * alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u]},
455 alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u] *
456 alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u])},
457 range_{
std::min(extent - first_, local_ + alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u])} {}
469 if constexpr (requires_single_thread_per_block_v<TAcc>) {
531 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
547 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
562 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
566 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]},
567 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0u]},
572 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]},
573 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0u]},
589 if (first_ < extent_)
636 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
640 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]},
641 thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u] * elements_},
642 stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u] * elements_},
646 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]},
647 thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u] * elements_},
648 stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u] * elements_},
667 if constexpr (requires_single_thread_per_block_v<TAcc>) {
677 range_ =
std::min(first_ + elements_, extent_);
678 if (index_ < extent_)
696 return (index_ ==
other.index_) and (first_ ==
other.first_);
730 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
732 return alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) ==
Vec<alpaka::Dim<TAcc>>::zeros();
742 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
744 return alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc) ==
Vec<alpaka::Dim<TAcc>>::zeros();
749 #endif // HeterogeneousCore_AlpakaInterface_interface_workdivision_h
alpaka::WorkDivMembers< TDim, Idx > WorkDiv
const std::complex< double > I
alpaka::Vec< TDim, Idx > Vec