1 #ifndef HeterogeneousCore_AlpakaInterface_interface_workdivision_h 2 #define HeterogeneousCore_AlpakaInterface_interface_workdivision_h 6 #include <alpaka/alpaka.hpp> 22 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
25 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED 26 template <
typename TDim>
28 #endif // ALPAKA_ACC_GPU_CUDA_ENABLED 30 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED 31 template <
typename TDim>
32 struct requires_single_thread_per_block<
alpaka::AccGpuHipRt<TDim, Idx>> :
public std::false_type {};
33 #endif // ALPAKA_ACC_GPU_HIP_ENABLED 35 #ifdef ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED 36 template <
typename TDim>
37 struct requires_single_thread_per_block<
alpaka::AccCpuThreads<TDim, Idx>> :
public std::false_type {};
38 #endif // ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED 41 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
45 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
47 if constexpr (not requires_single_thread_per_block_v<TAcc>) {
61 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
64 using Dim = alpaka::Dim<TAcc>;
65 if constexpr (not requires_single_thread_per_block_v<TAcc>) {
91 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
95 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]},
96 thread_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
97 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
101 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]},
102 thread_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
103 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
122 if constexpr (requires_single_thread_per_block_v<TAcc>) {
132 range_ =
std::min(first_ + elements_, extent_);
133 if (index_ < extent_)
151 return (index_ ==
other.index_) and (first_ ==
other.first_);
178 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
181 using Dim = alpaka::Dim<TAcc>;
182 using Vec = alpaka::Vec<Dim, Idx>;
185 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)},
186 thread_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) * elements_},
187 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc) * elements_},
191 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)},
192 thread_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) * elements_},
193 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc) * elements_},
226 first_{alpaka::elementwise_min(
first,
loop->extent_)},
227 range_{alpaka::elementwise_min(
first +
loop->elements_,
loop->extent_)},
232 : loop_{
loop}, first_{loop_->extent_}, range_{loop_->extent_}, index_{loop_->extent_} {}
236 bool overflow =
false;
238 if (index_[
I] >= range_[
I]) {
239 index_[
I] = first_[
I];
247 if constexpr (
N == 0) {
251 if (not nth_elements_loop<N - 1>()) {
254 return do_elements_loops<
N - 1>();
261 bool overflow =
false;
262 first_[
I] += loop_->stride_[
I];
263 if (first_[
I] >= loop_->extent_[
I]) {
264 first_[
I] = loop_->thread_[
I];
267 index_[
I] = first_[
I];
268 range_[
I] =
std::min(first_[
I] + loop_->elements_[
I], loop_->extent_[
I]);
274 if constexpr (
N == 0) {
278 if (not nth_strided_loop<N - 1>()) {
281 return do_strided_loops<
N - 1>();
288 if constexpr (requires_single_thread_per_block_v<TAcc>) {
291 if (not do_elements_loops<Dim::value>()) {
299 if (not do_strided_loops<Dim::value>()) {
305 first_ = loop_->extent_;
306 range_ = loop_->extent_;
307 index_ = loop_->extent_;
321 if ((thread_ < extent_).
all()) {
357 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
361 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]},
362 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0u]},
367 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]},
368 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0u]},
369 extent_{
divide_up_by(extent, alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u])} {}
384 if (first_ < extent_)
432 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
436 : first_{
block * alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u]},
437 local_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u] *
438 alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]},
439 range_{local_ + alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]} {}
442 : first_{
block * alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u]},
444 alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u] *
445 alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u])},
446 range_{
std::min(extent - first_, local_ + alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u])} {}
458 if constexpr (requires_single_thread_per_block_v<TAcc>) {
506 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
508 return alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) ==
Vec<alpaka::Dim<TAcc>>::zeros();
518 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
520 return alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc) ==
Vec<alpaka::Dim<TAcc>>::zeros();
525 #endif // HeterogeneousCore_AlpakaInterface_interface_workdivision_h
alpaka::WorkDivMembers< TDim, Idx > WorkDiv
const std::complex< double > I
alpaka::Vec< TDim, Idx > Vec