1 #ifndef HeterogeneousCore_AlpakaInterface_interface_workdivision_h 2 #define HeterogeneousCore_AlpakaInterface_interface_workdivision_h 6 #include <alpaka/alpaka.hpp> 22 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
25 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED 26 template <
typename TDim>
28 #endif // ALPAKA_ACC_GPU_CUDA_ENABLED 30 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED 31 template <
typename TDim>
// Partial specialization of the requires_single_thread_per_block trait for the
// alpaka HIP accelerator (alpaka::AccGpuHipRt), for any dimensionality TDim.
// It derives from std::false_type, so the trait evaluates to false for this
// accelerator. Only compiled when ALPAKA_ACC_GPU_HIP_ENABLED is defined.
// NOTE(review): Idx is declared outside the visible part of this file.
32 struct requires_single_thread_per_block<
alpaka::AccGpuHipRt<TDim, Idx>> :
public std::false_type {};
33 #endif // ALPAKA_ACC_GPU_HIP_ENABLED 35 #ifdef ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED 36 template <
typename TDim>
// Partial specialization of the requires_single_thread_per_block trait for the
// alpaka CPU threads accelerator (alpaka::AccCpuThreads), for any
// dimensionality TDim. It derives from std::false_type, so the trait evaluates
// to false for this accelerator. Only compiled when
// ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED is defined.
// NOTE(review): Idx is declared outside the visible part of this file.
37 struct requires_single_thread_per_block<
alpaka::AccCpuThreads<TDim, Idx>> :
public std::false_type {};
38 #endif // ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED 41 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
45 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
47 if constexpr (not requires_single_thread_per_block_v<TAcc>) {
61 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
64 using Dim = alpaka::Dim<TAcc>;
65 if constexpr (not requires_single_thread_per_block_v<TAcc>) {
78 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
82 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]},
83 thread_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
84 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
88 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]},
89 thread_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
90 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
109 if constexpr (requires_single_thread_per_block_v<TAcc>) {
119 range_ =
std::min(first_ + elements_, extent_);
120 if (index_ < extent_)
138 return (index_ ==
other.index_) and (first_ ==
other.first_);
165 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
168 using Dim = alpaka::Dim<TAcc>;
169 using Vec = alpaka::Vec<Dim, Idx>;
172 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)},
173 thread_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) * elements_},
174 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc) * elements_},
178 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)},
179 thread_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) * elements_},
180 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc) * elements_},
213 first_{alpaka::elementwise_min(
first,
loop->extent_)},
214 range_{alpaka::elementwise_min(
first +
loop->elements_,
loop->extent_)},
219 : loop_{
loop}, first_{loop_->extent_}, range_{loop_->extent_}, index_{loop_->extent_} {}
223 bool overflow =
false;
225 if (index_[
I] >= range_[
I]) {
226 index_[
I] = first_[
I];
234 if constexpr (
N == 0) {
238 if (not nth_elements_loop<N - 1>()) {
241 return do_elements_loops<
N - 1>();
248 bool overflow =
false;
249 first_[
I] += loop_->stride_[
I];
250 if (first_[
I] >= loop_->extent_[
I]) {
251 first_[
I] = loop_->thread_[
I];
254 index_[
I] = first_[
I];
255 range_[
I] =
std::min(first_[
I] + loop_->elements_[
I], loop_->extent_[
I]);
261 if constexpr (
N == 0) {
265 if (not nth_strided_loop<N - 1>()) {
268 return do_strided_loops<
N - 1>();
275 if constexpr (requires_single_thread_per_block_v<TAcc>) {
278 if (not do_elements_loops<Dim::value>()) {
286 if (not do_strided_loops<Dim::value>()) {
292 first_ = loop_->extent_;
293 range_ = loop_->extent_;
294 index_ = loop_->extent_;
308 if ((thread_ < extent_).
all()) {
331 #endif // HeterogeneousCore_AlpakaInterface_interface_workdivision_h
alpaka::WorkDivMembers< TDim, Idx > WorkDiv
const std::complex< double > I
alpaka::Vec< TDim, Idx > Vec