1 #ifndef HeterogeneousCore_AlpakaInterface_interface_workdivision_h 2 #define HeterogeneousCore_AlpakaInterface_interface_workdivision_h 6 #include <alpaka/alpaka.hpp> 22 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
25 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED 26 template <
typename TDim>
28 #endif // ALPAKA_ACC_GPU_CUDA_ENABLED 30 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED 31 template <
typename TDim>
32 struct requires_single_thread_per_block<
alpaka::AccGpuHipRt<TDim, Idx>> :
public std::false_type {};
33 #endif // ALPAKA_ACC_GPU_HIP_ENABLED 36 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
40 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
42 if constexpr (not requires_single_thread_per_block_v<TAcc>) {
56 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
59 using Dim = alpaka::Dim<TAcc>;
60 if constexpr (not requires_single_thread_per_block_v<TAcc>) {
73 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
77 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]},
78 first_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
79 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
83 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]},
84 first_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
85 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
104 if constexpr (requires_single_thread_per_block_v<TAcc>) {
114 last_ =
std::min(first_ + elements_, extent_);
115 if (index_ < extent_)
133 return (index_ ==
other.index_) and (first_ ==
other.first_);
160 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
163 using Dim = alpaka::Dim<TAcc>;
164 using Vec = alpaka::Vec<Dim, Idx>;
167 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)},
168 first_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) * elements_},
169 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc) * elements_},
173 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)},
174 first_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) * elements_},
175 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc) * elements_},
205 thread_{alpaka::elementwise_min(
first,
loop->extent_)},
206 range_{alpaka::elementwise_min(
first +
loop->elements_,
loop->extent_)},
211 bool overflow =
false;
213 if (index_[
I] >= range_[
I]) {
214 index_[
I] = thread_[
I];
222 if constexpr (
N == 0) {
226 if (not nth_elements_loop<N - 1>()) {
229 return do_elements_loops<
N - 1>();
236 bool overflow =
false;
237 thread_[
I] += loop_->stride_[
I];
238 if (thread_[
I] >= loop_->extent_[
I]) {
239 thread_[
I] = loop_->first_[
I];
242 index_[
I] = thread_[
I];
243 range_[
I] =
std::min(thread_[
I] + loop_->elements_[
I], loop_->extent_[
I]);
249 if constexpr (
N == 0) {
253 if (not nth_strided_loop<N - 1>()) {
256 return do_strided_loops<
N - 1>();
263 if constexpr (requires_single_thread_per_block_v<TAcc>) {
266 if (not do_elements_loops<Dim::value>()) {
274 if (not do_strided_loops<Dim::value>()) {
280 thread_ = loop_->extent_;
281 range_ = loop_->extent_;
282 index_ = loop_->extent_;
307 #endif // HeterogeneousCore_AlpakaInterface_interface_workdivision_h
alpaka::WorkDivMembers< TDim, Idx > WorkDiv
const std::complex< double > I
alpaka::Vec< TDim, Idx > Vec