CMS 3D CMS Logo

workdivision.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_AlpakaInterface_interface_workdivision_h
2 #define HeterogeneousCore_AlpakaInterface_interface_workdivision_h
3 
4 #include <type_traits>
5 
6 #include <alpaka/alpaka.hpp>
7 
11 
12 namespace cms::alpakatools {
13 
14  using namespace alpaka_common;
15 
16  // If the first argument is not a multiple of the second argument, round it up to the next multiple
17  inline constexpr Idx round_up_by(Idx value, Idx divisor) { return (value + divisor - 1) / divisor * divisor; }
18 
19  // Return the integer division of the first argument by the second argument, rounded up to the next integer
20  inline constexpr Idx divide_up_by(Idx value, Idx divisor) { return (value + divisor - 1) / divisor; }
21 
22  // Create an accelerator-dependent work division for 1-dimensional kernels
23  template <typename TAcc,
24  typename = std::enable_if_t<cms::alpakatools::is_accelerator_v<TAcc> and alpaka::Dim<TAcc>::value == 1>>
26 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
27  if constexpr (std::is_same_v<TAcc, alpaka::AccGpuCudaRt<Dim1D, Idx>>) {
28  // On GPU backends, each thread is looking at a single element:
29  // - the number of threads per block is "elements";
30  // - the number of elements per thread is always 1.
31  return WorkDiv<Dim1D>(blocks, elements, Idx{1});
32  } else
33 #endif // ALPAKA_ACC_GPU_CUDA_ENABLED
34 #if ALPAKA_ACC_GPU_HIP_ENABLED
35  if constexpr (std::is_same_v<TAcc, alpaka::AccGpuHipRt<Dim1D, Idx>>) {
36  // On GPU backends, each thread is looking at a single element:
37  // - the number of threads per block is "elements";
38  // - the number of elements per thread is always 1.
39  return WorkDiv<Dim1D>(blocks, elements, Idx{1});
40  } else
41 #endif // ALPAKA_ACC_GPU_HIP_ENABLED
42  {
43  // On CPU backends, run serially with a single thread per block:
44  // - the number of threads per block is always 1;
45  // - the number of elements per thread is "elements".
46  return WorkDiv<Dim1D>(blocks, Idx{1}, elements);
47  }
48  }
49 
50  // Create the accelerator-dependent workdiv for N-dimensional kernels
51  template <typename TAcc, typename = std::enable_if_t<cms::alpakatools::is_accelerator_v<TAcc>>>
52  inline WorkDiv<alpaka::Dim<TAcc>> make_workdiv(const Vec<alpaka::Dim<TAcc>>& blocks,
53  const Vec<alpaka::Dim<TAcc>>& elements) {
54  using Dim = alpaka::Dim<TAcc>;
55 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
56  if constexpr (std::is_same_v<TAcc, alpaka::AccGpuCudaRt<Dim, Idx>>) {
57  // On GPU backends, each thread is looking at a single element:
58  // - the number of threads per block is "elements";
59  // - the number of elements per thread is always 1.
61  } else
62 #endif // ALPAKA_ACC_GPU_CUDA_ENABLED
63 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED
64  if constexpr (std::is_same_v<TAcc, alpaka::AccGpuHipRt<Dim, Idx>>) {
65  // On GPU backends, each thread is looking at a single element:
66  // - the number of threads per block is "elements";
67  // - the number of elements per thread is always 1.
69  } else
70 #endif // ALPAKA_ACC_GPU_HIP_ENABLED
71  {
72  // On CPU backends, run serially with a single thread per block:
73  // - the number of threads per block is always 1;
74  // - the number of elements per thread is "elements".
76  }
77  }
78 
79  template <typename TAcc,
80  typename = std::enable_if_t<cms::alpakatools::is_accelerator_v<TAcc> and alpaka::Dim<TAcc>::value == 1>>
82  public:
83  ALPAKA_FN_ACC inline elements_with_stride(TAcc const& acc)
84  : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]},
85  first_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
86  stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
87  extent_{stride_} {}
88 
89  ALPAKA_FN_ACC inline elements_with_stride(TAcc const& acc, Idx extent)
90  : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]},
91  first_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
92  stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
93  extent_{extent} {}
94 
95  class iterator {
96  friend class elements_with_stride;
97 
98  ALPAKA_FN_ACC inline iterator(Idx elements, Idx stride, Idx extent, Idx first)
99  : elements_{elements},
100  stride_{stride},
101  extent_{extent},
102  first_{std::min(first, extent)},
103  index_{first_},
104  last_{std::min(first + elements, extent)} {}
105 
106  public:
107  ALPAKA_FN_ACC inline Idx operator*() const { return index_; }
108 
109  // pre-increment the iterator
110  ALPAKA_FN_ACC inline iterator& operator++() {
111  // increment the index along the elements processed by the current thread
112  ++index_;
113  if (index_ < last_)
114  return *this;
115 
116  // increment the thread index with the grid stride
117  first_ += stride_ * elements_;
118  index_ = first_;
119  last_ = std::min(first_ + elements_, extent_);
120  if (index_ < extent_)
121  return *this;
122 
123  // the iterator has reached or passed the end of the extent, clamp it to the extent
124  first_ = extent_;
125  index_ = extent_;
126  last_ = extent_;
127  return *this;
128  }
129 
130  // post-increment the iterator
131  ALPAKA_FN_ACC inline iterator operator++(int) {
132  iterator old = *this;
133  ++(*this);
134  return old;
135  }
136 
137  ALPAKA_FN_ACC inline bool operator==(iterator const& other) const {
138  return (index_ == other.index_) and (first_ == other.first_);
139  }
140 
141  ALPAKA_FN_ACC inline bool operator!=(iterator const& other) const { return not(*this == other); }
142 
143  private:
144  // non-const to support iterator copy and assignment
148  // modified by the pre/post-increment operator
152  };
153 
154  ALPAKA_FN_ACC inline iterator begin() const { return iterator(elements_, stride_, extent_, first_); }
155 
156  ALPAKA_FN_ACC inline iterator end() const { return iterator(elements_, stride_, extent_, extent_); }
157 
158  private:
159  const Idx elements_;
160  const Idx first_;
161  const Idx stride_;
162  const Idx extent_;
163  };
164 
165  template <typename TAcc,
166  typename = std::enable_if_t<cms::alpakatools::is_accelerator_v<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
168  public:
169  using Dim = alpaka::Dim<TAcc>;
170  using Vec = alpaka::Vec<Dim, Idx>;
171 
172  ALPAKA_FN_ACC inline elements_with_stride_nd(TAcc const& acc)
173  : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)},
174  first_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) * elements_},
175  stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc) * elements_},
176  extent_{stride_} {}
177 
178  ALPAKA_FN_ACC inline elements_with_stride_nd(TAcc const& acc, Vec extent)
179  : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)},
180  first_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) * elements_},
181  stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc) * elements_},
182  extent_{extent} {}
183 
184  class iterator {
186  constexpr static const auto last_dimension = Dim::value - 1;
187 
188  ALPAKA_FN_ACC inline iterator(Vec elements, Vec stride, Vec extent, Vec first)
189  : elements_{elements},
190  stride_{stride},
191  extent_{extent},
192  first_{alpaka::elementwise_min(first, extent)},
193  index_{first_},
194  last_{std::min(first[last_dimension] + elements[last_dimension], extent[last_dimension])} {}
195 
196  public:
197  ALPAKA_FN_ACC inline Vec operator*() const { return index_; }
198 
199  // pre-increment the iterator
200  ALPAKA_FN_ACC inline iterator& operator++() {
201  // increment the index along the elements processed by the current thread
202  ++index_[last_dimension];
203  if (index_[last_dimension] < last_)
204  return *this;
205 
206  // increment the thread index along with the last dimension with the grid stride
207  first_[last_dimension] += stride_[last_dimension] * elements_[last_dimension];
208  index_[last_dimension] = first_[last_dimension];
209  last_ = std::min(first_[last_dimension] + elements_[last_dimension], extent_[last_dimension]);
210  if (index_[last_dimension] < extent_[last_dimension])
211  return *this;
212 
213  // increment the thread index along the outer dimensions with the grid stride
214  if constexpr (last_dimension > 0)
215  for (auto dimension = last_dimension - 1; dimension >= 0; --dimension) {
216  first_[dimension] += stride_[dimension];
217  index_[dimension] = first_[dimension];
218  if (index_[dimension] < extent_[dimension])
219  return *this;
220  }
221 
222  // the iterator has reached or passed the end of the extent, clamp it to the extent
223  first_ = extent_;
224  index_ = extent_;
225  last_ = extent_[last_dimension];
226  return *this;
227  }
228 
229  // post-increment the iterator
230  ALPAKA_FN_ACC inline iterator operator++(int) {
231  iterator old = *this;
232  ++(*this);
233  return old;
234  }
235 
236  ALPAKA_FN_ACC inline bool operator==(iterator const& other) const {
237  return (index_ == other.index_) and (first_ == other.first_);
238  }
239 
240  ALPAKA_FN_ACC inline bool operator!=(iterator const& other) const { return not(*this == other); }
241 
242  private:
243  // non-const to support iterator copy and assignment
247  // modified by the pre/post-increment operator
251  };
252 
253  ALPAKA_FN_ACC inline iterator begin() const { return iterator(elements_, stride_, extent_, first_); }
254 
255  ALPAKA_FN_ACC inline iterator end() const { return iterator(elements_, stride_, extent_, extent_); }
256 
257  private:
258  const Vec elements_;
259  const Vec first_;
260  const Vec stride_;
261  const Vec extent_;
262  };
263 
264 } // namespace cms::alpakatools
265 
266 #endif // HeterogeneousCore_AlpakaInterface_interface_workdivision_h
ALPAKA_FN_ACC elements_with_stride_nd(TAcc const &acc, Vec extent)
Definition: workdivision.h:178
ALPAKA_FN_ACC elements_with_stride(TAcc const &acc)
Definition: workdivision.h:83
WorkDiv< Dim1D > make_workdiv(Idx blocks, Idx elements)
Definition: workdivision.h:25
constexpr Idx divide_up_by(Idx value, Idx divisor)
Definition: workdivision.h:20
ALPAKA_FN_ACC elements_with_stride(TAcc const &acc, Idx extent)
Definition: workdivision.h:89
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC constexpr auto elementwise_min(Vec< TDim, TVal > const &p, Vecs const &... qs) -> Vec< TDim, TVal >
Definition: vec.h:16
ALPAKA_FN_ACC iterator(Vec elements, Vec stride, Vec extent, Vec first)
Definition: workdivision.h:188
uint32_t Idx
Definition: config.h:13
ALPAKA_FN_ACC iterator begin() const
Definition: workdivision.h:154
ALPAKA_FN_ACC elements_with_stride_nd(TAcc const &acc)
Definition: workdivision.h:172
alpaka::WorkDivMembers< TDim, Idx > WorkDiv
Definition: config.h:30
ALPAKA_FN_ACC iterator end() const
Definition: workdivision.h:156
constexpr Idx round_up_by(Idx value, Idx divisor)
Definition: workdivision.h:17
ALPAKA_FN_ACC iterator(Idx elements, Idx stride, Idx extent, Idx first)
Definition: workdivision.h:98
ALPAKA_FN_ACC bool operator!=(iterator const &other) const
Definition: workdivision.h:240
Definition: value.py:1
ALPAKA_FN_ACC iterator end() const
Definition: workdivision.h:255
alpaka::Vec< TDim, Idx > Vec
Definition: config.h:23
ALPAKA_FN_ACC bool operator==(iterator const &other) const
Definition: workdivision.h:236
ALPAKA_FN_ACC bool operator==(iterator const &other) const
Definition: workdivision.h:137
ALPAKA_FN_ACC iterator begin() const
Definition: workdivision.h:253
uint32_t dimension(pat::CandKinResolution::Parametrization parametrization)
Returns the number of free parameters in a parametrization (3 or 4)
ALPAKA_FN_ACC bool operator!=(iterator const &other) const
Definition: workdivision.h:141