
TestAlgo.dev.cc
// Check that ALPAKA_HOST_ONLY is not defined during device compilation:
#ifdef ALPAKA_HOST_ONLY
#error ALPAKA_HOST_ONLY defined in device compilation
#endif

#include <algorithm>

#include <alpaka/alpaka.hpp>

#include "DataFormats/PortableTestObjects/interface/TestDeviceCollection.h"
#include "DataFormats/PortableTestObjects/interface/TestDeviceObject.h"
#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"

#include "TestAlgo.h"

namespace ALPAKA_ACCELERATOR_NAMESPACE {

  using namespace cms::alpakatools;
  class TestAlgoKernel {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceCollection::View view, double xvalue) const {
      // constant test values used to fill every element
      const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};
      const portabletest::Array flags = {{6, 4, 2, 0}};

      // set this only once in the whole kernel grid
      if (once_per_grid(acc)) {
        view.r() = 1.;
      }

      // make a strided loop over the kernel grid, covering up to "size" elements
      for (int32_t i : uniform_elements(acc, view.metadata().size())) {
        view[i] = {xvalue, 0., 0., i, flags, matrix * i};
      }
    }
  };
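
  // Note on the pattern above: once_per_grid(acc) is true only for the first
  // element of the whole kernel grid, so the scalar column r() is written exactly
  // once regardless of the launch configuration, while uniform_elements(acc, n)
  // yields every index in [0, n) exactly once across the grid: one element per
  // thread on GPU backends, a serial loop within each thread on CPU backends.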

  class TestAlgoMultiKernel2 {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc,
                                  portabletest::TestDeviceMultiCollection2::View<1> view,
                                  double xvalue) const {
      // global index of the thread within the grid
      const int32_t thread = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u];
      const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};

      // set this only once in the whole kernel grid
      if (thread == 0) {
        view.r2() = 2.;
      }

      // make a strided loop over the kernel grid, covering up to "size" elements
      for (int32_t i : uniform_elements(acc, view.metadata().size())) {
        view[i] = {xvalue, 0., 0., i, matrix * i};
      }
    }
  };
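
  // Unlike TestAlgoKernel, this kernel guards the scalar write with an explicit
  // check of the global thread index; for this purpose the two idioms are
  // effectively equivalent, since thread 0 occurs exactly once per grid.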

  class TestAlgoMultiKernel3 {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc,
                                  portabletest::TestDeviceMultiCollection3::View<2> view,
                                  double xvalue) const {
      // global index of the thread within the grid
      const int32_t thread = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u];
      const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};

      // set this only once in the whole kernel grid
      if (thread == 0) {
        view.r3() = 3.;
      }

      // make a strided loop over the kernel grid, covering up to "size" elements
      for (int32_t i : uniform_elements(acc, view.metadata().size())) {
        view[i] = {xvalue, 0., 0., i, matrix * i};
      }
    }
  };

  void TestAlgo::fill(Queue& queue, portabletest::TestDeviceCollection& collection, double xvalue) const {
    // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
    uint32_t items = 64;

    // use as many groups as needed to cover the whole problem
    uint32_t groups = divide_up_by(collection->metadata().size(), items);

    // map items to
    // - threads with a single element per thread on a GPU backend
    // - elements within a single thread on a CPU backend
    auto workDiv = make_workdiv<Acc1D>(groups, items);

    alpaka::exec<Acc1D>(queue, workDiv, TestAlgoKernel{}, collection.view(), xvalue);
  }
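
  // Worked example of the arithmetic above, with a hypothetical collection of
  // 1000 elements: divide_up_by(1000, 64) rounds up to 16 groups, i.e. 16 * 64 =
  // 1024 grid slots, and the uniform_elements() loop in the kernel skips the 24
  // slots beyond the collection size. A minimal host-side caller could look like
  // the sketch below (illustrative only, not part of this file):
  //
  //   Queue queue{cms::alpakatools::devices<Platform>()[0]};
  //   portabletest::TestDeviceCollection collection{1000, queue};
  //   TestAlgo algo;
  //   algo.fill(queue, collection, 42.);
  //   alpaka::wait(queue);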

  void TestAlgo::fillMulti2(Queue& queue, portabletest::TestDeviceMultiCollection2& collection, double xvalue) const {
    // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
    uint32_t items = 64;

    // use as many groups as needed to cover the whole problem
    uint32_t groups = divide_up_by(collection->metadata().size(), items);
    uint32_t groups2 = divide_up_by(collection.view<1>().metadata().size(), items);

    // map items to
    // - threads with a single element per thread on a GPU backend
    // - elements within a single thread on a CPU backend
    auto workDiv = make_workdiv<Acc1D>(groups, items);
    auto workDiv2 = make_workdiv<Acc1D>(groups2, items);

    alpaka::exec<Acc1D>(queue, workDiv, TestAlgoKernel{}, collection.view<portabletest::TestSoA>(), xvalue);
    alpaka::exec<Acc1D>(queue, workDiv2, TestAlgoMultiKernel2{}, collection.view<portabletest::TestSoA2>(), xvalue);
  }
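
  // The two SoA layers of the multi-collection are sized independently, hence the
  // separate group counts and work divisions; each launch covers exactly one
  // layer, selected either by index (view<1>()) or by type (view<TestSoA2>()).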

  class TestAlgoStructKernel {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc,
                                  portabletest::TestDeviceObject::Product* data,
                                  double x,
                                  double y,
                                  double z,
                                  int32_t id) const {
      // run on a single thread
      if (once_per_grid(acc)) {
        data->x = x;
        data->y = y;
        data->z = z;
        data->id = id;
      }
    }
  };

  void TestAlgo::fillObject(
      Queue& queue, portabletest::TestDeviceObject& object, double x, double y, double z, int32_t id) const {
    // run on a single thread
    auto workDiv = make_workdiv<Acc1D>(1, 1);

    alpaka::exec<Acc1D>(queue, workDiv, TestAlgoStructKernel{}, object.data(), x, y, z, id);
  }
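
  // A 1-group, 1-item work division runs the kernel as a single work-item; that
  // is all that is needed here, since the kernel fills one scalar struct.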

  void TestAlgo::fillMulti3(Queue& queue, portabletest::TestDeviceMultiCollection3& collection, double xvalue) const {
    // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
    uint32_t items = 64;

    // use as many groups as needed to cover the whole problem
    uint32_t groups = divide_up_by(collection.view<portabletest::TestSoA>().metadata().size(), items);
    uint32_t groups2 = divide_up_by(collection.view<portabletest::TestSoA2>().metadata().size(), items);
    uint32_t groups3 = divide_up_by(collection.view<portabletest::TestSoA3>().metadata().size(), items);

    // map items to
    // - threads with a single element per thread on a GPU backend
    // - elements within a single thread on a CPU backend
    auto workDiv = make_workdiv<Acc1D>(groups, items);
    auto workDiv2 = make_workdiv<Acc1D>(groups2, items);
    auto workDiv3 = make_workdiv<Acc1D>(groups3, items);

    alpaka::exec<Acc1D>(queue, workDiv, TestAlgoKernel{}, collection.view<portabletest::TestSoA>(), xvalue);
    alpaka::exec<Acc1D>(queue, workDiv2, TestAlgoMultiKernel2{}, collection.view<portabletest::TestSoA2>(), xvalue);
    alpaka::exec<Acc1D>(queue, workDiv3, TestAlgoMultiKernel3{}, collection.view<portabletest::TestSoA3>(), xvalue);
  }

  class TestAlgoKernelUpdate {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc,
                                  portabletest::TestDeviceCollection::ConstView input,
                                  AlpakaESTestDataEDevice::ConstView esData,
                                  portabletest::TestDeviceCollection::View output) const {
      // set this only once in the whole kernel grid
      if (once_per_grid(acc)) {
        output.r() = input.r();
      }

      // make a strided loop over the kernel grid, covering up to "size" elements
      for (int32_t i : uniform_elements(acc, output.metadata().size())) {
        double x = input[i].x();
        if (i < esData.size()) {
          x += esData.val(i) + esData.val2(i);
        }
        output[i] = {x, input[i].y(), input[i].z(), input[i].id(), input[i].flags(), input[i].m()};
      }
    }
  };
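
  // The "i < esData.size()" guard matters because the event-setup payload and the
  // collection are sized independently: elements beyond the payload are copied
  // through unchanged instead of reading out of bounds.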

  class TestAlgoKernelUpdateMulti2 {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc,
                                  portabletest::TestSoA::ConstView input,
                                  portabletest::TestSoA2::ConstView input2,
                                  AlpakaESTestDataEDevice::ConstView esData,
                                  portabletest::TestSoA::View output,
                                  portabletest::TestSoA2::View output2) const {
      // set this only once in the whole kernel grid
      if (once_per_grid(acc)) {
        output.r() = input.r();
        output2.r2() = input2.r2();
      }

      // make a strided loop over the kernel grid, covering up to "size" elements
      for (int32_t i : uniform_elements(acc, output.metadata().size())) {
        double x = input[i].x();
        if (i < esData.size()) {
          x += esData.val(i) + esData.val2(i);
        }
        output[i] = {x, input[i].y(), input[i].z(), input[i].id(), input[i].flags(), input[i].m()};
      }
      for (int32_t i : uniform_elements(acc, output2.metadata().size())) {
        double x2 = input2[i].x2();
        if (i < esData.size()) {
          x2 += esData.val(i) + esData.val2(i);
        }
        output2[i] = {x2, input2[i].y2(), input2[i].z2(), input2[i].id2(), input2[i].m2()};
      }
    }
  };

  class TestAlgoKernelUpdateMulti3 {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc,
                                  portabletest::TestSoA::ConstView input,
                                  portabletest::TestSoA2::ConstView input2,
                                  portabletest::TestSoA3::ConstView input3,
                                  AlpakaESTestDataEDevice::ConstView esData,
                                  portabletest::TestSoA::View output,
                                  portabletest::TestSoA2::View output2,
                                  portabletest::TestSoA3::View output3) const {
      // set this only once in the whole kernel grid
      if (once_per_grid(acc)) {
        output.r() = input.r();
        output2.r2() = input2.r2();
        output3.r3() = input3.r3();
      }

      // make a strided loop over the kernel grid, covering up to "size" elements
      for (int32_t i : uniform_elements(acc, output.metadata().size())) {
        double x = input[i].x();
        if (i < esData.size()) {
          x += esData.val(i) + esData.val2(i);
          if (0 == i)
            printf("Setting x[0] to %f\n", x);
        }
        output[i] = {x, input[i].y(), input[i].z(), input[i].id(), input[i].flags(), input[i].m()};
      }
      for (int32_t i : uniform_elements(acc, output2.metadata().size())) {
        double x2 = input2[i].x2();
        if (i < esData.size()) {
          x2 += esData.val(i) + esData.val2(i);
        }
        output2[i] = {x2, input2[i].y2(), input2[i].z2(), input2[i].id2(), input2[i].m2()};
      }
      for (int32_t i : uniform_elements(acc, output3.metadata().size())) {
        double x3 = input3[i].x3();
        if (i < esData.size()) {
          x3 += esData.val(i) + esData.val2(i);
        }
        output3[i] = {x3, input3[i].y3(), input3[i].z3(), input3[i].id3(), input3[i].m3()};
      }
    }
  };

  portabletest::TestDeviceCollection TestAlgo::update(Queue& queue,
                                                      portabletest::TestDeviceCollection const& input,
                                                      AlpakaESTestDataEDevice const& esData) const {
    portabletest::TestDeviceCollection collection{input->metadata().size(), queue};

    // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
    uint32_t items = 64;

    // use as many groups as needed to cover the whole problem
    uint32_t groups = divide_up_by(collection->metadata().size(), items);

    // map items to
    // - threads with a single element per thread on a GPU backend
    // - elements within a single thread on a CPU backend
    auto workDiv = make_workdiv<Acc1D>(groups, items);

    alpaka::exec<Acc1D>(queue, workDiv, TestAlgoKernelUpdate{}, input.view(), esData.view(), collection.view());

    return collection;
  }
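
  // The freshly filled device collection is returned by value; PortableCollection
  // is movable, so returning it hands the underlying device buffer to the caller
  // without a deep copy, while the kernel itself completes asynchronously on the
  // same queue.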

  portabletest::TestDeviceMultiCollection2 TestAlgo::updateMulti2(Queue& queue,
                                                                  portabletest::TestDeviceMultiCollection2 const& input,
                                                                  AlpakaESTestDataEDevice const& esData) const {
    portabletest::TestDeviceMultiCollection2 collection{input.sizes(), queue};

    // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
    uint32_t items = 64;

    // use as many groups as needed to cover the whole problem
    auto sizes = collection.sizes();
    uint32_t groups = divide_up_by(*std::max_element(sizes.begin(), sizes.end()), items);

    // map items to
    // - threads with a single element per thread on a GPU backend
    // - elements within a single thread on a CPU backend
    auto workDiv = make_workdiv<Acc1D>(groups, items);

    alpaka::exec<Acc1D>(queue,
                        workDiv,
                        TestAlgoKernelUpdateMulti2{},
                        input.view<portabletest::TestSoA>(),
                        input.view<portabletest::TestSoA2>(),
                        esData.view(),
                        collection.view<portabletest::TestSoA>(),
                        collection.view<portabletest::TestSoA2>());

    return collection;
  }

  portabletest::TestDeviceMultiCollection3 TestAlgo::updateMulti3(Queue& queue,
                                                                  portabletest::TestDeviceMultiCollection3 const& input,
                                                                  AlpakaESTestDataEDevice const& esData) const {
    portabletest::TestDeviceMultiCollection3 collection{input.sizes(), queue};

    // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
    uint32_t items = 64;

    // use as many groups as needed to cover the whole problem
    auto sizes = collection.sizes();
    uint32_t groups = divide_up_by(*std::max_element(sizes.begin(), sizes.end()), items);

    // map items to
    // - threads with a single element per thread on a GPU backend
    // - elements within a single thread on a CPU backend
    auto workDiv = make_workdiv<Acc1D>(groups, items);

    alpaka::exec<Acc1D>(queue,
                        workDiv,
                        TestAlgoKernelUpdateMulti3{},
                        input.view<portabletest::TestSoA>(),
                        input.view<portabletest::TestSoA2>(),
                        input.view<portabletest::TestSoA3>(),
                        esData.view(),
                        collection.view<portabletest::TestSoA>(),
                        collection.view<portabletest::TestSoA2>(),
                        collection.view<portabletest::TestSoA3>());

    return collection;
  }
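
  // Sizing the grid by the largest per-layer size lets a single launch cover all
  // SoA layers; for the smaller layers the uniform_elements() loops in the kernel
  // simply end early, leaving the excess grid slots idle.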

}  // namespace ALPAKA_ACCELERATOR_NAMESPACE