TestAlgo.dev.cc
// Check that ALPAKA_HOST_ONLY is not defined during device compilation:
#ifdef ALPAKA_HOST_ONLY
#error ALPAKA_HOST_ONLY defined in device compilation
#endif

#include <alpaka/alpaka.hpp>

// portable test data formats and alpaka work-division helpers used below
#include "DataFormats/PortableTestObjects/interface/TestDeviceCollection.h"
#include "DataFormats/PortableTestObjects/interface/TestDeviceObject.h"
#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"

#include "TestAlgo.h"

namespace ALPAKA_ACCELERATOR_NAMESPACE {

  using namespace cms::alpakatools;

  class TestAlgoKernel {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceCollection::View view, double xvalue) const {
      const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};
      const portabletest::Array flags = {{6, 4, 2, 0}};

      // set this only once in the whole kernel grid
      if (once_per_grid(acc)) {
        view.r() = 1.;
      }

      // make a strided loop over the kernel grid, covering up to "size" elements
      for (int32_t i : uniform_elements(acc, view.metadata().size())) {
        view[i] = {xvalue, 0., 0., i, flags, matrix * i};
      }
    }
  };
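
  // How the strided loop above distributes the work (an illustrative sketch, with example numbers
  // that are not part of the original file): uniform_elements(acc, size) visits every index in
  // [0, size) exactly once, independent of the launch configuration. For instance, with a grid of
  // 2 groups x 64 items = 128 threads/elements and size = 300, the GPU thread with global index 5
  // would process i = 5, 133 and 261, while on a CPU backend each "thread" iterates serially over
  // the elements assigned to its group.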

  class TestAlgoMultiKernel2 {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc,
                                  portabletest::TestDeviceMultiCollection2::View<1> view,
                                  double xvalue) const {
      const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};

      // set this only once in the whole kernel grid
      if (once_per_grid(acc)) {
        view.r2() = 2.;
      }

      // make a strided loop over the kernel grid, covering up to "size" elements
      for (int32_t i : uniform_elements(acc, view.metadata().size())) {
        view[i] = {xvalue, 0., 0., i, matrix * i};
      }
    }
  };

  class TestAlgoMultiKernel3 {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc,
                                  portabletest::TestDeviceMultiCollection3::View<2> view,
                                  double xvalue) const {
      const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};

      // set this only once in the whole kernel grid
      if (once_per_grid(acc)) {
        view.r3() = 3.;
      }

      // make a strided loop over the kernel grid, covering up to "size" elements
      for (int32_t i : uniform_elements(acc, view.metadata().size())) {
        view[i] = {xvalue, 0., 0., i, matrix * i};
      }
    }
  };

  void TestAlgo::fill(Queue& queue, portabletest::TestDeviceCollection& collection, double xvalue) const {
    // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
    uint32_t items = 64;

    // use as many groups as needed to cover the whole problem
    uint32_t groups = divide_up_by(collection->metadata().size(), items);

    // map items to
    // - threads with a single element per thread on a GPU backend
    // - elements within a single thread on a CPU backend
    auto workDiv = make_workdiv<Acc1D>(groups, items);

    alpaka::exec<Acc1D>(queue, workDiv, TestAlgoKernel{}, collection.view(), xvalue);
  }
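
  // Worked example of the work division above (illustrative only, assuming a collection of 100
  // elements): divide_up_by(100, 64) rounds up to 2 groups, so the kernel is launched over
  // 2 x 64 = 128 threads/elements; the strided loop inside TestAlgoKernel stops at
  // metadata().size() = 100, so the extra 28 threads/elements simply do no work.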

  void TestAlgo::fillMulti2(Queue& queue, portabletest::TestDeviceMultiCollection2& collection, double xvalue) const {
    // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
    uint32_t items = 64;

    // use as many groups as needed to cover the whole problem
    uint32_t groups = divide_up_by(collection->metadata().size(), items);
    uint32_t groups2 = divide_up_by(collection.view<1>().metadata().size(), items);

    // map items to
    // - threads with a single element per thread on a GPU backend
    // - elements within a single thread on a CPU backend
    auto workDiv = make_workdiv<Acc1D>(groups, items);
    auto workDiv2 = make_workdiv<Acc1D>(groups2, items);

    alpaka::exec<Acc1D>(queue, workDiv, TestAlgoKernel{}, collection.view<portabletest::TestSoA>(), xvalue);
    alpaka::exec<Acc1D>(queue, workDiv2, TestAlgoMultiKernel2{}, collection.view<portabletest::TestSoA2>(), xvalue);
  }

  class TestAlgoStructKernel {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc,
                                  portabletest::TestDeviceObject::Product* data,
                                  double x,
                                  double y,
                                  double z,
                                  int32_t id) const {
      // run on a single thread
      if (once_per_grid(acc)) {
        data->x = x;
        data->y = y;
        data->z = z;
        data->id = id;
      }
    }
  };

  void TestAlgo::fillObject(
      Queue& queue, portabletest::TestDeviceObject& object, double x, double y, double z, int32_t id) const {
    // run on a single thread
    auto workDiv = make_workdiv<Acc1D>(1, 1);

    alpaka::exec<Acc1D>(queue, workDiv, TestAlgoStructKernel{}, object.data(), x, y, z, id);
  }
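
  // Minimal host-side usage sketch of the fill methods above (hypothetical code, not part of this
  // file; the queue, the collection size of 42 and the filled values are assumptions chosen only
  // for illustration):
  //
  //   portabletest::TestDeviceCollection collection{42, queue};
  //   portabletest::TestDeviceObject object{queue};
  //   TestAlgo algo;
  //   algo.fill(queue, collection, 3.14);
  //   algo.fillObject(queue, object, 1., 2., 3., 7);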

  void TestAlgo::fillMulti3(Queue& queue, portabletest::TestDeviceMultiCollection3& collection, double xvalue) const {
    // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
    uint32_t items = 64;

    // use as many groups as needed to cover the whole problem
    uint32_t groups = divide_up_by(collection.view<portabletest::TestSoA>().metadata().size(), items);
    uint32_t groups2 = divide_up_by(collection.view<portabletest::TestSoA2>().metadata().size(), items);
    uint32_t groups3 = divide_up_by(collection.view<portabletest::TestSoA3>().metadata().size(), items);

    // map items to
    // - threads with a single element per thread on a GPU backend
    // - elements within a single thread on a CPU backend
    auto workDiv = make_workdiv<Acc1D>(groups, items);
    auto workDiv2 = make_workdiv<Acc1D>(groups2, items);
    auto workDiv3 = make_workdiv<Acc1D>(groups3, items);

    alpaka::exec<Acc1D>(queue, workDiv, TestAlgoKernel{}, collection.view<portabletest::TestSoA>(), xvalue);
    alpaka::exec<Acc1D>(queue, workDiv2, TestAlgoMultiKernel2{}, collection.view<portabletest::TestSoA2>(), xvalue);
    alpaka::exec<Acc1D>(queue, workDiv3, TestAlgoMultiKernel3{}, collection.view<portabletest::TestSoA3>(), xvalue);
  }

  class TestAlgoKernelUpdate {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc,
                                  portabletest::TestDeviceCollection::ConstView input,
                                  AlpakaESTestDataEDevice::ConstView esData,
                                  portabletest::TestDeviceCollection::View output) const {
      // set this only once in the whole kernel grid
      if (once_per_grid(acc)) {
        output.r() = input.r();
      }

      // make a strided loop over the kernel grid, covering up to "size" elements
      for (int32_t i : uniform_elements(acc, output.metadata().size())) {
        double x = input[i].x();
        if (i < esData.size()) {
          x += esData.val(i) + esData.val2(i);
        }
        output[i] = {x, input[i].y(), input[i].z(), input[i].id(), input[i].flags(), input[i].m()};
      }
    }
  };

  class TestAlgoKernelUpdateMulti2 {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc,
                                  portabletest::TestSoA::ConstView input,
                                  portabletest::TestSoA2::ConstView input2,
                                  AlpakaESTestDataEDevice::ConstView esData,
                                  portabletest::TestSoA::View output,
                                  portabletest::TestSoA2::View output2) const {
      // set this only once in the whole kernel grid
      if (once_per_grid(acc)) {
        output.r() = input.r();
        output2.r2() = input2.r2();
      }

      // make a strided loop over the kernel grid, covering up to "size" elements
      for (int32_t i : uniform_elements(acc, output.metadata().size())) {
        double x = input[i].x();
        if (i < esData.size()) {
          x += esData.val(i) + esData.val2(i);
        }
        output[i] = {x, input[i].y(), input[i].z(), input[i].id(), input[i].flags(), input[i].m()};
      }
      for (int32_t i : uniform_elements(acc, output2.metadata().size())) {
        double x2 = input2[i].x2();
        if (i < esData.size()) {
          x2 += esData.val(i) + esData.val2(i);
        }
        output2[i] = {x2, input2[i].y2(), input2[i].z2(), input2[i].id2(), input2[i].m2()};
      }
    }
  };

  class TestAlgoKernelUpdateMulti3 {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc,
                                  portabletest::TestSoA::ConstView input,
                                  portabletest::TestSoA2::ConstView input2,
                                  portabletest::TestSoA3::ConstView input3,
                                  AlpakaESTestDataEDevice::ConstView esData,
                                  portabletest::TestSoA::View output,
                                  portabletest::TestSoA2::View output2,
                                  portabletest::TestSoA3::View output3) const {
      // set this only once in the whole kernel grid
      if (once_per_grid(acc)) {
        output.r() = input.r();
        output2.r2() = input2.r2();
        output3.r3() = input3.r3();
      }

      // make a strided loop over the kernel grid, covering up to "size" elements
      for (int32_t i : uniform_elements(acc, output.metadata().size())) {
        double x = input[i].x();
        if (i < esData.size()) {
          x += esData.val(i) + esData.val2(i);
          if (0 == i)
            printf("Setting x[0] to %f\n", x);
        }
        output[i] = {x, input[i].y(), input[i].z(), input[i].id(), input[i].flags(), input[i].m()};
      }
      for (int32_t i : uniform_elements(acc, output2.metadata().size())) {
        double x2 = input2[i].x2();
        if (i < esData.size()) {
          x2 += esData.val(i) + esData.val2(i);
        }
        output2[i] = {x2, input2[i].y2(), input2[i].z2(), input2[i].id2(), input2[i].m2()};
      }
      for (int32_t i : uniform_elements(acc, output3.metadata().size())) {
        double x3 = input3[i].x3();
        if (i < esData.size()) {
          x3 += esData.val(i) + esData.val2(i);
        }
        output3[i] = {x3, input3[i].y3(), input3[i].z3(), input3[i].id3(), input3[i].m3()};
      }
    }
  };

  portabletest::TestDeviceCollection TestAlgo::update(Queue& queue,
                                                      portabletest::TestDeviceCollection const& input,
                                                      AlpakaESTestDataEDevice const& esData) const {
    portabletest::TestDeviceCollection collection{input->metadata().size(), queue};

    // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
    uint32_t items = 64;

    // use as many groups as needed to cover the whole problem
    uint32_t groups = divide_up_by(collection->metadata().size(), items);

    // map items to
    // - threads with a single element per thread on a GPU backend
    // - elements within a single thread on a CPU backend
    auto workDiv = make_workdiv<Acc1D>(groups, items);

    alpaka::exec<Acc1D>(queue, workDiv, TestAlgoKernelUpdate{}, input.view(), esData.view(), collection.view());

    return collection;
  }
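
  // Illustrative example of what update() computes per element (the numerical values below are
  // assumptions, not taken from this file): if input[i].x() = 3. and the event-setup data has
  // esData.val(i) = 1 and esData.val2(i) = 2 for i < esData.size(), then output[i].x() becomes
  // 3. + 1 + 2 = 6., while y, z, id, flags and the matrix are copied through unchanged; elements
  // with i >= esData.size() keep x unchanged as well.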

  portabletest::TestDeviceMultiCollection2 TestAlgo::updateMulti2(Queue& queue,
                                                                  portabletest::TestDeviceMultiCollection2 const& input,
                                                                  AlpakaESTestDataEDevice const& esData) const {
    portabletest::TestDeviceMultiCollection2 collection{input.sizes(), queue};

    // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
    uint32_t items = 64;

    // use as many groups as needed to cover the whole problem
    auto sizes = collection.sizes();
    uint32_t groups = divide_up_by(*std::max_element(sizes.begin(), sizes.end()), items);

    // map items to
    // - threads with a single element per thread on a GPU backend
    // - elements within a single thread on a CPU backend
    auto workDiv = make_workdiv<Acc1D>(groups, items);

    alpaka::exec<Acc1D>(queue,
                        workDiv,
                        TestAlgoKernelUpdateMulti2{},
                        input.view<portabletest::TestSoA>(),
                        input.view<portabletest::TestSoA2>(),
                        esData.view(),
                        collection.view<portabletest::TestSoA>(),
                        collection.view<portabletest::TestSoA2>());

    return collection;
  }
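
  // Note on the single work division above (example sizes assumed for illustration): with layer
  // sizes {100, 50}, the grid is dimensioned for the largest layer, divide_up_by(100, 64) = 2
  // groups, and the same launch is reused for both SoA layers; each strided loop in the kernel is
  // bounded by its own metadata().size(), so the smaller layer is still covered correctly.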

  portabletest::TestDeviceMultiCollection3 TestAlgo::updateMulti3(Queue& queue,
                                                                  portabletest::TestDeviceMultiCollection3 const& input,
                                                                  AlpakaESTestDataEDevice const& esData) const {
    portabletest::TestDeviceMultiCollection3 collection{input.sizes(), queue};

    // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
    uint32_t items = 64;

    // use as many groups as needed to cover the whole problem
    auto sizes = collection.sizes();
    uint32_t groups = divide_up_by(*std::max_element(sizes.begin(), sizes.end()), items);

    // map items to
    // - threads with a single element per thread on a GPU backend
    // - elements within a single thread on a CPU backend
    auto workDiv = make_workdiv<Acc1D>(groups, items);

    alpaka::exec<Acc1D>(queue,
                        workDiv,
                        TestAlgoKernelUpdateMulti3{},
                        input.view<portabletest::TestSoA>(),
                        input.view<portabletest::TestSoA2>(),
                        input.view<portabletest::TestSoA3>(),
                        esData.view(),
                        collection.view<portabletest::TestSoA>(),
                        collection.view<portabletest::TestSoA2>(),
                        collection.view<portabletest::TestSoA3>());

    return collection;
  }

  class TestZeroCollectionKernel {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceCollection::ConstView view) const {
      const portabletest::Matrix matrix{{0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}};
      const portabletest::Array flags = {{0, 0, 0, 0}};

      // check this only once in the whole kernel grid
      if (once_per_grid(acc)) {
        ALPAKA_ASSERT(view.r() == 0.);
      }

      // make a strided loop over the kernel grid, covering up to "size" elements
      for (int32_t i : uniform_elements(acc, view.metadata().size())) {
        auto element = view[i];
        ALPAKA_ASSERT(element.x() == 0.);
        ALPAKA_ASSERT(element.y() == 0.);
        ALPAKA_ASSERT(element.z() == 0.);
        ALPAKA_ASSERT(element.id() == 0.);
        ALPAKA_ASSERT(element.flags() == flags);
        ALPAKA_ASSERT(element.m() == matrix);
      }
    }
  };

  class TestZeroMultiCollectionKernel2 {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceMultiCollection2::ConstView<1> view) const {
      const portabletest::Matrix matrix{{0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}};

      // check this only once in the whole kernel grid
      if (once_per_grid(acc)) {
        ALPAKA_ASSERT(view.r2() == 0.);
      }

      // make a strided loop over the kernel grid, covering up to "size" elements
      for (int32_t i : uniform_elements(acc, view.metadata().size())) {
        auto element = view[i];
        ALPAKA_ASSERT(element.x2() == 0.);
        ALPAKA_ASSERT(element.y2() == 0.);
        ALPAKA_ASSERT(element.z2() == 0.);
        ALPAKA_ASSERT(element.id2() == 0.);
        ALPAKA_ASSERT(element.m2() == matrix);
      }
    }
  };

  class TestZeroMultiCollectionKernel3 {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceMultiCollection3::ConstView<2> view) const {
      const portabletest::Matrix matrix{{0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}};

      // check this only once in the whole kernel grid
      if (once_per_grid(acc)) {
        ALPAKA_ASSERT(view.r3() == 0.);
      }

      // make a strided loop over the kernel grid, covering up to "size" elements
      for (int32_t i : uniform_elements(acc, view.metadata().size())) {
        auto element = view[i];
        ALPAKA_ASSERT(element.x3() == 0.);
        ALPAKA_ASSERT(element.y3() == 0.);
        ALPAKA_ASSERT(element.z3() == 0.);
        ALPAKA_ASSERT(element.id3() == 0.);
        ALPAKA_ASSERT(element.m3() == matrix);
      }
    }
  };

  class TestZeroStructKernel {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceObject::Product const* data) const {
      // check this only once in the whole kernel grid
      if (once_per_grid(acc)) {
        ALPAKA_ASSERT(data->x == 0.);
        ALPAKA_ASSERT(data->y == 0.);
        ALPAKA_ASSERT(data->z == 0.);
        ALPAKA_ASSERT(data->id == 0);
      }
    }
  };

  // Check that the collection has been filled with zeroes.
  void TestAlgo::checkZero(Queue& queue, portabletest::TestDeviceCollection const& collection) const {
    // create a work division with a single block and
    // - 32 threads with a single element per thread on a GPU backend
    // - 32 elements within a single thread on a CPU backend
    auto workDiv = make_workdiv<Acc1D>(1, 32);

    // the kernel will make a strided loop over the launch grid to cover all elements in the collection
    alpaka::exec<Acc1D>(queue, workDiv, TestZeroCollectionKernel{}, collection.const_view());
  }

  // Check that the collection has been filled with zeroes.
  void TestAlgo::checkZero(Queue& queue, portabletest::TestDeviceMultiCollection2 const& collection) const {
    // create a work division with a single block and
    // - 32 threads with a single element per thread on a GPU backend
    // - 32 elements within a single thread on a CPU backend
    auto workDiv = make_workdiv<Acc1D>(1, 32);

    // the kernels will make a strided loop over the launch grid to cover all elements in the collection
    alpaka::exec<Acc1D>(queue, workDiv, TestZeroCollectionKernel{}, collection.const_view<portabletest::TestSoA>());
    alpaka::exec<Acc1D>(
        queue, workDiv, TestZeroMultiCollectionKernel2{}, collection.const_view<portabletest::TestSoA2>());
  }

  // Check that the collection has been filled with zeroes.
  void TestAlgo::checkZero(Queue& queue, portabletest::TestDeviceMultiCollection3 const& collection) const {
    // create a work division with a single block and
    // - 32 threads with a single element per thread on a GPU backend
    // - 32 elements within a single thread on a CPU backend
    auto workDiv = make_workdiv<Acc1D>(1, 32);

    // the kernels will make a strided loop over the launch grid to cover all elements in the collection
    alpaka::exec<Acc1D>(queue, workDiv, TestZeroCollectionKernel{}, collection.const_view<portabletest::TestSoA>());
    alpaka::exec<Acc1D>(
        queue, workDiv, TestZeroMultiCollectionKernel2{}, collection.const_view<portabletest::TestSoA2>());
    alpaka::exec<Acc1D>(
        queue, workDiv, TestZeroMultiCollectionKernel3{}, collection.const_view<portabletest::TestSoA3>());
  }

  // Check that the object has been filled with zeroes.
  void TestAlgo::checkZero(Queue& queue, portabletest::TestDeviceObject const& object) const {
    // create a work division with a single block and
    // - 32 threads with a single element per thread on a GPU backend
    // - 32 elements within a single thread on a CPU backend
    auto workDiv = make_workdiv<Acc1D>(1, 32);

    // the kernel will actually use a single thread
    alpaka::exec<Acc1D>(queue, workDiv, TestZeroStructKernel{}, object.data());
  }

}  // namespace ALPAKA_ACCELERATOR_NAMESPACE