
TestAlgo.dev.cc
// Check that ALPAKA_HOST_ONLY is not defined during device compilation:
#ifdef ALPAKA_HOST_ONLY
#error ALPAKA_HOST_ONLY defined in device compilation
#endif

#include <algorithm>

#include <alpaka/alpaka.hpp>

#include "DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h"
#include "DataFormats/PortableTestObjects/interface/alpaka/TestDeviceObject.h"
#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"

#include "TestAlgo.h"

namespace ALPAKA_ACCELERATOR_NAMESPACE {

  using namespace cms::alpakatools;

  class TestAlgoKernel {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceCollection::View view, double xvalue) const {
      // constant inputs reused for every element: a 3x6 matrix and an array of flags
      const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};
      const portabletest::Array flags = {{6, 4, 2, 0}};

      // set this only once in the whole kernel grid
      if (once_per_grid(acc)) {
        view.r() = 1.;
      }

      // make a strided loop over the kernel grid, covering up to "size" elements
      for (int32_t i : uniform_elements(acc, view.metadata().size())) {
        view[i] = {xvalue, 0., 0., i, flags, matrix * i};
      }
    }
  };

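  // The next two kernels perform the analogous fill on the second and third SoA
  // of the multi-collections; they check for the first grid thread explicitly
  // instead of using the once_per_grid() helper, with the same effect.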
  class TestAlgoMultiKernel2 {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc,
                                  portabletest::TestDeviceMultiCollection2::View<1> view,
                                  double xvalue) const {
      // global index of the thread within the grid
      const int32_t thread = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u];
      const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};

      // set this only once in the whole kernel grid
      if (thread == 0) {
        view.r2() = 2.;
      }

      // make a strided loop over the kernel grid, covering up to "size" elements
      for (int32_t i : uniform_elements(acc, view.metadata().size())) {
        view[i] = {xvalue, 0., 0., i, matrix * i};
      }
    }
  };

  class TestAlgoMultiKernel3 {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc,
                                  portabletest::TestDeviceMultiCollection3::View<2> view,
                                  double xvalue) const {
      // global index of the thread within the grid
      const int32_t thread = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u];
      const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};

      // set this only once in the whole kernel grid
      if (thread == 0) {
        view.r3() = 3.;
      }

      // make a strided loop over the kernel grid, covering up to "size" elements
      for (int32_t i : uniform_elements(acc, view.metadata().size())) {
        view[i] = {xvalue, 0., 0., i, matrix * i};
      }
    }
  };

  void TestAlgo::fill(Queue& queue, portabletest::TestDeviceCollection& collection, double xvalue) const {
    // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
    uint32_t items = 64;

    // use as many groups as needed to cover the whole problem
    uint32_t groups = divide_up_by(collection->metadata().size(), items);
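    // e.g. with an illustrative collection size of 1000: divide_up_by(1000, 64) = 16
    // groups, i.e. up to 1024 threads; uniform_elements() in the kernel makes the
    // 24 extra threads skip the loop body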

    // map items to
    //   - threads with a single element per thread on a GPU backend
    //   - elements within a single thread on a CPU backend
    auto workDiv = make_workdiv<Acc1D>(groups, items);

    alpaka::exec<Acc1D>(queue, workDiv, TestAlgoKernel{}, collection.view(), xvalue);
  }

  void TestAlgo::fillMulti2(Queue& queue, portabletest::TestDeviceMultiCollection2& collection, double xvalue) const {
    // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
    uint32_t items = 64;

    // use as many groups as needed to cover the whole problem
    uint32_t groups = divide_up_by(collection->metadata().size(), items);
    uint32_t groups2 = divide_up_by(collection.view<1>().metadata().size(), items);
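    // the SoAs in a multi-collection may have different sizes, so each kernel
    // launch gets its own work division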

    // map items to
    //   - threads with a single element per thread on a GPU backend
    //   - elements within a single thread on a CPU backend
    auto workDiv = make_workdiv<Acc1D>(groups, items);
    auto workDiv2 = make_workdiv<Acc1D>(groups2, items);

    alpaka::exec<Acc1D>(queue, workDiv, TestAlgoKernel{}, collection.view<portabletest::TestSoA>(), xvalue);
    alpaka::exec<Acc1D>(queue, workDiv2, TestAlgoMultiKernel2{}, collection.view<portabletest::TestSoA2>(), xvalue);
  }

  class TestAlgoStructKernel {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc,
                                  portabletest::TestDeviceObject::Product* data,
                                  double x,
                                  double y,
                                  double z,
                                  int32_t id) const {
      // run on a single thread
      if (once_per_grid(acc)) {
        data->x = x;
        data->y = y;
        data->z = z;
        data->id = id;
      }
    }
  };

  void TestAlgo::fillObject(
      Queue& queue, portabletest::TestDeviceObject& object, double x, double y, double z, int32_t id) const {
    // run on a single thread
    auto workDiv = make_workdiv<Acc1D>(1, 1);
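    // a 1x1 work division (one group of one item) is enough, since the kernel
    // writes a single struct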

    alpaka::exec<Acc1D>(queue, workDiv, TestAlgoStructKernel{}, object.data(), x, y, z, id);
  }

  void TestAlgo::fillMulti3(Queue& queue, portabletest::TestDeviceMultiCollection3& collection, double xvalue) const {
    // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
    uint32_t items = 64;

    // use as many groups as needed to cover the whole problem
    uint32_t groups = divide_up_by(collection.view<portabletest::TestSoA>().metadata().size(), items);
    uint32_t groups2 = divide_up_by(collection.view<portabletest::TestSoA2>().metadata().size(), items);
    uint32_t groups3 = divide_up_by(collection.view<portabletest::TestSoA3>().metadata().size(), items);

    // map items to
    //   - threads with a single element per thread on a GPU backend
    //   - elements within a single thread on a CPU backend
    auto workDiv = make_workdiv<Acc1D>(groups, items);
    auto workDiv2 = make_workdiv<Acc1D>(groups2, items);
    auto workDiv3 = make_workdiv<Acc1D>(groups3, items);

    alpaka::exec<Acc1D>(queue, workDiv, TestAlgoKernel{}, collection.view<portabletest::TestSoA>(), xvalue);
    alpaka::exec<Acc1D>(queue, workDiv2, TestAlgoMultiKernel2{}, collection.view<portabletest::TestSoA2>(), xvalue);
    alpaka::exec<Acc1D>(queue, workDiv3, TestAlgoMultiKernel3{}, collection.view<portabletest::TestSoA3>(), xvalue);
  }

  class TestAlgoKernelUpdate {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc,
                                  portabletest::TestDeviceCollection::ConstView input,
                                  AlpakaESTestDataEDevice::ConstView esData,
                                  portabletest::TestDeviceCollection::View output) const {
      // set this only once in the whole kernel grid
      if (once_per_grid(acc)) {
        output.r() = input.r();
      }

      // make a strided loop over the kernel grid, covering up to "size" elements
      for (int32_t i : uniform_elements(acc, output.metadata().size())) {
        double x = input[i].x();
        // the conditions data may be shorter than the collection; add its values only where present
        if (i < esData.size()) {
          x += esData.val(i) + esData.val2(i);
        }
        output[i] = {x, input[i].y(), input[i].z(), input[i].id(), input[i].flags(), input[i].m()};
      }
    }
  };

  class TestAlgoKernelUpdateMulti2 {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc,
                                  portabletest::TestSoA::ConstView input,
                                  portabletest::TestSoA2::ConstView input2,
                                  AlpakaESTestDataEDevice::ConstView esData,
                                  portabletest::TestSoA::View output,
                                  portabletest::TestSoA2::View output2) const {
      // set this only once in the whole kernel grid
      if (once_per_grid(acc)) {
        output.r() = input.r();
        output2.r2() = input2.r2();
      }

      // make a strided loop over the kernel grid, covering up to "size" elements
      for (int32_t i : uniform_elements(acc, output.metadata().size())) {
        double x = input[i].x();
        if (i < esData.size()) {
          x += esData.val(i) + esData.val2(i);
        }
        output[i] = {x, input[i].y(), input[i].z(), input[i].id(), input[i].flags(), input[i].m()};
      }
      for (int32_t i : uniform_elements(acc, output2.metadata().size())) {
        double x2 = input2[i].x2();
        if (i < esData.size()) {
          x2 += esData.val(i) + esData.val2(i);
        }
        output2[i] = {x2, input2[i].y2(), input2[i].z2(), input2[i].id2(), input2[i].m2()};
      }
    }
  };

  class TestAlgoKernelUpdateMulti3 {
  public:
    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
    ALPAKA_FN_ACC void operator()(TAcc const& acc,
                                  portabletest::TestSoA::ConstView input,
                                  portabletest::TestSoA2::ConstView input2,
                                  portabletest::TestSoA3::ConstView input3,
                                  AlpakaESTestDataEDevice::ConstView esData,
                                  portabletest::TestSoA::View output,
                                  portabletest::TestSoA2::View output2,
                                  portabletest::TestSoA3::View output3) const {
      // set this only once in the whole kernel grid
      if (once_per_grid(acc)) {
        output.r() = input.r();
        output2.r2() = input2.r2();
        output3.r3() = input3.r3();
      }

      // make a strided loop over the kernel grid, covering up to "size" elements
      for (int32_t i : uniform_elements(acc, output.metadata().size())) {
        double x = input[i].x();
        if (i < esData.size()) {
          x += esData.val(i) + esData.val2(i);
          if (0 == i)
            printf("Setting x[0] to %f\n", x);
        }
        output[i] = {x, input[i].y(), input[i].z(), input[i].id(), input[i].flags(), input[i].m()};
      }
      for (int32_t i : uniform_elements(acc, output2.metadata().size())) {
        double x2 = input2[i].x2();
        if (i < esData.size()) {
          x2 += esData.val(i) + esData.val2(i);
        }
        output2[i] = {x2, input2[i].y2(), input2[i].z2(), input2[i].id2(), input2[i].m2()};
      }
      for (int32_t i : uniform_elements(acc, output3.metadata().size())) {
        double x3 = input3[i].x3();
        if (i < esData.size()) {
          x3 += esData.val(i) + esData.val2(i);
        }
        output3[i] = {x3, input3[i].y3(), input3[i].z3(), input3[i].id3(), input3[i].m3()};
      }
    }
  };

  portabletest::TestDeviceCollection TestAlgo::update(Queue& queue,
                                                      portabletest::TestDeviceCollection const& input,
                                                      AlpakaESTestDataEDevice const& esData) const {
    portabletest::TestDeviceCollection collection{input->metadata().size(), queue};

    // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
    uint32_t items = 64;

    // use as many groups as needed to cover the whole problem
    uint32_t groups = divide_up_by(collection->metadata().size(), items);

    // map items to
    //   - threads with a single element per thread on a GPU backend
    //   - elements within a single thread on a CPU backend
    auto workDiv = make_workdiv<Acc1D>(groups, items);

    alpaka::exec<Acc1D>(queue, workDiv, TestAlgoKernelUpdate{}, input.view(), esData.view(), collection.view());

    return collection;
  }

  portabletest::TestDeviceMultiCollection2 TestAlgo::updateMulti2(Queue& queue,
                                                                  portabletest::TestDeviceMultiCollection2 const& input,
                                                                  AlpakaESTestDataEDevice const& esData) const {
    portabletest::TestDeviceMultiCollection2 collection{input.sizes(), queue};

    // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
    uint32_t items = 64;

    // use as many groups as needed to cover the whole problem
    auto sizes = collection.sizes();
    uint32_t groups = divide_up_by(*std::max_element(sizes.begin(), sizes.end()), items);
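    // sizing the grid for the largest of the SoAs lets a single kernel launch
    // cover all the per-SoA loops; uniform_elements() bounds each loop at its own size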

    // map items to
    //   - threads with a single element per thread on a GPU backend
    //   - elements within a single thread on a CPU backend
    auto workDiv = make_workdiv<Acc1D>(groups, items);

    alpaka::exec<Acc1D>(queue,
                        workDiv,
                        TestAlgoKernelUpdateMulti2{},
                        input.view<portabletest::TestSoA>(),
                        input.view<portabletest::TestSoA2>(),
                        esData.view(),
                        collection.view<portabletest::TestSoA>(),
                        collection.view<portabletest::TestSoA2>());

    return collection;
  }

  portabletest::TestDeviceMultiCollection3 TestAlgo::updateMulti3(Queue& queue,
                                                                  portabletest::TestDeviceMultiCollection3 const& input,
                                                                  AlpakaESTestDataEDevice const& esData) const {
    portabletest::TestDeviceMultiCollection3 collection{input.sizes(), queue};

    // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
    uint32_t items = 64;

    // use as many groups as needed to cover the whole problem
    auto sizes = collection.sizes();
    uint32_t groups = divide_up_by(*std::max_element(sizes.begin(), sizes.end()), items);

    // map items to
    //   - threads with a single element per thread on a GPU backend
    //   - elements within a single thread on a CPU backend
    auto workDiv = make_workdiv<Acc1D>(groups, items);

    alpaka::exec<Acc1D>(queue,
                        workDiv,
                        TestAlgoKernelUpdateMulti3{},
                        input.view<portabletest::TestSoA>(),
                        input.view<portabletest::TestSoA2>(),
                        input.view<portabletest::TestSoA3>(),
                        esData.view(),
                        collection.view<portabletest::TestSoA>(),
                        collection.view<portabletest::TestSoA2>(),
                        collection.view<portabletest::TestSoA3>());

    return collection;
  }

}  // namespace ALPAKA_ACCELERATOR_NAMESPACE
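
A minimal sketch of how these methods could be driven from host-side code. This is not part of the file above: runTestAlgo is a hypothetical helper, the element count 1000 is arbitrary, and it assumes a valid Queue from the framework plus the PortableObject(queue) constructor pattern matching the PortableCollection{size, queue} allocation used above.

#include <alpaka/alpaka.hpp>

#include "DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h"
#include "DataFormats/PortableTestObjects/interface/alpaka/TestDeviceObject.h"

#include "TestAlgo.h"

namespace ALPAKA_ACCELERATOR_NAMESPACE {

  // hypothetical driver; "queue" must be a valid Queue obtained from the framework
  void runTestAlgo(Queue& queue) {
    TestAlgo algo;

    // allocate a 1000-element collection on the device and fill it
    portabletest::TestDeviceCollection collection{1000, queue};
    algo.fill(queue, collection, 42.);

    // fill a single device-side struct
    portabletest::TestDeviceObject object{queue};
    algo.fillObject(queue, object, 1., 2., 3., 7);

    // the kernels run asynchronously: wait before reading the results
    alpaka::wait(queue);
  }

}  // namespace ALPAKA_ACCELERATOR_NAMESPACE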