CMS 3D CMS Logo

TestAlgo.dev.cc
Go to the documentation of this file.
1 // Check that ALPAKA_HOST_ONLY is not defined during device compilation:
2 #ifdef ALPAKA_HOST_ONLY
3 #error ALPAKA_HOST_ONLY defined in device compilation
4 #endif
5 
6 #include <alpaka/alpaka.hpp>
7 
12 
13 #include "TestAlgo.h"
14 
16 
17  using namespace cms::alpakatools;
18 
// Kernel functor launched by TestAlgo::fill.
// Lines 19 and 23 were absent from this listing; they are restored from the
// operator() signature cross-referenced at TestAlgo.dev.cc:22 and from the
// `TestAlgoKernel{}` use in TestAlgo::fill.
19  class TestAlgoKernel {
20  public:
// Parameters:
//   acc    - accelerator the kernel runs on (any type accepted by alpaka::isAccelerator)
//   view   - view of the collection to write; its r() scalar and its rows are assigned
//   size   - upper bound of the element loop
//   xvalue - value stored as the first field of every row
21  template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
22  ALPAKA_FN_ACC void operator()(TAcc const& acc,
23  portabletest::TestDeviceCollection::View view,
24  int32_t size,
25  double xvalue) const {
// constant payload written to every row: `flags` as-is, `matrix` scaled by the row index
26  const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};
27  const portabletest::Array flags = {{6, 4, 2, 0}};
28 
29  // set this only once in the whole kernel grid
30  if (once_per_grid(acc)) {
31  view.r() = 1.;
32  }
33 
34  // make a strided loop over the kernel grid, covering up to "size" elements
35  for (int32_t i : elements_with_stride(acc, size)) {
// row i = {xvalue, 0., 0., i, flags, matrix * i}
36  view[i] = {xvalue, 0., 0., i, flags, matrix * i};
37  }
38  }
39  };
40 
// Launch TestAlgoKernel on `queue` to fill `collection`.
// The kernel is given the collection's view, the collection's element count
// (collection->metadata().size()) and `xvalue`.
41  void TestAlgo::fill(Queue& queue, portabletest::TestDeviceCollection& collection, double xvalue) const {
42  // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
43  uint32_t items = 64;
44 
45  // use as many groups as needed to cover the whole problem
46  uint32_t groups = divide_up_by(collection->metadata().size(), items);
47 
48  // map items to
49  // - threads with a single element per thread on a GPU backend
50  // - elements within a single thread on a CPU backend
51  auto workDiv = make_workdiv<Acc1D>(groups, items);
52 
// run the kernel with the work division computed above
53  alpaka::exec<Acc1D>(queue, workDiv, TestAlgoKernel{}, collection.view(), collection->metadata().size(), xvalue);
54  }
55 
// Kernel functor launched by TestAlgo::fillObject.
// Line 56 was absent from this listing; the class name is taken from the
// `TestAlgoStructKernel{}` use in TestAlgo::fillObject.
56  class TestAlgoStructKernel {
57  public:
// Parameters:
//   acc        - accelerator the kernel runs on (any type accepted by alpaka::isAccelerator)
//   data       - object whose x, y, z and id members are overwritten
//   x, y, z, id - values copied into the matching members of *data
58  template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
59  ALPAKA_FN_ACC void operator()(TAcc const& acc,
60  portabletest::TestDeviceObject::Product* data,
61  double x,
62  double y,
63  double z,
64  int32_t id) const {
65  // run on a single thread
66  if (once_per_grid(acc)) {
67  data->x = x;
68  data->y = y;
69  data->z = z;
70  data->id = id;
71  }
72  }
73  };
74 
// Launch TestAlgoStructKernel on `queue` to store x, y, z and id into `object`.
// Line 75 was absent from this listing; it is restored from the fillObject
// signature cross-referenced at TestAlgo.dev.cc:75.
75  void TestAlgo::fillObject(
76  Queue& queue, portabletest::TestDeviceObject& object, double x, double y, double z, int32_t id) const {
77  // run on a single thread
78  auto workDiv = make_workdiv<Acc1D>(1, 1);
79 
// the kernel writes through the pointer returned by object.data()
80  alpaka::exec<Acc1D>(queue, workDiv, TestAlgoStructKernel{}, object.data(), x, y, z, id);
81  }
82 
// Kernel functor launched by TestAlgo::update.
// Lines 83, 88 and 89 were absent from this listing; they are restored from the
// operator() signature cross-referenced at TestAlgo.dev.cc:86 and from the
// `TestAlgoKernelUpdate{}` use in TestAlgo::update.
83  class TestAlgoKernelUpdate {
84  public:
// Parameters:
//   acc    - accelerator the kernel runs on (any type accepted by alpaka::isAccelerator)
//   input  - read-only view supplying r() and the per-row fields
//   esData - read-only view supplying val(i) and val2(i) for i < esData.size()
//   output - view that receives r() and one row per index below output.metadata().size()
85  template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
86  ALPAKA_FN_ACC void operator()(TAcc const& acc,
87  portabletest::TestDeviceCollection::ConstView input,
88  AlpakaESTestDataEDevice::ConstView esData,
89  portabletest::TestDeviceCollection::View output) const {
90  // set this only once in the whole kernel grid
91  if (once_per_grid(acc)) {
92  output.r() = input.r();
93  }
94 
95  // make a strided loop over the kernel grid, covering up to "size" elements
96  for (int32_t i : elements_with_stride(acc, output.metadata().size())) {
97  double x = input[i].x();
// rows beyond esData.size() keep the input x unchanged
98  if (i < esData.size()) {
99  x += esData.val(i) + esData.val2(i);
100  }
// every field other than x is copied from the input row
101  output[i] = {x, input[i].y(), input[i].z(), input[i].id(), input[i].flags(), input[i].m()};
102  }
103  }
104  };
105 
// Launch TestAlgoKernelUpdate on `queue` with the views of `input`, `esData`
// and a local `collection`, then return that collection.
// Lines 106, 107 and 109 were absent from this listing; the signature is
// restored from the `update` entry in the cross-reference list.
106  portabletest::TestDeviceCollection TestAlgo::update(Queue& queue,
107  portabletest::TestDeviceCollection const& input,
108  AlpakaESTestDataEDevice const& esData) const {
// NOTE(review): line 109 is reconstructed. The type follows from the return
// type, but the constructor arguments (same element count as `input`, allocated
// via `queue`) are an assumption - confirm against the repository.
109  portabletest::TestDeviceCollection collection{input->metadata().size(), queue};
110 
111  // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
112  uint32_t items = 64;
113 
114  // use as many groups as needed to cover the whole problem
115  uint32_t groups = divide_up_by(collection->metadata().size(), items);
116 
117  // map items to
118  // - threads with a single element per thread on a GPU backend
119  // - elements within a single thread on a CPU backend
120  auto workDiv = make_workdiv<Acc1D>(groups, items);
121 
// argument order matches the kernel signature: input view, esData view, output view
122  alpaka::exec<Acc1D>(queue, workDiv, TestAlgoKernelUpdate{}, input.view(), esData.view(), collection.view());
123 
124  return collection;
125  }
126 
127 } // namespace ALPAKA_ACCELERATOR_NAMESPACE
void fillObject(Queue &queue, portabletest::TestDeviceObject &object, double x, double y, double z, int32_t id) const
Definition: TestAlgo.dev.cc:75
constexpr Idx divide_up_by(Idx value, Idx divisor)
Definition: workdivision.h:19
Eigen::Matrix< double, 3, 6 > Matrix
Definition: TestSoA.h:19
portabletest::TestDeviceCollection update(Queue &queue, portabletest::TestDeviceCollection const &input, AlpakaESTestDataEDevice const &esData) const
ALPAKA_FN_ACC void operator()(TAcc const &acc, portabletest::TestDeviceCollection::View view, int32_t size, double xvalue) const
Definition: TestAlgo.dev.cc:22
PortableCollection< TestSoA > TestDeviceCollection
ALPAKA_FN_ACC void operator()(TAcc const &acc, portabletest::TestDeviceObject::Product *data, double x, double y, double z, int32_t id) const
Definition: TestAlgo.dev.cc:59
static std::string const input
Definition: EdmProvDump.cc:50
PortableObject< TestStruct > TestDeviceObject
void fill(Queue &queue, portabletest::TestDeviceCollection &collection, double xvalue=0.) const
Definition: TestAlgo.dev.cc:41
ALPAKA_FN_ACC constexpr bool once_per_grid(TAcc const &acc)
Definition: workdivision.h:793
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80
float x
ALPAKA_FN_ACC void operator()(TAcc const &acc, portabletest::TestDeviceCollection::ConstView input, AlpakaESTestDataEDevice::ConstView esData, portabletest::TestDeviceCollection::View output) const
Definition: TestAlgo.dev.cc:86
Definition: output.py:1