CMS 3D CMS Logo

List of all members | Public Member Functions
ALPAKA_ACCELERATOR_NAMESPACE::TestAlgo Class Reference

#include <TestAlgo.h>

Public Member Functions

void fill (Queue &queue, portabletest::TestDeviceCollection &collection, double xvalue=0.) const
 
void fillMulti2 (Queue &queue, portabletest::TestDeviceMultiCollection2 &collection, double xvalue=0.) const
 
void fillMulti3 (Queue &queue, portabletest::TestDeviceMultiCollection3 &collection, double xvalue=0.) const
 
void fillObject (Queue &queue, portabletest::TestDeviceObject &object, double x, double y, double z, int32_t id) const
 
portabletest::TestDeviceCollection update (Queue &queue, portabletest::TestDeviceCollection const &input, AlpakaESTestDataEDevice const &esData) const
 
portabletest::TestDeviceMultiCollection2 updateMulti2 (Queue &queue, portabletest::TestDeviceMultiCollection2 const &input, AlpakaESTestDataEDevice const &esData) const
 
portabletest::TestDeviceMultiCollection3 updateMulti3 (Queue &queue, portabletest::TestDeviceMultiCollection3 const &input, AlpakaESTestDataEDevice const &esData) const
 

Detailed Description

Definition at line 11 of file TestAlgo.h.

Member Function Documentation

◆ fill()

void ALPAKA_ACCELERATOR_NAMESPACE::TestAlgo::fill ( Queue &  queue,
portabletest::TestDeviceCollection &  collection,
double  xvalue = 0. 
) const

Definition at line 82 of file TestAlgo.dev.cc.

References universalConfigTemplate::collection, cms::alpakatools::divide_up_by(), mps_monitormerge::items, and createBeamHaloJobs::queue.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::TestAlpakaStreamSynchronizingProducerToDevice::acquire(), ALPAKA_ACCELERATOR_NAMESPACE::TestAlpakaGlobalProducerOffset::produce(), ALPAKA_ACCELERATOR_NAMESPACE::TestAlpakaProducer::produce(), ALPAKA_ACCELERATOR_NAMESPACE::TestAlpakaGlobalProducer::produce(), and ALPAKA_ACCELERATOR_NAMESPACE::TestAlpakaStreamProducer::produce().

82  {
83  // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
84  uint32_t items = 64;
85 
86  // use as many groups as needed to cover the whole problem
87  uint32_t groups = divide_up_by(collection->metadata().size(), items);
88 
89  // map items to
90  // - threads with a single element per thread on a GPU backend
91  // - elements within a single thread on a CPU backend
92  auto workDiv = make_workdiv<Acc1D>(groups, items);
93 
94  alpaka::exec<Acc1D>(queue, workDiv, TestAlgoKernel{}, collection.view(), xvalue);
95  }
constexpr Idx divide_up_by(Idx value, Idx divisor)
Definition: workdivision.h:20

◆ fillMulti2()

void ALPAKA_ACCELERATOR_NAMESPACE::TestAlgo::fillMulti2 ( Queue &  queue,
portabletest::TestDeviceMultiCollection2 &  collection,
double  xvalue = 0. 
) const

Definition at line 97 of file TestAlgo.dev.cc.

References universalConfigTemplate::collection, cms::alpakatools::divide_up_by(), mps_monitormerge::items, and createBeamHaloJobs::queue.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::TestAlpakaProducer::produce(), ALPAKA_ACCELERATOR_NAMESPACE::TestAlpakaGlobalProducer::produce(), and ALPAKA_ACCELERATOR_NAMESPACE::TestAlpakaStreamProducer::produce().

97  {
98  // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
99  uint32_t items = 64;
100 
101  // use as many groups as needed to cover the whole problem
102  uint32_t groups = divide_up_by(collection->metadata().size(), items);
103  uint32_t groups2 = divide_up_by(collection.view<1>().metadata().size(), items);
104 
105  // map items to
106  // - threads with a single element per thread on a GPU backend
107  // - elements within a single thread on a CPU backend
108  auto workDiv = make_workdiv<Acc1D>(groups, items);
109  auto workDiv2 = make_workdiv<Acc1D>(groups2, items);
110 
111  alpaka::exec<Acc1D>(queue, workDiv, TestAlgoKernel{}, collection.view<portabletest::TestSoA>(), xvalue);
112  alpaka::exec<Acc1D>(queue, workDiv2, TestAlgoMultiKernel2{}, collection.view<portabletest::TestSoA2>(), xvalue);
113  }
constexpr Idx divide_up_by(Idx value, Idx divisor)
Definition: workdivision.h:20

◆ fillMulti3()

void ALPAKA_ACCELERATOR_NAMESPACE::TestAlgo::fillMulti3 ( Queue &  queue,
portabletest::TestDeviceMultiCollection3 &  collection,
double  xvalue = 0. 
) const

Definition at line 142 of file TestAlgo.dev.cc.

References universalConfigTemplate::collection, cms::alpakatools::divide_up_by(), mps_monitormerge::items, and createBeamHaloJobs::queue.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::TestAlpakaProducer::produce(), ALPAKA_ACCELERATOR_NAMESPACE::TestAlpakaGlobalProducer::produce(), and ALPAKA_ACCELERATOR_NAMESPACE::TestAlpakaStreamProducer::produce().

142  {
143  // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
144  uint32_t items = 64;
145 
146  // use as many groups as needed to cover the whole problem
147  uint32_t groups = divide_up_by(collection.view<portabletest::TestSoA>().metadata().size(), items);
148  uint32_t groups2 = divide_up_by(collection.view<portabletest::TestSoA2>().metadata().size(), items);
149  uint32_t groups3 = divide_up_by(collection.view<portabletest::TestSoA3>().metadata().size(), items);
150 
151  // map items to
152  // - threads with a single element per thread on a GPU backend
153  // - elements within a single thread on a CPU backend
154  auto workDiv = make_workdiv<Acc1D>(groups, items);
155  auto workDiv2 = make_workdiv<Acc1D>(groups2, items);
156  auto workDiv3 = make_workdiv<Acc1D>(groups3, items);
157 
158  alpaka::exec<Acc1D>(queue, workDiv, TestAlgoKernel{}, collection.view<portabletest::TestSoA>(), xvalue);
159  alpaka::exec<Acc1D>(queue, workDiv2, TestAlgoMultiKernel2{}, collection.view<portabletest::TestSoA2>(), xvalue);
160  alpaka::exec<Acc1D>(queue, workDiv3, TestAlgoMultiKernel3{}, collection.view<portabletest::TestSoA3>(), xvalue);
161  }
constexpr Idx divide_up_by(Idx value, Idx divisor)
Definition: workdivision.h:20

◆ fillObject()

void ALPAKA_ACCELERATOR_NAMESPACE::TestAlgo::fillObject ( Queue &  queue,
portabletest::TestDeviceObject &  object,
double  x,
double  y,
double  z,
int32_t  id 
) const

Definition at line 134 of file TestAlgo.dev.cc.

References l1ctLayer2EG_cff::id, createBeamHaloJobs::queue, and x.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::TestAlpakaProducer::produce().

135  {
136  // run on a single thread
137  auto workDiv = make_workdiv<Acc1D>(1, 1);
138 
139  alpaka::exec<Acc1D>(queue, workDiv, TestAlgoStructKernel{}, object.data(), x, y, z, id);
140  }

◆ update()

portabletest::TestDeviceCollection ALPAKA_ACCELERATOR_NAMESPACE::TestAlgo::update ( Queue &  queue,
portabletest::TestDeviceCollection const &  input,
AlpakaESTestDataEDevice const &  esData 
) const

Definition at line 264 of file TestAlgo.dev.cc.

References universalConfigTemplate::collection, cms::alpakatools::divide_up_by(), input, mps_monitormerge::items, createBeamHaloJobs::queue, and cms::alpakatest::AlpakaESTestDataE< TDev >::view().

Referenced by progressbar.ProgressBar::__next__(), MatrixUtil.Matrix::__setitem__(), MatrixUtil.Steps::__setitem__(), progressbar.ProgressBar::finish(), MatrixUtil.Steps::overwrite(), and ALPAKA_ACCELERATOR_NAMESPACE::TestAlpakaGlobalProducerE::produce().

266  {
268 
269  // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
270  uint32_t items = 64;
271 
272  // use as many groups as needed to cover the whole problem
273  uint32_t groups = divide_up_by(collection->metadata().size(), items);
274 
275  // map items to
276  // - threads with a single element per thread on a GPU backend
277  // - elements within a single thread on a CPU backend
278  auto workDiv = make_workdiv<Acc1D>(groups, items);
279 
280  alpaka::exec<Acc1D>(queue, workDiv, TestAlgoKernelUpdate{}, input.view(), esData.view(), collection.view());
281 
282  return collection;
283  }
constexpr Idx divide_up_by(Idx value, Idx divisor)
Definition: workdivision.h:20
PortableCollection< TestSoA > TestDeviceCollection
static std::string const input
Definition: EdmProvDump.cc:50

◆ updateMulti2()

portabletest::TestDeviceMultiCollection2 ALPAKA_ACCELERATOR_NAMESPACE::TestAlgo::updateMulti2 ( Queue &  queue,
portabletest::TestDeviceMultiCollection2 const &  input,
AlpakaESTestDataEDevice const &  esData 
) const

Definition at line 285 of file TestAlgo.dev.cc.

References universalConfigTemplate::collection, cms::alpakatools::divide_up_by(), input, mps_monitormerge::items, createBeamHaloJobs::queue, and cms::alpakatest::AlpakaESTestDataE< TDev >::view().

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::TestAlpakaGlobalProducerE::produce().

287  {
289 
290  // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
291  uint32_t items = 64;
292 
293  // use as many groups as needed to cover the whole problem
294  auto sizes = collection.sizes();
295  uint32_t groups = divide_up_by(*std::max_element(sizes.begin(), sizes.end()), items);
296 
297  // map items to
298  // - threads with a single element per thread on a GPU backend
299  // - elements within a single thread on a CPU backend
300  auto workDiv = make_workdiv<Acc1D>(groups, items);
301 
302  alpaka::exec<Acc1D>(queue,
303  workDiv,
304  TestAlgoKernelUpdateMulti2{},
307  esData.view(),
310 
311  return collection;
312  }
constexpr Idx divide_up_by(Idx value, Idx divisor)
Definition: workdivision.h:20
static std::string const input
Definition: EdmProvDump.cc:50
PortableCollection2< TestSoA, TestSoA2 > TestDeviceMultiCollection2

◆ updateMulti3()

portabletest::TestDeviceMultiCollection3 ALPAKA_ACCELERATOR_NAMESPACE::TestAlgo::updateMulti3 ( Queue &  queue,
portabletest::TestDeviceMultiCollection3 const &  input,
AlpakaESTestDataEDevice const &  esData 
) const

Definition at line 314 of file TestAlgo.dev.cc.

References universalConfigTemplate::collection, cms::alpakatools::divide_up_by(), input, mps_monitormerge::items, createBeamHaloJobs::queue, and cms::alpakatest::AlpakaESTestDataE< TDev >::view().

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::TestAlpakaGlobalProducerE::produce().

316  {
318 
319  // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
320  uint32_t items = 64;
321 
322  // use as many groups as needed to cover the whole problem
323  auto sizes = collection.sizes();
324  uint32_t groups = divide_up_by(*std::max_element(sizes.begin(), sizes.end()), items);
325 
326  // map items to
327  // - threads with a single element per thread on a GPU backend
328  // - elements within a single thread on a CPU backend
329  auto workDiv = make_workdiv<Acc1D>(groups, items);
330 
331  alpaka::exec<Acc1D>(queue,
332  workDiv,
333  TestAlgoKernelUpdateMulti3{},
337  esData.view(),
341 
342  return collection;
343  }
constexpr Idx divide_up_by(Idx value, Idx divisor)
Definition: workdivision.h:20
static std::string const input
Definition: EdmProvDump.cc:50
PortableCollection3< TestSoA, TestSoA2, TestSoA3 > TestDeviceMultiCollection3