CMS 3D CMS Logo

List of all members | Public Member Functions
ALPAKA_ACCELERATOR_NAMESPACE::HGCalLayerClustersSoAAlgoWrapper Class Reference

#include <HGCalLayerClustersSoAAlgoWrapper.h>

Public Member Functions

void run (Queue &queue, const unsigned int numer_of_clusters, float thresholdW0, float positionDeltaRho2, const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa, const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa, HGCalSoAClustersDeviceCollection::View outputs, HGCalSoAClustersExtraDeviceCollection::View outputs_service) const
 

Detailed Description

Definition at line 17 of file HGCalLayerClustersSoAAlgoWrapper.h.

Member Function Documentation

◆ run()

void ALPAKA_ACCELERATOR_NAMESPACE::HGCalLayerClustersSoAAlgoWrapper::run ( Queue &  queue,
const unsigned int  numer_of_clusters,
float  thresholdW0,
float  positionDeltaRho2,
const HGCalSoARecHitsDeviceCollection::ConstView  input_rechits_soa,
const HGCalSoARecHitsExtraDeviceCollection::ConstView  input_clusters_soa,
HGCalSoAClustersDeviceCollection::View  outputs,
HGCalSoAClustersExtraDeviceCollection::View  outputs_service 
) const

Definition at line 150 of file HGCalLayerClustersSoAAlgoWrapper.dev.cc.

References hgcalTBTopologyTester_cfi::cells, cms::alpakatools::divide_up_by(), hcalRecHitTable_cff::energy, mps_monitormerge::items, hgcal::constants::kInvalidIndexByte, PatBasicFWLiteJetAnalyzer_Selector_cfg::outputs, hltHgcalLayerClustersEE_cfi::positionDeltaRho2, createBeamHaloJobs::queue, fileCollector::seed, hltHgcalLayerClustersEE_cfi::thresholdW0, and x.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::HGCalSoALayerClustersProducer::produce().

157  {
158  auto x = cms::alpakatools::make_device_view<float>(alpaka::getDev(queue), outputs.x(), size);
159  alpaka::memset(queue, x, 0x0);
160  auto y = cms::alpakatools::make_device_view<float>(alpaka::getDev(queue), outputs.y(), size);
161  alpaka::memset(queue, y, 0x0);
162  auto z = cms::alpakatools::make_device_view<float>(alpaka::getDev(queue), outputs.z(), size);
163  alpaka::memset(queue, z, 0x0);
164  auto seed = cms::alpakatools::make_device_view<int>(alpaka::getDev(queue), outputs.seed(), size);
165  alpaka::memset(queue, seed, 0x0);
166  auto energy = cms::alpakatools::make_device_view<float>(alpaka::getDev(queue), outputs.energy(), size);
167  alpaka::memset(queue, energy, 0x0);
168  auto cells = cms::alpakatools::make_device_view<int>(alpaka::getDev(queue), outputs.cells(), size);
169  alpaka::memset(queue, cells, 0x0);
170  auto total_weight =
171  cms::alpakatools::make_device_view<float>(alpaka::getDev(queue), outputs_service.total_weight(), size);
172  alpaka::memset(queue, total_weight, 0x0);
173  auto total_weight_log =
174  cms::alpakatools::make_device_view<float>(alpaka::getDev(queue), outputs_service.total_weight_log(), size);
175  alpaka::memset(queue, total_weight_log, 0x0);
176  auto maxEnergyValue =
177  cms::alpakatools::make_device_view<float>(alpaka::getDev(queue), outputs_service.maxEnergyValue(), size);
178  alpaka::memset(queue, maxEnergyValue, 0x0);
179  auto maxEnergyIndex =
180  cms::alpakatools::make_device_view<int>(alpaka::getDev(queue), outputs_service.maxEnergyIndex(), size);
181  alpaka::memset(queue, maxEnergyIndex, kInvalidIndexByte);
182 
183  // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
184  uint32_t items = 64;
185 
186  // use as many groups as needed to cover the whole problem
187  uint32_t groups = divide_up_by(input_rechits_soa.metadata().size(), items);
188 
189  // map items to
190  // - threads with a single element per thread on a GPU backend
191  // - elements within a single thread on a CPU backend
192  auto workDiv = make_workdiv<Acc1D>(groups, items);
193 
194  alpaka::exec<Acc1D>(
195  queue, workDiv, HGCalLayerClustersSoAAlgoKernelEnergy{}, size, input_rechits_soa, input_clusters_soa, outputs);
196  alpaka::exec<Acc1D>(queue,
197  workDiv,
198  HGCalLayerClustersSoAAlgoKernelPositionByHits{},
199  size,
200  thresholdW0,
201  positionDeltaRho2,
202  input_rechits_soa,
203  input_clusters_soa,
204  outputs,
205  outputs_service);
206  alpaka::exec<Acc1D>(queue,
207  workDiv,
208  HGCalLayerClustersSoAAlgoKernelPositionByHits2{},
209  size,
210  thresholdW0,
211  positionDeltaRho2,
212  input_rechits_soa,
213  input_clusters_soa,
214  outputs,
215  outputs_service);
216  uint32_t group_clusters = divide_up_by(size, items);
217  auto workDivClusters = make_workdiv<Acc1D>(group_clusters, items);
218  alpaka::exec<Acc1D>(queue,
219  workDivClusters,
220  HGCalLayerClustersSoAAlgoKernelPositionByHits3{},
221  size,
222  thresholdW0,
223  positionDeltaRho2,
224  input_rechits_soa,
225  input_clusters_soa,
226  outputs,
227  outputs_service);
228  }
size
Write out results.
constexpr Idx divide_up_by(Idx value, Idx divisor)
Definition: workdivision.h:20
static constexpr uint8_t kInvalidIndexByte