CMS 3D CMS Logo

HGCalLayerClustersSoAAlgoWrapper.dev.cc
Go to the documentation of this file.
2 
4 #include "ConstantsForClusters.h"
5 
6 #include "CLUEAlgoAlpaka.h"
7 
9 
10  using namespace cms::alpakatools;
11  using namespace hgcal::constants;
12 
13  // Set energy and number of hits in each clusters
15  public:
16  template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
17  ALPAKA_FN_ACC void operator()(TAcc const& acc,
18  const unsigned int numer_of_clusters,
19  const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
20  const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
22  // make a strided loop over the kernel grid, covering up to "size" elements
23  for (int32_t i : uniform_elements(acc, input_rechits_soa.metadata().size())) {
24  // Skip unassigned rechits
25  if (input_clusters_soa[i].clusterIndex() == kInvalidCluster) {
26  continue;
27  }
28  auto clIdx = input_clusters_soa[i].clusterIndex();
29  alpaka::atomicAdd(acc, &outputs[clIdx].energy(), input_rechits_soa[i].weight());
30  alpaka::atomicAdd(acc, &outputs[clIdx].cells(), 1);
31  if (input_clusters_soa[i].isSeed() == 1) {
32  outputs[clIdx].seed() = input_rechits_soa[i].detid();
33  }
34  }
35  }
36  };
37 
38  // Kernel to find the max for every cluster
40  public:
41  template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
42  ALPAKA_FN_ACC void operator()(TAcc const& acc,
43  const unsigned int numer_of_clusters,
44  float thresholdW0,
45  float positionDeltaRho2,
46  const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
47  const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
49  HGCalSoAClustersExtraDeviceCollection::View outputs_service) const {
50  // make a strided loop over the kernel grid, covering up to "size" elements
51  for (int32_t hit_index : uniform_elements(acc, input_rechits_soa.metadata().size())) {
52  const int cluster_index = input_clusters_soa[hit_index].clusterIndex();
53 
54  // Bail out if you are not part of any cluster
55  if (cluster_index == kInvalidCluster) {
56  continue;
57  }
58 
59  alpaka::atomicAdd(acc, &outputs_service[cluster_index].total_weight(), input_rechits_soa[hit_index].weight());
60  // Read the current seed index, and the associated energy.
61  int clusterSeed = outputs_service[cluster_index].maxEnergyIndex();
62  float clusterEnergy = (clusterSeed == kInvalidIndex) ? 0.f : input_rechits_soa[clusterSeed].weight();
63 
64  while (input_rechits_soa[hit_index].weight() > clusterEnergy) {
65  // If output_service[cluster_index].maxEnergyIndex() did not change,
66  // store the new value and exit the loop. Otherwise return the value
67  // that has been updated, and decide again if the maximum needs to be
68  // updated.
69  int seed = alpaka::atomicCas(acc, &outputs_service[cluster_index].maxEnergyIndex(), clusterSeed, hit_index);
70  if (seed == hit_index) {
71  // atomicCas has stored the new value.
72  break;
73  } else {
74  // Update the seed index and re-read the associated energy.
75  clusterSeed = seed;
76  clusterEnergy = (clusterSeed == kInvalidIndex) ? 0.f : input_rechits_soa[clusterSeed].weight();
77  }
78  } // CAS
79  } // uniform_elements
80  } // operator()
81  };
82 
83  // Real Kernel position
85  public:
86  template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
87  ALPAKA_FN_ACC void operator()(TAcc const& acc,
88  const unsigned int numer_of_clusters,
89  float thresholdW0,
90  float positionDeltaRho2,
91  const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
92  const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
94  HGCalSoAClustersExtraDeviceCollection::View outputs_service) const {
95  // make a strided loop over the kernel grid, covering up to "size" elements
96  for (int32_t hit_index : uniform_elements(acc, input_rechits_soa.metadata().size())) {
97  const int cluster_index = input_clusters_soa[hit_index].clusterIndex();
98 
99  // Bail out if you are not part of any cluster
100  if (cluster_index == kInvalidCluster) {
101  continue;
102  }
103  const int max_energy_index = outputs_service[cluster_index].maxEnergyIndex();
104 
105  //for silicon only just use 1+6 cells = 1.3cm for all thicknesses
106  const float d1 = input_rechits_soa[hit_index].dim1() - input_rechits_soa[max_energy_index].dim1();
107  const float d2 = input_rechits_soa[hit_index].dim2() - input_rechits_soa[max_energy_index].dim2();
108  if (std::fmaf(d1, d1, d2 * d2) > positionDeltaRho2) {
109  continue;
110  }
111  float Wi = std::max(thresholdW0 + std::log(input_rechits_soa[hit_index].weight() /
112  outputs_service[cluster_index].total_weight()),
113  0.f);
114  alpaka::atomicAdd(acc, &outputs[cluster_index].x(), input_rechits_soa[hit_index].dim1() * Wi);
115  alpaka::atomicAdd(acc, &outputs[cluster_index].y(), input_rechits_soa[hit_index].dim2() * Wi);
116  alpaka::atomicAdd(acc, &outputs_service[cluster_index].total_weight_log(), Wi);
117  } // uniform_elements
118  } // operator()
119  };
120 
121  // Besides the final position, add also the DetId of the seed of each cluster
123  public:
124  template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
125  ALPAKA_FN_ACC void operator()(TAcc const& acc,
126  const unsigned int numer_of_clusters,
127  float thresholdW0,
128  float positionDeltaRho2,
129  const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
130  const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
132  HGCalSoAClustersExtraDeviceCollection::View outputs_service) const {
133  // make a strided loop over the kernel grid, covering up to "size" elements
134  for (int32_t cluster_index : uniform_elements(acc, outputs.metadata().size())) {
135  const int max_energy_index = outputs_service[cluster_index].maxEnergyIndex();
136 
137  if (outputs_service[cluster_index].total_weight_log() > 0.f) {
138  float inv_tot_weight = 1.f / outputs_service[cluster_index].total_weight_log();
139  outputs[cluster_index].x() *= inv_tot_weight;
140  outputs[cluster_index].y() *= inv_tot_weight;
141  } else {
142  outputs[cluster_index].x() = input_rechits_soa[max_energy_index].dim1();
143  outputs[cluster_index].y() = input_rechits_soa[max_energy_index].dim2();
144  }
145  outputs[cluster_index].z() = input_rechits_soa[max_energy_index].dim3();
146  } // uniform_elements
147  } // operator()
148  };
149 
151  const unsigned int size,
152  float thresholdW0,
153  float positionDeltaRho2,
154  const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
155  const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
157  HGCalSoAClustersExtraDeviceCollection::View outputs_service) const {
158  auto x = cms::alpakatools::make_device_view<float>(queue, outputs.x(), size);
159  alpaka::memset(queue, x, 0x0);
160  auto y = cms::alpakatools::make_device_view<float>(queue, outputs.y(), size);
161  alpaka::memset(queue, y, 0x0);
162  auto z = cms::alpakatools::make_device_view<float>(queue, outputs.z(), size);
163  alpaka::memset(queue, z, 0x0);
164  auto seed = cms::alpakatools::make_device_view<int>(queue, outputs.seed(), size);
165  alpaka::memset(queue, seed, 0x0);
166  auto energy = cms::alpakatools::make_device_view<float>(queue, outputs.energy(), size);
167  alpaka::memset(queue, energy, 0x0);
168  auto cells = cms::alpakatools::make_device_view<int>(queue, outputs.cells(), size);
169  alpaka::memset(queue, cells, 0x0);
170  auto total_weight = cms::alpakatools::make_device_view<float>(queue, outputs_service.total_weight(), size);
171  alpaka::memset(queue, total_weight, 0x0);
172  auto total_weight_log = cms::alpakatools::make_device_view<float>(queue, outputs_service.total_weight_log(), size);
173  alpaka::memset(queue, total_weight_log, 0x0);
174  auto maxEnergyValue = cms::alpakatools::make_device_view<float>(queue, outputs_service.maxEnergyValue(), size);
175  alpaka::memset(queue, maxEnergyValue, 0x0);
176  auto maxEnergyIndex = cms::alpakatools::make_device_view<int>(queue, outputs_service.maxEnergyIndex(), size);
177  alpaka::memset(queue, maxEnergyIndex, kInvalidIndexByte);
178 
179  // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
180  uint32_t items = 64;
181 
182  // use as many groups as needed to cover the whole problem
183  uint32_t groups = divide_up_by(input_rechits_soa.metadata().size(), items);
184 
185  // map items to
186  // - threads with a single element per thread on a GPU backend
187  // - elements within a single thread on a CPU backend
188  auto workDiv = make_workdiv<Acc1D>(groups, items);
189 
190  alpaka::exec<Acc1D>(
191  queue, workDiv, HGCalLayerClustersSoAAlgoKernelEnergy{}, size, input_rechits_soa, input_clusters_soa, outputs);
192  alpaka::exec<Acc1D>(queue,
193  workDiv,
195  size,
196  thresholdW0,
198  input_rechits_soa,
199  input_clusters_soa,
200  outputs,
201  outputs_service);
202  alpaka::exec<Acc1D>(queue,
203  workDiv,
205  size,
206  thresholdW0,
208  input_rechits_soa,
209  input_clusters_soa,
210  outputs,
211  outputs_service);
212  uint32_t group_clusters = divide_up_by(size, items);
213  auto workDivClusters = make_workdiv<Acc1D>(group_clusters, items);
214  alpaka::exec<Acc1D>(queue,
215  workDivClusters,
217  size,
218  thresholdW0,
220  input_rechits_soa,
221  input_clusters_soa,
222  outputs,
223  outputs_service);
224  }
225 } // namespace ALPAKA_ACCELERATOR_NAMESPACE
ALPAKA_FN_ACC auto uniform_elements(TAcc const &acc, TArgs... args)
Definition: workdivision.h:311
constexpr Idx divide_up_by(Idx value, Idx divisor)
Definition: workdivision.h:20
constexpr auto kInvalidIndex
ALPAKA_FN_ACC void operator()(TAcc const &acc, const unsigned int numer_of_clusters, float thresholdW0, float positionDeltaRho2, const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa, const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa, HGCalSoAClustersDeviceCollection::View outputs, HGCalSoAClustersExtraDeviceCollection::View outputs_service) const
static constexpr uint8_t kInvalidIndexByte
void run(Queue &queue, const unsigned int numer_of_clusters, float thresholdW0, float positionDeltaRho2, const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa, const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa, HGCalSoAClustersDeviceCollection::View outputs, HGCalSoAClustersExtraDeviceCollection::View outputs_service) const
ALPAKA_FN_ACC void operator()(TAcc const &acc, const unsigned int numer_of_clusters, float thresholdW0, float positionDeltaRho2, const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa, const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa, HGCalSoAClustersDeviceCollection::View outputs, HGCalSoAClustersExtraDeviceCollection::View outputs_service) const
double f[11][100]
ALPAKA_FN_ACC void operator()(TAcc const &acc, const unsigned int numer_of_clusters, float thresholdW0, float positionDeltaRho2, const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa, const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa, HGCalSoAClustersDeviceCollection::View outputs, HGCalSoAClustersExtraDeviceCollection::View outputs_service) const
float x
static constexpr int kInvalidCluster
static constexpr float d1
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
ALPAKA_FN_ACC void operator()(TAcc const &acc, const unsigned int numer_of_clusters, const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa, const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa, HGCalSoAClustersDeviceCollection::View outputs) const