CMS 3D CMS Logo

clusterTracksByDensity.h
Go to the documentation of this file.
1 #ifndef RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksByDensity_h
2 #define RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksByDensity_h
3 
4 #include <algorithm>
5 #include <cmath>
6 #include <cstdint>
7 
8 #include <alpaka/alpaka.hpp>
9 
15 
16 #include "vertexFinder.h"
17 
19 
20  // This algo does not really scale as it works in a single block...
21  // It should be good enough for <10K tracks we have.
22  //
23  // Based on Rodrighez&Laio algo.
24  ALPAKA_FN_ACC ALPAKA_FN_INLINE void clusterTracksByDensity(Acc1D const& acc,
27  WsSoAView& ws,
28  int minT, // min number of neighbours to be "seed"
29  float eps, // max absolute distance to cluster
30  float errmax, // max error to be "seed"
31  float chi2max // max normalized distance to cluster
32  ) {
33  constexpr bool verbose = false;
34 
35  if constexpr (verbose) {
37  printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
38  }
39 
40  auto nt = ws.ntrks();
41  ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= ws.metadata().size());
42  ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= trkdata.metadata().size());
43 
44  float const* __restrict__ zt = ws.zt();
45  float const* __restrict__ ezt2 = ws.ezt2();
46  uint8_t* __restrict__ izt = ws.izt();
47  int32_t* __restrict__ iv = ws.iv();
48  int32_t* __restrict__ nn = trkdata.ndof();
54 
56  auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
57  auto& hws = alpaka::declareSharedVar<Hist::Counter[32], __COUNTER__>(acc);
58 
59  for (auto j : cms::alpakatools::uniform_elements(acc, Hist::totbins())) {
60  hist.off[j] = 0;
61  }
62  for (auto j : cms::alpakatools::uniform_elements(acc, 32)) {
63  hws[j] = 0; // used by prefix scan in hist.finalize()
64  }
65  alpaka::syncBlockThreads(acc);
66 
67  if constexpr (verbose) {
69  printf("booked hist with %d bins, size %d for %d tracks\n", hist.totbins(), hist.capacity(), nt);
70  }
71 
73  ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= hist.capacity());
74  ALPAKA_ASSERT_ACC(eps <= 0.1f); // see below
75 
76  // fill hist (bin shall be wider than "eps")
77  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
78  int iz = static_cast<int>(zt[i] * 10.f); // valid if eps <= 0.1
79  iz = std::clamp(iz, INT8_MIN, INT8_MAX);
80  ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0);
81  ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256);
82  izt[i] = iz - INT8_MIN;
83  hist.count(acc, izt[i]);
84  iv[i] = i;
85  nn[i] = 0;
86  }
87  alpaka::syncBlockThreads(acc);
88 
89  hist.finalize(acc, hws);
90  alpaka::syncBlockThreads(acc);
91 
92  ALPAKA_ASSERT_ACC(hist.size() == nt);
93  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
94  hist.fill(acc, izt[i], uint16_t(i));
95  }
96  alpaka::syncBlockThreads(acc);
97 
98  // count neighbours
99  const auto errmax2 = errmax * errmax;
100  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
101  if (ezt2[i] > errmax2)
102  continue;
103  cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
104  if (i == j)
105  return;
106  auto dist = std::abs(zt[i] - zt[j]);
107  if (dist > eps)
108  return;
109  if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
110  return;
111  nn[i]++;
112  });
113  }
114  alpaka::syncBlockThreads(acc);
115 
116  // find closest above me .... (we ignore the possibility of two j at same distance from i)
117  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
118  float mdist = eps;
119  cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
120  if (nn[j] < nn[i])
121  return;
122  if (nn[j] == nn[i] && zt[j] >= zt[i])
123  return; // if equal use natural order...
124  auto dist = std::abs(zt[i] - zt[j]);
125  if (dist > mdist)
126  return;
127  if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
128  return; // (break natural order???)
129  mdist = dist;
130  iv[i] = j; // assign to cluster (better be unique??)
131  });
132  }
133  alpaka::syncBlockThreads(acc);
134 
135 #ifdef GPU_DEBUG
136  // mini verification
137  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
138  if (iv[i] != int(i))
139  ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
140  }
141  alpaka::syncBlockThreads(acc);
142 #endif
143 
144  // consolidate graph (percolate index of seed)
145  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
146  auto m = iv[i];
147  while (m != iv[m])
148  m = iv[m];
149  iv[i] = m;
150  }
151 
152 #ifdef GPU_DEBUG
153  alpaka::syncBlockThreads(acc);
154  // mini verification
155  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
156  if (iv[i] != int(i))
157  ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
158  }
159 #endif
160 
161 #ifdef GPU_DEBUG
162  // and verify that we did not split any cluster...
163  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
164  auto minJ = i;
165  auto mdist = eps;
166  cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
167  if (nn[j] < nn[i])
168  return;
169  if (nn[j] == nn[i] && zt[j] >= zt[i])
170  return; // if equal use natural order...
171  auto dist = std::abs(zt[i] - zt[j]);
172  if (dist > mdist)
173  return;
174  if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
175  return;
176  mdist = dist;
177  minJ = j;
178  });
179 
180  // should belong to the same cluster...
181  ALPAKA_ASSERT_ACC(iv[i] == iv[minJ]);
182  ALPAKA_ASSERT_ACC(nn[i] <= nn[iv[i]]);
183  }
184  alpaka::syncBlockThreads(acc);
185 #endif
186 
187  auto& foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
188  foundClusters = 0;
189  alpaka::syncBlockThreads(acc);
190 
191  // find the number of different clusters, identified by a tracks with clus[i] == i and density larger than threshold;
192  // mark these tracks with a negative id.
193  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
194  if (iv[i] == int(i)) {
195  if (nn[i] >= minT) {
196  auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
197  iv[i] = -(old + 1);
198  } else { // noise
199  iv[i] = -9998;
200  }
201  }
202  }
203  alpaka::syncBlockThreads(acc);
204 
205  ALPAKA_ASSERT_ACC(static_cast<int>(foundClusters) < data.metadata().size());
206 
207  // propagate the negative id to all the tracks in the cluster.
208  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
209  if (iv[i] >= 0) {
210  // mark each track in a cluster with the same id as the first one
211  iv[i] = iv[iv[i]];
212  }
213  }
214  alpaka::syncBlockThreads(acc);
215 
216  // adjust the cluster id to be a positive value starting from 0
217  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
218  iv[i] = -iv[i] - 1;
219  }
220 
221  ws.nvIntermediate() = foundClusters;
222  data.nvFinal() = foundClusters;
223 
224  if constexpr (verbose) {
226  printf("found %d proto vertices\n", foundClusters);
227  }
228  }
229 
231  public:
232  ALPAKA_FN_ACC void operator()(Acc1D const& acc,
235  WsSoAView ws,
236  int minT, // min number of neighbours to be "seed"
237  float eps, // max absolute distance to cluster
238  float errmax, // max error to be "seed"
239  float chi2max // max normalized distance to cluster
240  ) const {
242  }
243  };
244 
245 } // namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder
246 
247 #endif // RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksByDensity_h
ALPAKA_FN_ACC constexpr bool once_per_block(TAcc const &acc)
ALPAKA_FN_ACC auto uniform_elements(TAcc const &acc, TArgs... args)
Definition: workdivision.h:311
ALPAKA_ASSERT_ACC(nvFinal<=nvIntermediate)
bool verbose
cms::cuda::HistoContainer< uint8_t, 256, 16000, 8, uint16_t > Hist
T1 atomicInc(T1 *a, T2 b)
Definition: cudaCompat.h:48
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInBins(Hist const &hist, V value, int n, Func func)
double f[11][100]
__shared__ Hist::Counter hws[32]
::vertexFinder::PixelVertexWorkSpaceSoAView WsSoAView
Definition: sortByPt2.h:24
ALPAKA_FN_ACC ALPAKA_FN_INLINE void clusterTracksByDensity(Acc1D const &acc, VtxSoAView &data, TrkSoAView &trkdata, WsSoAView &ws, int minT, float eps, float errmax, float chi2max)
ALPAKA_FN_ACC void operator()(Acc1D const &acc, VtxSoAView data, TrkSoAView trkdata, WsSoAView ws, int minT, float eps, float errmax, float chi2max) const