CMS 3D CMS Logo

clusterTracksByDensity.h
Go to the documentation of this file.
1 #ifndef RecoTracker_PixelVertexFinding_plugins_alpaka_clusterTracksByDensity_h
2 #define RecoTracker_PixelVertexFinding_plugins_alpaka_clusterTracksByDensity_h
3 
4 #include <algorithm>
5 #include <cmath>
6 #include <cstdint>
7 
8 #include <alpaka/alpaka.hpp>
9 
15 
16 #include "vertexFinder.h"
17 
19 
22  // this algo does not really scale as it works in a single block...
23  // enough for <10K tracks we have
24  //
25  // based on Rodrighez&Laio algo
26  //
27  template <typename TAcc>
28  ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline))
29  clusterTracksByDensity(const TAcc& acc,
31  WsSoAView& pws,
32  int minT, // min number of neighbours to be "seed"
33  float eps, // max absolute distance to cluster
34  float errmax, // max error to be "seed"
35  float chi2max // max normalized distance to cluster
36  ) {
37  using namespace vertexFinder;
38  constexpr bool verbose = false; // in principle the compiler should optmize out if false
39  const uint32_t threadIdxLocal(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]);
40 
43  printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
44  }
45  auto er2mx = errmax * errmax;
46 
47  auto& __restrict__ data = pdata;
48  auto& __restrict__ ws = pws;
49  auto nt = ws.ntrks();
50  float const* __restrict__ zt = ws.zt();
51  float const* __restrict__ ezt2 = ws.ezt2();
52 
53  uint32_t& nvFinal = data.nvFinal();
54  uint32_t& nvIntermediate = ws.nvIntermediate();
55 
56  uint8_t* __restrict__ izt = ws.izt();
57  int32_t* __restrict__ nn = data.ndof();
58  int32_t* __restrict__ iv = ws.iv();
59 
65 
67  auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
68  auto& hws = alpaka::declareSharedVar<Hist::Counter[32], __COUNTER__>(acc);
69 
71  hist.off[j] = 0;
72  }
73  alpaka::syncBlockThreads(acc);
74 
75  if constexpr (verbose) {
77  printf("booked hist with %d bins, size %d for %d tracks\n", hist.totbins(), hist.capacity(), nt);
78  }
79  ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= hist.capacity());
80 
81  // fill hist (bin shall be wider than "eps")
84  int iz = int(zt[i] * 10.); // valid if eps<=0.1
85  // iz = std::clamp(iz, INT8_MIN, INT8_MAX); // sorry c++17 only
86  iz = std::min(std::max(iz, INT8_MIN), INT8_MAX);
87  izt[i] = iz - INT8_MIN;
88  ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0);
89  ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256);
90  hist.count(acc, izt[i]);
91  iv[i] = i;
92  nn[i] = 0;
93  }
94  alpaka::syncBlockThreads(acc);
95  if (threadIdxLocal < 32)
96  hws[threadIdxLocal] = 0; // used by prefix scan...
97  alpaka::syncBlockThreads(acc);
98  hist.finalize(acc, hws);
99  alpaka::syncBlockThreads(acc);
100  ALPAKA_ASSERT_ACC(hist.size() == nt);
101  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
102  hist.fill(acc, izt[i], uint16_t(i));
103  }
104  alpaka::syncBlockThreads(acc);
105  // count neighbours
106  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
107  if (ezt2[i] > er2mx)
108  continue;
109  auto loop = [&](uint32_t j) {
110  if (i == j)
111  return;
112  auto dist = std::abs(zt[i] - zt[j]);
113  if (dist > eps)
114  return;
115  if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
116  return;
117  nn[i]++;
118  };
119 
121  }
122  alpaka::syncBlockThreads(acc);
123 
124  // find closest above me .... (we ignore the possibility of two j at same distance from i)
125  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
126  float mdist = eps;
127  auto loop = [&](uint32_t j) {
128  if (nn[j] < nn[i])
129  return;
130  if (nn[j] == nn[i] && zt[j] >= zt[i])
131  return; // if equal use natural order...
132  auto dist = std::abs(zt[i] - zt[j]);
133  if (dist > mdist)
134  return;
135  if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
136  return; // (break natural order???)
137  mdist = dist;
138  iv[i] = j; // assign to cluster (better be unique??)
139  };
141  }
142  alpaka::syncBlockThreads(acc);
143 
144 #ifdef GPU_DEBUG
145  // mini verification
146  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
147  if (iv[i] != int(i))
148  ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
149  }
150  alpaka::syncBlockThreads(acc);
151 #endif
152 
153  // consolidate graph (percolate index of seed)
154  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
155  auto m = iv[i];
156  while (m != iv[m])
157  m = iv[m];
158  iv[i] = m;
159  }
160 
161 #ifdef GPU_DEBUG
162  alpaka::syncBlockThreads(acc);
163  // mini verification
164  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
165  if (iv[i] != int(i))
166  ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
167  }
168 #endif
169 
170 #ifdef GPU_DEBUG
171  // and verify that we did not spit any cluster...
172  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
173  auto minJ = i;
174  auto mdist = eps;
175  auto loop = [&](uint32_t j) {
176  if (nn[j] < nn[i])
177  return;
178  if (nn[j] == nn[i] && zt[j] >= zt[i])
179  return; // if equal use natural order...
180  auto dist = std::abs(zt[i] - zt[j]);
181  if (dist > mdist)
182  return;
183  if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
184  return;
185  mdist = dist;
186  minJ = j;
187  };
189  // should belong to the same cluster...
190  ALPAKA_ASSERT_ACC(iv[i] == iv[minJ]);
191  ALPAKA_ASSERT_ACC(nn[i] <= nn[iv[i]]);
192  }
193  alpaka::syncBlockThreads(acc);
194 #endif
195 
196  auto& foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
197  foundClusters = 0;
198  alpaka::syncBlockThreads(acc);
199 
200  // find the number of different clusters, identified by a tracks with clus[i] == i and density larger than threshold;
201  // mark these tracks with a negative id.
202  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
203  if (iv[i] == int(i)) {
204  if (nn[i] >= minT) {
205  auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
206  iv[i] = -(old + 1);
207  } else { // noise
208  iv[i] = -9998;
209  }
210  }
211  }
212  alpaka::syncBlockThreads(acc);
213 
215 
216  // propagate the negative id to all the tracks in the cluster.
217  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
218  if (iv[i] >= 0) {
219  // mark each track in a cluster with the same id as the first one
220  iv[i] = iv[iv[i]];
221  }
222  }
223  alpaka::syncBlockThreads(acc);
224 
225  // adjust the cluster id to be a positive value starting from 0
226  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
227  iv[i] = -iv[i] - 1;
228  }
229 
231  if constexpr (verbose) {
233  printf("found %d proto vertices\n", foundClusters);
234  }
235  }
237  public:
238  template <typename TAcc>
239  ALPAKA_FN_ACC void operator()(const TAcc& acc,
241  WsSoAView pws,
242  int minT, // min number of neighbours to be "seed"
243  float eps, // max absolute distance to cluster
244  float errmax, // max error to be "seed"
245  float chi2max // max normalized distance to cluster
246  ) const {
247  clusterTracksByDensity(acc, pdata, pws, minT, eps, errmax, chi2max);
248  }
249  };
250 
251 } // namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder
252 
253 #endif // RecoTracker_PixelVertexFinding_plugins_alpaka_clusterTracksByDensity_h
ALPAKA_FN_ACC constexpr bool once_per_block(TAcc const &acc)
ALPAKA_FN_ACC void operator()(const TAcc &acc, VtxSoAView pdata, WsSoAView pws, int minT, float eps, float errmax, float chi2max) const
ALPAKA_FN_ACC auto uniform_elements(TAcc const &acc, TArgs... args)
Definition: workdivision.h:311
const uint32_t threadIdxLocal(alpaka::getIdx< alpaka::Block, alpaka::Threads >(acc)[0u])
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView & pdata
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView WsSoAView int float eps
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView WsSoAView int float float errmax
constexpr uint32_t MAXTRACKS
ZVertexSoAHeterogeneousLayout<>::View ZVertexSoAView
T1 atomicInc(T1 *a, T2 b)
Definition: cudaCompat.h:48
static constexpr uint32_t totbins()
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView WsSoAView & pws
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInBins(Hist const &hist, V value, int n, Func func)
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView WsSoAView int minT
PixelVertexWSSoALayout<>::View PixelVertexWorkSpaceSoAView
::vertexFinder::PixelVertexWorkSpaceSoAView WsSoAView
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView WsSoAView int float float float chi2max
constexpr uint32_t MAXVTX
ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline)) clusterTracksByDensity(const TAcc &acc