CMS 3D CMS Logo

clusterTracksDBSCAN.h
Go to the documentation of this file.
1 #ifndef RecoTracker_PixelVertexFinding_plugins_alpaka_clusterTracksDBSCAN_h
2 #define RecoTracker_PixelVertexFinding_plugins_alpaka_clusterTracksDBSCAN_h
3 
4 #include <algorithm>
5 #include <cmath>
6 #include <cstdint>
7 
8 #include <alpaka/alpaka.hpp>
9 
15 
16 #include "vertexFinder.h"
17 
19 
22  // this algo does not really scale as it works in a single block...
23  // enough for <10K tracks we have
25  public:
26  template <typename TAcc>
27  ALPAKA_FN_ACC void operator()(const TAcc& acc,
29  WsSoAView pws,
30  int minT, // min number of neighbours to be "core"
31  float eps, // max absolute distance to cluster
32  float errmax, // max error to be "seed"
33  float chi2max // max normalized distance to cluster
34  ) const {
35  constexpr bool verbose = false; // in principle the compiler should optmize out if false
36  const uint32_t threadIdxLocal(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]);
37  if constexpr (verbose) {
39  printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
40  }
41  auto er2mx = errmax * errmax;
42 
43  auto& __restrict__ data = pdata;
44  auto& __restrict__ ws = pws;
45  auto nt = ws.ntrks();
46  float const* __restrict__ zt = ws.zt();
47  float const* __restrict__ ezt2 = ws.ezt2();
48 
49  uint32_t& nvFinal = data.nvFinal();
50  uint32_t& nvIntermediate = ws.nvIntermediate();
51 
52  uint8_t* __restrict__ izt = ws.izt();
53  int32_t* __restrict__ nn = data.ndof();
54  int32_t* __restrict__ iv = ws.iv();
55 
60 
62  auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
63  auto& hws = alpaka::declareSharedVar<Hist::Counter[32], __COUNTER__>(acc);
64 
66  hist.off[j] = 0;
67  }
68  alpaka::syncBlockThreads(acc);
69 
70  if constexpr (verbose) {
72  printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt);
73  }
74 
75  ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= hist.capacity());
76 
77  // fill hist (bin shall be wider than "eps")
78  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
80  int iz = int(zt[i] * 10.); // valid if eps<=0.1
81  iz = std::clamp(iz, INT8_MIN, INT8_MAX);
82  izt[i] = iz - INT8_MIN;
83  ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0);
84  ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256);
85  hist.count(acc, izt[i]);
86  iv[i] = i;
87  nn[i] = 0;
88  }
89  alpaka::syncBlockThreads(acc);
90  if (threadIdxLocal < 32)
91  hws[threadIdxLocal] = 0; // used by prefix scan...
92  alpaka::syncBlockThreads(acc);
93  hist.finalize(acc, hws);
94  alpaka::syncBlockThreads(acc);
95  ALPAKA_ASSERT_ACC(hist.size() == nt);
96  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
97  hist.fill(acc, izt[i], uint32_t(i));
98  }
99  alpaka::syncBlockThreads(acc);
100 
101  // count neighbours
102  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
103  if (ezt2[i] > er2mx)
104  continue;
105  auto loop = [&](uint32_t j) {
106  if (i == j)
107  return;
108  auto dist = std::abs(zt[i] - zt[j]);
109  if (dist > eps)
110  return;
111  // if (dist*dist>chi2max*(ezt2[i]+ezt2[j])) return;
112  nn[i]++;
113  };
114 
116  }
117 
118  alpaka::syncBlockThreads(acc);
119 
120  // find NN with smaller z...
121  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
122  if (nn[i] < minT)
123  continue; // DBSCAN core rule
124  float mz = zt[i];
125  auto loop = [&](uint32_t j) {
126  if (zt[j] >= mz)
127  return;
128  if (nn[j] < minT)
129  return; // DBSCAN core rule
130  auto dist = std::abs(zt[i] - zt[j]);
131  if (dist > eps)
132  return;
133  // if (dist*dist>chi2max*(ezt2[i]+ezt2[j])) return;
134  mz = zt[j];
135  iv[i] = j; // assign to cluster (better be unique??)
136  };
138  }
139 
140  alpaka::syncBlockThreads(acc);
141 
142 #ifdef GPU_DEBUG
143  // mini verification
144  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
145  if (iv[i] != int(i))
146  ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
147  }
148  alpaka::syncBlockThreads(acc);
149 #endif
150 
151  // consolidate graph (percolate index of seed)
152  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
153  auto m = iv[i];
154  while (m != iv[m])
155  m = iv[m];
156  iv[i] = m;
157  }
158 
159  alpaka::syncBlockThreads(acc);
160 
161 #ifdef GPU_DEBUG
162  // mini verification
163  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
164  if (iv[i] != int(i))
165  ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
166  }
167  alpaka::syncBlockThreads(acc);
168 #endif
169 
170 #ifdef GPU_DEBUG
171  // and verify that we did not spit any cluster...
172  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
173  if (nn[i] < minT)
174  continue; // DBSCAN core rule
175  ALPAKA_ASSERT_ACC(zt[iv[i]] <= zt[i]);
176  auto loop = [&](uint32_t j) {
177  if (nn[j] < minT)
178  return; // DBSCAN core rule
179  auto dist = std::abs(zt[i] - zt[j]);
180  if (dist > eps)
181  return;
182  // if (dist*dist>chi2max*(ezt2[i]+ezt2[j])) return;
183  // they should belong to the same cluster, isn't it?
184  if (iv[i] != iv[j]) {
185  printf("ERROR %d %d %f %f %d\n", i, iv[i], zt[i], zt[iv[i]], iv[iv[i]]);
186  printf(" %d %d %f %f %d\n", j, iv[j], zt[j], zt[iv[j]], iv[iv[j]]);
187  ;
188  }
189  ALPAKA_ASSERT_ACC(iv[i] == iv[j]);
190  };
192  }
193  alpaka::syncBlockThreads(acc);
194 #endif
195 
196  // collect edges (assign to closest cluster of closest point??? here to closest point)
197  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
198  // if (nn[i]==0 || nn[i]>=minT) continue; // DBSCAN edge rule
199  if (nn[i] >= minT)
200  continue; // DBSCAN edge rule
201  float mdist = eps;
202  auto loop = [&](uint32_t j) {
203  if (nn[j] < minT)
204  return; // DBSCAN core rule
205  auto dist = std::abs(zt[i] - zt[j]);
206  if (dist > mdist)
207  return;
208  if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
209  return; // needed?
210  mdist = dist;
211  iv[i] = iv[j]; // assign to cluster (better be unique??)
212  };
214  }
215 
216  auto& foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
217  foundClusters = 0;
218  alpaka::syncBlockThreads(acc);
219 
220  // find the number of different clusters, identified by a tracks with clus[i] == i;
221  // mark these tracks with a negative id.
222  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
223  if (iv[i] == int(i)) {
224  if (nn[i] >= minT) {
225  auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
226  iv[i] = -(old + 1);
227  } else { // noise
228  iv[i] = -9998;
229  }
230  }
231  }
232  alpaka::syncBlockThreads(acc);
233 
235 
236  // propagate the negative id to all the tracks in the cluster.
237  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
238  if (iv[i] >= 0) {
239  // mark each track in a cluster with the same id as the first one
240  iv[i] = iv[iv[i]];
241  }
242  }
243  alpaka::syncBlockThreads(acc);
244 
245  // adjust the cluster id to be a positive value starting from 0
246  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
247  iv[i] = -iv[i] - 1;
248  }
249 
251 
252  if constexpr (verbose) {
254  printf("found %d proto vertices\n", foundClusters);
255  }
256  }
257  };
258 
259 } // namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder
260 
261 #endif // RecoTracker_PixelVertexFinding_plugins_alpaka_clusterTracksDBSCAN_h
ALPAKA_FN_ACC void operator()(const TAcc &acc, VtxSoAView pdata, WsSoAView pws, int minT, float eps, float errmax, float chi2max) const
ALPAKA_FN_ACC constexpr bool once_per_block(TAcc const &acc)
ALPAKA_FN_ACC auto uniform_elements(TAcc const &acc, TArgs... args)
Definition: workdivision.h:311
const uint32_t threadIdxLocal(alpaka::getIdx< alpaka::Block, alpaka::Threads >(acc)[0u])
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView & pdata
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView WsSoAView int float eps
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView WsSoAView int float float errmax
constexpr uint32_t MAXTRACKS
ZVertexSoAHeterogeneousLayout<>::View ZVertexSoAView
T1 atomicInc(T1 *a, T2 b)
Definition: cudaCompat.h:48
static constexpr uint32_t totbins()
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView WsSoAView & pws
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInBins(Hist const &hist, V value, int n, Func func)
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView WsSoAView int minT
PixelVertexWSSoALayout<>::View PixelVertexWorkSpaceSoAView
::vertexFinder::PixelVertexWorkSpaceSoAView WsSoAView
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView WsSoAView int float float float chi2max
constexpr uint32_t MAXVTX