CMS 3D CMS Logo

clusterTracksDBSCAN.h
Go to the documentation of this file.
1 #ifndef RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksDBSCAN_h
2 #define RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksDBSCAN_h
3 
4 #include <algorithm>
5 #include <cmath>
6 #include <cstdint>
7 
8 #include <alpaka/alpaka.hpp>
9 
15 
16 #include "vertexFinder.h"
17 
19 
20  // This algo does not really scale as it works in a single block...
21  // It should be good enough for <10K tracks we have.
23  public:
24  ALPAKA_FN_ACC void operator()(Acc1D const& acc,
27  WsSoAView ws,
28  int minT, // min number of neighbours to be "core"
29  float eps, // max absolute distance to cluster
30  float errmax, // max error to be "seed"
31  float chi2max // max normalized distance to cluster
32  ) const {
33  constexpr bool verbose = false;
34 
35  if constexpr (verbose) {
37  printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
38  }
39 
40  auto nt = ws.ntrks();
41  ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= ws.metadata().size());
42  ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= trkdata.metadata().size());
43 
44  float const* __restrict__ zt = ws.zt();
45  float const* __restrict__ ezt2 = ws.ezt2();
46  uint8_t* __restrict__ izt = ws.izt();
47  int32_t* __restrict__ iv = ws.iv();
48  int32_t* __restrict__ nn = trkdata.ndof();
54 
56  auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
57  auto& hws = alpaka::declareSharedVar<Hist::Counter[32], __COUNTER__>(acc);
58 
59  for (auto j : cms::alpakatools::uniform_elements(acc, Hist::totbins())) {
60  hist.off[j] = 0;
61  }
62  for (auto j : cms::alpakatools::uniform_elements(acc, 32)) {
63  hws[j] = 0; // used by prefix scan in hist.finalize()
64  }
65  alpaka::syncBlockThreads(acc);
66 
67  if constexpr (verbose) {
69  printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt);
70  }
71 
73  ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= hist.capacity());
74  ALPAKA_ASSERT_ACC(eps <= 0.1f); // see below
75 
76  // fill hist (bin shall be wider than "eps")
77  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
78  int iz = static_cast<int>(zt[i] * 10.f); // valid if eps <= 0.1
79  iz = std::clamp(iz, INT8_MIN, INT8_MAX);
80  izt[i] = iz - INT8_MIN;
81  ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0);
82  ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256);
83  hist.count(acc, izt[i]);
84  iv[i] = i;
85  nn[i] = 0;
86  }
87  alpaka::syncBlockThreads(acc);
88 
89  hist.finalize(acc, hws);
90  alpaka::syncBlockThreads(acc);
91 
92  ALPAKA_ASSERT_ACC(hist.size() == nt);
93  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
94  hist.fill(acc, izt[i], uint16_t(i));
95  }
96  alpaka::syncBlockThreads(acc);
97 
98  // count neighbours
99  const auto errmax2 = errmax * errmax;
100  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
101  if (ezt2[i] > errmax2)
102  continue;
103  cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
104  if (i == j)
105  return;
106  auto dist = std::abs(zt[i] - zt[j]);
107  if (dist > eps)
108  return;
109  nn[i]++;
110  });
111  }
112  alpaka::syncBlockThreads(acc);
113 
114  // find NN with smaller z...
115  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
116  if (nn[i] < minT)
117  continue; // DBSCAN core rule
118  float mz = zt[i];
119  cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
120  if (zt[j] >= mz)
121  return;
122  if (nn[j] < minT)
123  return; // DBSCAN core rule
124  auto dist = std::abs(zt[i] - zt[j]);
125  if (dist > eps)
126  return;
127  mz = zt[j];
128  iv[i] = j; // assign to cluster (better be unique??)
129  });
130  }
131  alpaka::syncBlockThreads(acc);
132 
133 #ifdef GPU_DEBUG
134  // mini verification
135  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
136  if (iv[i] != int(i))
137  ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
138  }
139  alpaka::syncBlockThreads(acc);
140 #endif
141 
142  // consolidate graph (percolate index of seed)
143  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
144  auto m = iv[i];
145  while (m != iv[m])
146  m = iv[m];
147  iv[i] = m;
148  }
149  alpaka::syncBlockThreads(acc);
150 
151 #ifdef GPU_DEBUG
152  // mini verification
153  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
154  if (iv[i] != int(i))
155  ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
156  }
157  alpaka::syncBlockThreads(acc);
158 #endif
159 
160 #ifdef GPU_DEBUG
161  // and verify that we did not split any cluster...
162  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
163  if (nn[i] < minT)
164  continue; // DBSCAN core rule
165  ALPAKA_ASSERT_ACC(zt[iv[i]] <= zt[i]);
166  cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
167  if (nn[j] < minT)
168  return; // DBSCAN core rule
169  auto dist = std::abs(zt[i] - zt[j]);
170  if (dist > eps)
171  return;
172  // they should belong to the same cluster, isn't it?
173  if (iv[i] != iv[j]) {
174  printf("ERROR %d %d %f %f %d\n", i, iv[i], zt[i], zt[iv[i]], iv[iv[i]]);
175  printf(" %d %d %f %f %d\n", j, iv[j], zt[j], zt[iv[j]], iv[iv[j]]);
176  }
177  ALPAKA_ASSERT_ACC(iv[i] == iv[j]);
178  });
179  }
180  alpaka::syncBlockThreads(acc);
181 #endif
182 
183  // collect edges (assign to closest cluster of closest point??? here to closest point)
184  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
185  // if (nn[i]==0 || nn[i]>=minT) continue; // DBSCAN edge rule
186  if (nn[i] >= minT)
187  continue; // DBSCAN edge rule
188  float mdist = eps;
189  cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
190  if (nn[j] < minT)
191  return; // DBSCAN core rule
192  auto dist = std::abs(zt[i] - zt[j]);
193  if (dist > mdist)
194  return;
195  if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
196  return; // needed?
197  mdist = dist;
198  iv[i] = iv[j]; // assign to cluster (better be unique??)
199  });
200  }
201 
202  auto& foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
203  foundClusters = 0;
204  alpaka::syncBlockThreads(acc);
205 
206  // find the number of different clusters, identified by a tracks with clus[i] == i;
207  // mark these tracks with a negative id.
208  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
209  if (iv[i] == int(i)) {
210  if (nn[i] >= minT) {
211  auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
212  iv[i] = -(old + 1);
213  } else { // noise
214  iv[i] = -9998;
215  }
216  }
217  }
218  alpaka::syncBlockThreads(acc);
219 
220  ALPAKA_ASSERT_ACC(static_cast<int>(foundClusters) < data.metadata().size());
221 
222  // propagate the negative id to all the tracks in the cluster.
223  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
224  if (iv[i] >= 0) {
225  // mark each track in a cluster with the same id as the first one
226  iv[i] = iv[iv[i]];
227  }
228  }
229  alpaka::syncBlockThreads(acc);
230 
231  // adjust the cluster id to be a positive value starting from 0
232  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
233  iv[i] = -iv[i] - 1;
234  }
235 
236  ws.nvIntermediate() = foundClusters;
237  data.nvFinal() = foundClusters;
238 
239  if constexpr (verbose) {
241  printf("found %d proto vertices\n", foundClusters);
242  }
243  }
244  };
245 
246 } // namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder
247 
248 #endif // RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksDBSCAN_h
ALPAKA_FN_ACC void operator()(Acc1D const &acc, VtxSoAView data, TrkSoAView trkdata, WsSoAView ws, int minT, float eps, float errmax, float chi2max) const
ALPAKA_FN_ACC constexpr bool once_per_block(TAcc const &acc)
ALPAKA_FN_ACC auto uniform_elements(TAcc const &acc, TArgs... args)
Definition: workdivision.h:311
ALPAKA_ASSERT_ACC(nvFinal<=nvIntermediate)
bool verbose
cms::cuda::HistoContainer< uint8_t, 256, 16000, 8, uint16_t > Hist
T1 atomicInc(T1 *a, T2 b)
Definition: cudaCompat.h:48
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInBins(Hist const &hist, V value, int n, Func func)
double f[11][100]
__shared__ Hist::Counter hws[32]
::vertexFinder::PixelVertexWorkSpaceSoAView WsSoAView
Definition: sortByPt2.h:24
ALPAKA_ACCELERATOR_NAMESPACE::Acc1D Acc1D
Definition: LSTEvent.dev.cc:15