CMS 3D CMS Logo

clusterTracksIterative.h
Go to the documentation of this file.
1 #ifndef RecoVertex_PixelVertexFinding_clusterTracksIterativeAlpaka_h
2 #define RecoVertex_PixelVertexFinding_clusterTracksIterativeAlpaka_h
3 
4 #include <algorithm>
5 #include <cmath>
6 #include <cstdint>
7 
8 #include <alpaka/alpaka.hpp>
9 
15 
16 #include "vertexFinder.h"
17 
19 
20  // This algo does not really scale as it works in a single block...
21  // It should be good enough for <10K tracks we have.
23  public:
24  ALPAKA_FN_ACC void operator()(Acc1D const& acc,
27  WsSoAView ws,
28  int minT, // min number of neighbours to be "core"
29  float eps, // max absolute distance to cluster
30  float errmax, // max error to be "seed"
31  float chi2max // max normalized distance to cluster
32  ) const {
33  constexpr bool verbose = false;
34 
35  if constexpr (verbose) {
37  printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
38  }
39 
40  auto nt = ws.ntrks();
41  ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= ws.metadata().size());
42  ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= trkdata.metadata().size());
43 
44  float const* __restrict__ zt = ws.zt();
45  float const* __restrict__ ezt2 = ws.ezt2();
46  uint8_t* __restrict__ izt = ws.izt();
47  int32_t* __restrict__ iv = ws.iv();
48  int32_t* __restrict__ nn = trkdata.ndof();
54 
56  auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
57  auto& hws = alpaka::declareSharedVar<Hist::Counter[32], __COUNTER__>(acc);
58 
59  for (auto j : cms::alpakatools::uniform_elements(acc, Hist::totbins())) {
60  hist.off[j] = 0;
61  }
62  for (auto j : cms::alpakatools::uniform_elements(acc, 32)) {
63  hws[j] = 0; // used by prefix scan in hist.finalize()
64  }
65  alpaka::syncBlockThreads(acc);
66 
67  if constexpr (verbose) {
69  printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt);
70  }
71 
73  ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= hist.capacity());
74  ALPAKA_ASSERT_ACC(eps <= 0.1f); // see below
75 
76  // fill hist (bin shall be wider than "eps")
77  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
78  int iz = static_cast<int>(zt[i] * 10.f); // valid if eps <= 0.1
79  iz = std::clamp(iz, INT8_MIN, INT8_MAX);
80  izt[i] = iz - INT8_MIN;
81  ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0);
82  ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256);
83  hist.count(acc, izt[i]);
84  iv[i] = i;
85  nn[i] = 0;
86  }
87  alpaka::syncBlockThreads(acc);
88 
89  hist.finalize(acc, hws);
90  alpaka::syncBlockThreads(acc);
91 
92  ALPAKA_ASSERT_ACC(hist.size() == nt);
93  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
94  hist.fill(acc, izt[i], uint16_t(i));
95  }
96  alpaka::syncBlockThreads(acc);
97 
98  // count neighbours
99  const auto errmax2 = errmax * errmax;
100  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
101  if (ezt2[i] > errmax2)
102  continue;
103  cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
104  if (i == j)
105  return;
106  auto dist = std::abs(zt[i] - zt[j]);
107  if (dist > eps)
108  return;
109  if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
110  return;
111  nn[i]++;
112  });
113  }
114 
115  auto& nloops = alpaka::declareSharedVar<int, __COUNTER__>(acc);
116  nloops = 0;
117  alpaka::syncBlockThreads(acc);
118 
119  // cluster seeds only
120  bool more = true;
121  while (alpaka::syncBlockThreadsPredicate<alpaka::BlockOr>(acc, more)) {
122  if (1 == nloops % 2) {
123  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
124  auto m = iv[i];
125  while (m != iv[m])
126  m = iv[m];
127  iv[i] = m;
128  }
129  } else {
130  more = false;
131  for (auto k : cms::alpakatools::uniform_elements(acc, hist.size())) {
132  auto p = hist.begin() + k;
133  auto i = *p;
134  ++p;
135  if (nn[i] < minT)
136  continue; // DBSCAN core rule
137  auto be = std::min(Hist::bin(izt[i]) + 1, int(hist.nbins() - 1));
138  for (; p < hist.end(be); ++p) {
139  uint32_t j = *p;
140  ALPAKA_ASSERT_ACC(i != j);
141  if (nn[j] < minT)
142  continue; // DBSCAN core rule
143  auto dist = std::abs(zt[i] - zt[j]);
144  if (dist > eps)
145  continue;
146  if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
147  continue;
148  auto old = alpaka::atomicMin(acc, &iv[j], iv[i], alpaka::hierarchy::Threads{});
149  if (old != iv[i]) {
150  // end the loop only if no changes were applied
151  more = true;
152  }
153  alpaka::atomicMin(acc, &iv[i], old, alpaka::hierarchy::Threads{});
154  } // inner loop on p (and j)
155  } // outer loop on k (and i)
156  }
158  ++nloops;
159  } // while
160 
161  // collect edges (assign to closest cluster of closest point??? here to closest point)
162  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
163  // if (nn[i]==0 || nn[i]>=minT) continue; // DBSCAN edge rule
164  if (nn[i] >= minT)
165  continue; // DBSCAN edge rule
166  float mdist = eps;
167  cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](int j) {
168  if (nn[j] < minT)
169  return; // DBSCAN core rule
170  auto dist = std::abs(zt[i] - zt[j]);
171  if (dist > mdist)
172  return;
173  if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
174  return; // needed?
175  mdist = dist;
176  iv[i] = iv[j]; // assign to cluster (better be unique??)
177  });
178  }
179 
180  auto& foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
181  foundClusters = 0;
182  alpaka::syncBlockThreads(acc);
183 
184  // find the number of different clusters, identified by a tracks with clus[i] == i;
185  // mark these tracks with a negative id.
186  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
187  if (iv[i] == int(i)) {
188  if (nn[i] >= minT) {
189  auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
190  iv[i] = -(old + 1);
191  } else { // noise
192  iv[i] = -9998;
193  }
194  }
195  }
196  alpaka::syncBlockThreads(acc);
197 
198  ALPAKA_ASSERT_ACC(static_cast<int>(foundClusters) < data.metadata().size());
199 
200  // propagate the negative id to all the tracks in the cluster.
201  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
202  if (iv[i] >= 0) {
203  // mark each track in a cluster with the same id as the first one
204  iv[i] = iv[iv[i]];
205  }
206  }
207  alpaka::syncBlockThreads(acc);
208 
209  // adjust the cluster id to be a positive value starting from 0
210  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
211  iv[i] = -iv[i] - 1;
212  }
213 
214  ws.nvIntermediate() = foundClusters;
215  data.nvFinal() = foundClusters;
216 
217  if constexpr (verbose) {
219  printf("found %d proto vertices\n", foundClusters);
220  }
221  }
222  };
223 
224 } // namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder
225 
226 #endif // RecoTracker_PixelVertexFinding_plugins_clusterTracksIterativeAlpaka_h
ALPAKA_FN_ACC constexpr bool once_per_block(TAcc const &acc)
ALPAKA_FN_ACC auto uniform_elements(TAcc const &acc, TArgs... args)
Definition: workdivision.h:311
ALPAKA_ASSERT_ACC(nvFinal<=nvIntermediate)
bool verbose
cms::cuda::HistoContainer< uint8_t, 256, 16000, 8, uint16_t > Hist
T1 atomicInc(T1 *a, T2 b)
Definition: cudaCompat.h:48
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInBins(Hist const &hist, V value, int n, Func func)
double f[11][100]
__shared__ Hist::Counter hws[32]
::vertexFinder::PixelVertexWorkSpaceSoAView WsSoAView
Definition: sortByPt2.h:24
T1 atomicMin(T1 *a, T2 b)
Definition: cudaCompat.h:85
ALPAKA_FN_ACC void operator()(Acc1D const &acc, VtxSoAView data, TrkSoAView trkdata, WsSoAView ws, int minT, float eps, float errmax, float chi2max) const