CMS 3D CMS Logo

clusterTracksIterative.h
Go to the documentation of this file.
1 #ifndef RecoTracker_PixelVertexFinding_clusterTracksIterativeAlpaka_h
2 #define RecoTracker_PixelVertexFinding_clusterTracksIterativeAlpaka_h
3 
4 #include <algorithm>
5 #include <cmath>
6 #include <cstdint>
7 
8 #include <alpaka/alpaka.hpp>
9 
15 
16 #include "vertexFinder.h"
17 
19  namespace vertexFinder {
20 
21  // this algo does not really scale as it works in a single block...
22  // enough for <10K tracks we have
24  public:
25  template <typename TAcc>
26  ALPAKA_FN_ACC void operator()(const TAcc& acc,
28  WsSoAView pws,
29  int minT, // min number of neighbours to be "core"
30  float eps, // max absolute distance to cluster
31  float errmax, // max error to be "seed"
32  float chi2max // max normalized distance to cluster
33  ) const {
34  constexpr bool verbose = false; // in principle the compiler should optmize out if false
35  const uint32_t threadIdxLocal(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]);
36  if constexpr (verbose) {
38  printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
39  }
40  auto er2mx = errmax * errmax;
41 
42  auto& __restrict__ data = pdata;
43  auto& __restrict__ ws = pws;
44  auto nt = ws.ntrks();
45  float const* __restrict__ zt = ws.zt();
46  float const* __restrict__ ezt2 = ws.ezt2();
47 
48  uint32_t& nvFinal = data.nvFinal();
49  uint32_t& nvIntermediate = ws.nvIntermediate();
50 
51  uint8_t* __restrict__ izt = ws.izt();
52  int32_t* __restrict__ nn = data.ndof();
53  int32_t* __restrict__ iv = ws.iv();
54 
59 
61  auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
62  auto& hws = alpaka::declareSharedVar<Hist::Counter[32], __COUNTER__>(acc);
63 
65  hist.off[j] = 0;
66  }
67  alpaka::syncBlockThreads(acc);
68 
69  if constexpr (verbose) {
71  printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt);
72  }
73 
74  ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= hist.capacity());
75 
76  // fill hist (bin shall be wider than "eps")
77  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
79  int iz = int(zt[i] * 10.); // valid if eps<=0.1
80  iz = std::clamp(iz, INT8_MIN, INT8_MAX);
81  izt[i] = iz - INT8_MIN;
82  ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0);
83  ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256);
84  hist.count(acc, izt[i]);
85  iv[i] = i;
86  nn[i] = 0;
87  }
88  alpaka::syncBlockThreads(acc);
89 
90  if (threadIdxLocal < 32)
91  hws[threadIdxLocal] = 0; // used by prefix scan...
92  alpaka::syncBlockThreads(acc);
93 
94  hist.finalize(acc, hws);
95  alpaka::syncBlockThreads(acc);
96 
97  ALPAKA_ASSERT_ACC(hist.size() == nt);
98  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
99  hist.fill(acc, izt[i], uint16_t(i));
100  }
101  alpaka::syncBlockThreads(acc);
102 
103  // count neighbours
104  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
105  if (ezt2[i] > er2mx)
106  continue;
107  auto loop = [&](uint32_t j) {
108  if (i == j)
109  return;
110  auto dist = std::abs(zt[i] - zt[j]);
111  if (dist > eps)
112  return;
113  if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
114  return;
115  nn[i]++;
116  };
117 
119  }
120 
121  auto& nloops = alpaka::declareSharedVar<int, __COUNTER__>(acc);
122  nloops = 0;
123 
124  alpaka::syncBlockThreads(acc);
125 
126  // cluster seeds only
127  bool more = true;
128  while (alpaka::syncBlockThreadsPredicate<alpaka::BlockOr>(acc, more)) {
129  if (1 == nloops % 2) {
130  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
131  auto m = iv[i];
132  while (m != iv[m])
133  m = iv[m];
134  iv[i] = m;
135  }
136  } else {
137  more = false;
138  for (auto k : cms::alpakatools::uniform_elements(acc, hist.size())) {
139  auto p = hist.begin() + k;
140  auto i = (*p);
141  auto be = std::min(Hist::bin(izt[i]) + 1, int(hist.nbins() - 1));
142  if (nn[i] < minT)
143  continue; // DBSCAN core rule
144  auto loop = [&](uint32_t j) {
145  ALPAKA_ASSERT_ACC(i != j);
146  if (nn[j] < minT)
147  return; // DBSCAN core rule
148  auto dist = std::abs(zt[i] - zt[j]);
149  if (dist > eps)
150  return;
151  if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
152  return;
153  auto old = alpaka::atomicMin(acc, &iv[j], iv[i], alpaka::hierarchy::Blocks{});
154  if (old != iv[i]) {
155  // end the loop only if no changes were applied
156  more = true;
157  }
159  };
160  ++p;
161  for (; p < hist.end(be); ++p)
162  loop(*p);
163  } // for i
164  }
165  if (threadIdxLocal == 0)
166  ++nloops;
167  } // while
168 
169  // collect edges (assign to closest cluster of closest point??? here to closest point)
170  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
171  // if (nn[i]==0 || nn[i]>=minT) continue; // DBSCAN edge rule
172  if (nn[i] >= minT)
173  continue; // DBSCAN edge rule
174  float mdist = eps;
175  auto loop = [&](int j) {
176  if (nn[j] < minT)
177  return; // DBSCAN core rule
178  auto dist = std::abs(zt[i] - zt[j]);
179  if (dist > mdist)
180  return;
181  if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
182  return; // needed?
183  mdist = dist;
184  iv[i] = iv[j]; // assign to cluster (better be unique??)
185  };
187  }
188 
189  auto& foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
190  foundClusters = 0;
191  alpaka::syncBlockThreads(acc);
192 
193  // find the number of different clusters, identified by a tracks with clus[i] == i;
194  // mark these tracks with a negative id.
195  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
196  if (iv[i] == int(i)) {
197  if (nn[i] >= minT) {
198  auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
199  iv[i] = -(old + 1);
200  } else { // noise
201  iv[i] = -9998;
202  }
203  }
204  }
205  alpaka::syncBlockThreads(acc);
206 
208 
209  // propagate the negative id to all the tracks in the cluster.
210  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
211  if (iv[i] >= 0) {
212  // mark each track in a cluster with the same id as the first one
213  iv[i] = iv[iv[i]];
214  }
215  }
216  alpaka::syncBlockThreads(acc);
217 
218  // adjust the cluster id to be a positive value starting from 0
219  for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
220  iv[i] = -iv[i] - 1;
221  }
222 
224 
225  if constexpr (verbose) {
227  printf("found %d proto vertices\n", foundClusters);
228  }
229  }
230  };
231  } // namespace vertexFinder
232 } // namespace ALPAKA_ACCELERATOR_NAMESPACE
233 #endif // RecoTracker_PixelVertexFinding_plugins_clusterTracksIterativeAlpaka_h
ALPAKA_FN_ACC constexpr bool once_per_block(TAcc const &acc)
ALPAKA_FN_ACC auto uniform_elements(TAcc const &acc, TArgs... args)
Definition: workdivision.h:311
const uint32_t threadIdxLocal(alpaka::getIdx< alpaka::Block, alpaka::Threads >(acc)[0u])
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView & pdata
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView WsSoAView int float eps
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView WsSoAView int float float errmax
constexpr uint32_t MAXTRACKS
static constexpr UT bin(T t)
T1 atomicInc(T1 *a, T2 b)
Definition: cudaCompat.h:48
static constexpr uint32_t totbins()
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView WsSoAView & pws
ALPAKA_FN_ACC void operator()(const TAcc &acc, VtxSoAView pdata, WsSoAView pws, int minT, float eps, float errmax, float chi2max) const
std::vector< Block > Blocks
Definition: Block.h:99
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInBins(Hist const &hist, V value, int n, Func func)
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView WsSoAView int minT
::vertexFinder::PixelVertexWorkSpaceSoAView WsSoAView
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView WsSoAView int float float float chi2max
constexpr uint32_t MAXVTX
T1 atomicMin(T1 *a, T2 b)
Definition: cudaCompat.h:85