CMS 3D CMS Logo

clusterTracksByDensity.h
Go to the documentation of this file.
1 #ifndef RecoPixelVertexing_PixelVertexFinding_alpaka_clusterTracksByDensity_h
2 #define RecoPixelVertexing_PixelVertexFinding_alpaka_clusterTracksByDensity_h
3 
4 #include <algorithm>
5 #include <cmath>
6 #include <cstdint>
7 #include <alpaka/alpaka.hpp>
12 #include "vertexFinder.h"
13 
15  namespace vertexFinder {
18  // this algo does not really scale as it works in a single block...
19  // enough for <10K tracks we have
20  //
21  // based on Rodrighez&Laio algo
22  //
23  template <typename TAcc>
24  ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline))
25  clusterTracksByDensity(const TAcc& acc,
27  WsSoAView& pws,
28  int minT, // min number of neighbours to be "seed"
29  float eps, // max absolute distance to cluster
30  float errmax, // max error to be "seed"
31  float chi2max // max normalized distance to cluster
32  ) {
33  using namespace vertexFinder;
34  constexpr bool verbose = false; // in principle the compiler should optmize out if false
35  const uint32_t threadIdxLocal(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]);
36 
39  printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
40  }
41  auto er2mx = errmax * errmax;
42 
43  auto& __restrict__ data = pdata;
44  auto& __restrict__ ws = pws;
45  auto nt = ws.ntrks();
46  float const* __restrict__ zt = ws.zt();
47  float const* __restrict__ ezt2 = ws.ezt2();
48 
49  uint32_t& nvFinal = data.nvFinal();
50  uint32_t& nvIntermediate = ws.nvIntermediate();
51 
52  uint8_t* __restrict__ izt = ws.izt();
53  int32_t* __restrict__ nn = data.ndof();
54  int32_t* __restrict__ iv = ws.iv();
55 
61 
63  auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
64  auto& hws = alpaka::declareSharedVar<Hist::Counter[32], __COUNTER__>(acc);
65 
67  hist.off[j] = 0;
68  }
69  alpaka::syncBlockThreads(acc);
70 
71  if constexpr (verbose) {
73  printf("booked hist with %d bins, size %d for %d tracks\n", hist.totbins(), hist.capacity(), nt);
74  }
75  ALPAKA_ASSERT_OFFLOAD(static_cast<int>(nt) <= hist.capacity());
76 
77  // fill hist (bin shall be wider than "eps")
80  int iz = int(zt[i] * 10.); // valid if eps<=0.1
81  // iz = std::clamp(iz, INT8_MIN, INT8_MAX); // sorry c++17 only
82  iz = std::min(std::max(iz, INT8_MIN), INT8_MAX);
83  izt[i] = iz - INT8_MIN;
84  ALPAKA_ASSERT_OFFLOAD(iz - INT8_MIN >= 0);
85  ALPAKA_ASSERT_OFFLOAD(iz - INT8_MIN < 256);
86  hist.count(acc, izt[i]);
87  iv[i] = i;
88  nn[i] = 0;
89  }
90  alpaka::syncBlockThreads(acc);
91  if (threadIdxLocal < 32)
92  hws[threadIdxLocal] = 0; // used by prefix scan...
93  alpaka::syncBlockThreads(acc);
94  hist.finalize(acc, hws);
95  alpaka::syncBlockThreads(acc);
96  ALPAKA_ASSERT_OFFLOAD(hist.size() == nt);
97  for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) {
98  hist.fill(acc, izt[i], uint16_t(i));
99  }
100  alpaka::syncBlockThreads(acc);
101  // count neighbours
102  for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) {
103  if (ezt2[i] > er2mx)
104  continue;
105  auto loop = [&](uint32_t j) {
106  if (i == j)
107  return;
108  auto dist = std::abs(zt[i] - zt[j]);
109  if (dist > eps)
110  return;
111  if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
112  return;
113  nn[i]++;
114  };
115 
117  }
118  alpaka::syncBlockThreads(acc);
119 
120  // find closest above me .... (we ignore the possibility of two j at same distance from i)
121  for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) {
122  float mdist = eps;
123  auto loop = [&](uint32_t j) {
124  if (nn[j] < nn[i])
125  return;
126  if (nn[j] == nn[i] && zt[j] >= zt[i])
127  return; // if equal use natural order...
128  auto dist = std::abs(zt[i] - zt[j]);
129  if (dist > mdist)
130  return;
131  if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
132  return; // (break natural order???)
133  mdist = dist;
134  iv[i] = j; // assign to cluster (better be unique??)
135  };
137  }
138  alpaka::syncBlockThreads(acc);
139 
140 #ifdef GPU_DEBUG
141  // mini verification
142  for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) {
143  if (iv[i] != int(i))
144  ALPAKA_ASSERT_OFFLOAD(iv[iv[i]] != int(i));
145  }
146  alpaka::syncBlockThreads(acc);
147 #endif
148 
149  // consolidate graph (percolate index of seed)
150  for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) {
151  auto m = iv[i];
152  while (m != iv[m])
153  m = iv[m];
154  iv[i] = m;
155  }
156 
157 #ifdef GPU_DEBUG
158  alpaka::syncBlockThreads(acc);
159  // mini verification
160  for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) {
161  if (iv[i] != int(i))
162  ALPAKA_ASSERT_OFFLOAD(iv[iv[i]] != int(i));
163  }
164 #endif
165 
166 #ifdef GPU_DEBUG
167  // and verify that we did not spit any cluster...
168  for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) {
169  auto minJ = i;
170  auto mdist = eps;
171  auto loop = [&](uint32_t j) {
172  if (nn[j] < nn[i])
173  return;
174  if (nn[j] == nn[i] && zt[j] >= zt[i])
175  return; // if equal use natural order...
176  auto dist = std::abs(zt[i] - zt[j]);
177  if (dist > mdist)
178  return;
179  if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
180  return;
181  mdist = dist;
182  minJ = j;
183  };
185  // should belong to the same cluster...
186  ALPAKA_ASSERT_OFFLOAD(iv[i] == iv[minJ]);
187  ALPAKA_ASSERT_OFFLOAD(nn[i] <= nn[iv[i]]);
188  }
189  alpaka::syncBlockThreads(acc);
190 #endif
191 
192  auto& foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
193  foundClusters = 0;
194  alpaka::syncBlockThreads(acc);
195 
196  // find the number of different clusters, identified by a tracks with clus[i] == i and density larger than threshold;
197  // mark these tracks with a negative id.
198  for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) {
199  if (iv[i] == int(i)) {
200  if (nn[i] >= minT) {
201  auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
202  iv[i] = -(old + 1);
203  } else { // noise
204  iv[i] = -9998;
205  }
206  }
207  }
208  alpaka::syncBlockThreads(acc);
209 
211 
212  // propagate the negative id to all the tracks in the cluster.
213  for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) {
214  if (iv[i] >= 0) {
215  // mark each track in a cluster with the same id as the first one
216  iv[i] = iv[iv[i]];
217  }
218  }
219  alpaka::syncBlockThreads(acc);
220 
221  // adjust the cluster id to be a positive value starting from 0
222  for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) {
223  iv[i] = -iv[i] - 1;
224  }
225 
227  if constexpr (verbose) {
229  printf("found %d proto vertices\n", foundClusters);
230  }
231  }
233  public:
234  template <typename TAcc>
235  ALPAKA_FN_ACC void operator()(const TAcc& acc,
237  WsSoAView pws,
238  int minT, // min number of neighbours to be "seed"
239  float eps, // max absolute distance to cluster
240  float errmax, // max error to be "seed"
241  float chi2max // max normalized distance to cluster
242  ) const {
243  clusterTracksByDensity(acc, pdata, pws, minT, eps, errmax, chi2max);
244  }
245  };
246  } // namespace vertexFinder
247 } // namespace ALPAKA_ACCELERATOR_NAMESPACE
248 #endif // RecoPixelVertexing_PixelVertexFinding_alpaka_clusterTracksByDensity_h
ALPAKA_FN_ACC constexpr bool once_per_block(TAcc const &acc)
Definition: workdivision.h:805
ALPAKA_FN_ACC void operator()(const TAcc &acc, VtxSoAView pdata, WsSoAView pws, int minT, float eps, float errmax, float chi2max) const
const uint32_t threadIdxLocal(alpaka::getIdx< alpaka::Block, alpaka::Threads >(acc)[0u])
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView & pdata
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView WsSoAView int float eps
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView WsSoAView int float float errmax
constexpr uint32_t MAXTRACKS
ZVertexSoAHeterogeneousLayout<>::View ZVertexSoAView
T1 atomicInc(T1 *a, T2 b)
Definition: cudaCompat.h:48
static constexpr uint32_t totbins()
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView WsSoAView & pws
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInBins(Hist const &hist, V value, int n, Func func)
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView WsSoAView int minT
PixelVertexWSSoALayout<>::View PixelVertexWorkSpaceSoAView
::vertexFinder::PixelVertexWorkSpaceSoAView WsSoAView
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView WsSoAView int float float float chi2max
constexpr uint32_t MAXVTX
ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline)) clusterTracksByDensity(const TAcc &acc