
gpuClusterTracksDBSCAN.h
#ifndef RecoPixelVertexing_PixelVertexFinding_plugins_gpuClusterTracksDBSCAN_h
#define RecoPixelVertexing_PixelVertexFinding_plugins_gpuClusterTracksDBSCAN_h

#include <algorithm>
#include <cmath>
#include <cstdint>

#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"

#include "gpuVertexFinder.h"

namespace gpuVertexFinder {

  // this algo does not really scale as it works in a single block...
  // enough for <10K tracks we have
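  // In DBSCAN terms: a track is a "core" point if it has at least minT other tracks within
  // eps in z; clusters are the connected components of core points; non-core tracks within
  // eps of a core point are attached to that core's cluster; the rest is labelled as noise.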
  __global__ void clusterTracksDBSCAN(ZVertices* pdata,
                                      WorkSpace* pws,
                                      int minT,      // min number of neighbours to be "core"
                                      float eps,     // max absolute distance to cluster
                                      float errmax,  // max error to be "seed"
                                      float chi2max  // max normalized distance to cluster
  ) {
    constexpr bool verbose = false;  // in principle the compiler should optimize this out if false

    if (verbose && 0 == threadIdx.x)
      printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);

    auto er2mx = errmax * errmax;

    auto& __restrict__ data = *pdata;
    auto& __restrict__ ws = *pws;
    auto nt = ws.ntrks;
    float const* __restrict__ zt = ws.zt;
    float const* __restrict__ ezt2 = ws.ezt2;

    uint32_t& nvFinal = data.nvFinal;
    uint32_t& nvIntermediate = ws.nvIntermediate;

    uint8_t* __restrict__ izt = ws.izt;
    int32_t* __restrict__ nn = data.ndof;
    int32_t* __restrict__ iv = ws.iv;

    assert(pdata);
    assert(zt);

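    // The shared-memory histogram below is a 1D spatial index along z: each track's z is
    // compressed to an 8-bit bin (one bin per 0.1 unit of z, see the fill loop below), so
    // that neighbour searches only have to visit a track's own bin and the adjacent ones
    // instead of looping over all tracks.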
    using Hist = cms::cuda::HistoContainer<uint8_t, 256, 16000, 8, uint16_t>;
    __shared__ Hist hist;
    __shared__ typename Hist::Counter hws[32];
    for (auto j = threadIdx.x; j < Hist::totbins(); j += blockDim.x) {
      hist.off[j] = 0;
    }
    __syncthreads();

    if (verbose && 0 == threadIdx.x)
      printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt);

    assert((int)nt <= hist.capacity());

    // fill hist (bin shall be wider than "eps")
    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
      assert(i < ZVertices::MAXTRACKS);
      int iz = int(zt[i] * 10.);  // valid if eps<=0.1
      iz = std::clamp(iz, INT8_MIN, INT8_MAX);
      izt[i] = iz - INT8_MIN;
      assert(iz - INT8_MIN >= 0);
      assert(iz - INT8_MIN < 256);
      hist.count(izt[i]);
      iv[i] = i;
      nn[i] = 0;
    }
    __syncthreads();
    if (threadIdx.x < 32)
      hws[threadIdx.x] = 0;  // used by prefix scan...
    __syncthreads();
    hist.finalize(hws);
    __syncthreads();
    assert(hist.size() == nt);
    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
      hist.fill(izt[i], uint16_t(i));
    }
    __syncthreads();

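    // Each search below uses cms::cuda::forEachInBins(hist, izt[i], 1, loop), which calls
    // the lambda for every track stored in izt[i]'s bin and in the +/-1 adjacent bins;
    // since a bin is at least as wide as "eps", this covers all tracks within eps of zt[i].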
    // count neighbours
    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
      if (ezt2[i] > er2mx)
        continue;
      auto loop = [&](uint32_t j) {
        if (i == j)
          return;
        auto dist = std::abs(zt[i] - zt[j]);
        if (dist > eps)
          return;
        // if (dist*dist>chi2max*(ezt2[i]+ezt2[j])) return;
        nn[i]++;
      };

      cms::cuda::forEachInBins(hist, izt[i], 1, loop);
    }

    __syncthreads();

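    // Each core track is linked (via iv) to a core neighbour with smaller z, if one exists
    // within eps; after the consolidation step below, every chain of such links resolves to
    // the lowest-z core track of its connected component, which acts as the cluster seed.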
    // find NN with smaller z...
    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
      if (nn[i] < minT)
        continue;  // DBSCAN core rule
      float mz = zt[i];
      auto loop = [&](uint32_t j) {
        if (zt[j] >= mz)
          return;
        if (nn[j] < minT)
          return;  // DBSCAN core rule
        auto dist = std::abs(zt[i] - zt[j]);
        if (dist > eps)
          return;
        // if (dist*dist>chi2max*(ezt2[i]+ezt2[j])) return;
        mz = zt[j];
        iv[i] = j;  // assign to cluster (better be unique??)
      };
      cms::cuda::forEachInBins(hist, izt[i], 1, loop);
    }

    __syncthreads();

#ifdef GPU_DEBUG
    // mini verification
    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
      if (iv[i] != int(i))
        assert(iv[iv[i]] != int(i));
    }
    __syncthreads();
#endif

    // consolidate graph (percolate index of seed)
    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
      auto m = iv[i];
      while (m != iv[m])
        m = iv[m];
      iv[i] = m;
    }

    __syncthreads();

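    // from here on iv[i] points directly at the fixed point of its chain, i.e. at the
    // seed track of the (proto-)cluster that track i currently belongs to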
#ifdef GPU_DEBUG
    // mini verification
    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
      if (iv[i] != int(i))
        assert(iv[iv[i]] != int(i));
    }
    __syncthreads();
#endif

#ifdef GPU_DEBUG
    // and verify that we did not split any cluster...
    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
      if (nn[i] < minT)
        continue;  // DBSCAN core rule
      assert(zt[iv[i]] <= zt[i]);
      auto loop = [&](uint32_t j) {
        if (nn[j] < minT)
          return;  // DBSCAN core rule
        auto dist = std::abs(zt[i] - zt[j]);
        if (dist > eps)
          return;
        // if (dist*dist>chi2max*(ezt2[i]+ezt2[j])) return;
        // they should belong to the same cluster, shouldn't they?
        if (iv[i] != iv[j]) {
          printf("ERROR %d %d %f %f %d\n", i, iv[i], zt[i], zt[iv[i]], iv[iv[i]]);
          printf("      %d %d %f %f %d\n", j, iv[j], zt[j], zt[iv[j]], iv[iv[j]]);
        }
        assert(iv[i] == iv[j]);
      };
      cms::cuda::forEachInBins(hist, izt[i], 1, loop);
    }
    __syncthreads();
#endif

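    // DBSCAN "border" points: tracks with fewer than minT neighbours adopt the cluster of
    // the closest core track within eps, provided the z distance is also compatible with
    // the combined z errors (the chi2max cut); tracks matching no core keep iv[i] == i and
    // are flagged as noise further down.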
    // collect edges (assign to closest cluster of closest point??? here to closest point)
    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
      // if (nn[i]==0 || nn[i]>=minT) continue;    // DBSCAN edge rule
      if (nn[i] >= minT)
        continue;  // DBSCAN edge rule
      float mdist = eps;
      auto loop = [&](uint32_t j) {
        if (nn[j] < minT)
          return;  // DBSCAN core rule
        auto dist = std::abs(zt[i] - zt[j]);
        if (dist > mdist)
          return;
        if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
          return;  // needed?
        mdist = dist;
        iv[i] = iv[j];  // assign to cluster (better be unique??)
      };
      cms::cuda::forEachInBins(hist, izt[i], 1, loop);
    }

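    // Cluster numbering: each seed (a core track still pointing at itself) grabs a unique
    // id with atomicInc and stores it, temporarily negated, in iv; unassigned non-core
    // tracks are marked as noise; the negative ids are then propagated to the rest of each
    // cluster and finally flipped to a 0-based positive index.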
    __shared__ unsigned int foundClusters;
    foundClusters = 0;
    __syncthreads();

    // find the number of different clusters, identified by a track with iv[i] == i;
    // mark these tracks with a negative id.
    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
      if (iv[i] == int(i)) {
        if (nn[i] >= minT) {
          auto old = atomicInc(&foundClusters, 0xffffffff);
          iv[i] = -(old + 1);
        } else {  // noise
          iv[i] = -9998;
        }
      }
    }
    __syncthreads();

    assert(foundClusters < ZVertices::MAXVTX);

    // propagate the negative id to all the tracks in the cluster.
    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
      if (iv[i] >= 0) {
        // mark each track in a cluster with the same id as the first one
        iv[i] = iv[iv[i]];
      }
    }
    __syncthreads();

    // adjust the cluster id to be a positive value starting from 0
    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
      iv[i] = -iv[i] - 1;
    }

    nvIntermediate = nvFinal = foundClusters;

    if (verbose && 0 == threadIdx.x)
      printf("found %d proto vertices\n", foundClusters);
  }
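
  // Illustrative launch (a sketch, not part of this header): since the kernel is written
  // for a single block, a caller would do something along the lines of
  //   clusterTracksDBSCAN<<<1, 1024, 0, stream>>>(soa_d, ws_d, minT, eps, errmax, chi2max);
  // where soa_d and ws_d are device pointers to the ZVertices and WorkSpace structures;
  // the block size, stream and pointer names here are illustrative assumptions only.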

}  // namespace gpuVertexFinder

#endif  // RecoPixelVertexing_PixelVertexFinding_plugins_gpuClusterTracksDBSCAN_h