CMS 3D CMS Logo

gpuClusterTracksIterative.h
Go to the documentation of this file.
1 #ifndef RecoPixelVertexing_PixelVertexFinding_plugins_gpuClusterTracksIterative_h
2 #define RecoPixelVertexing_PixelVertexFinding_plugins_gpuClusterTracksIterative_h
3 
4 #include <algorithm>
5 #include <cmath>
6 #include <cstdint>
7 
10 
11 #include "gpuVertexFinder.h"
12 
13 namespace gpuVertexFinder {
14 
15  // this algo does not really scale as it works in a single block...
16  // enough for <10K tracks we have
17  __global__ void clusterTracksIterative(ZVertices* pdata,
18  WorkSpace* pws,
19  int minT, // min number of neighbours to be "core"
20  float eps, // max absolute distance to cluster
21  float errmax, // max error to be "seed"
22  float chi2max // max normalized distance to cluster
23  ) {
24  constexpr bool verbose = false; // in principle the compiler should optmize out if false
25 
26  if (verbose && 0 == threadIdx.x)
27  printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
28 
29  auto er2mx = errmax * errmax;
30 
31  auto& __restrict__ data = *pdata;
32  auto& __restrict__ ws = *pws;
33  auto nt = ws.ntrks;
34  float const* __restrict__ zt = ws.zt;
35  float const* __restrict__ ezt2 = ws.ezt2;
36 
37  uint32_t& nvFinal = data.nvFinal;
38  uint32_t& nvIntermediate = ws.nvIntermediate;
39 
40  uint8_t* __restrict__ izt = ws.izt;
41  int32_t* __restrict__ nn = data.ndof;
42  int32_t* __restrict__ iv = ws.iv;
43 
44  assert(pdata);
45  assert(zt);
46 
48  __shared__ Hist hist;
49  __shared__ typename Hist::Counter hws[32];
50  for (auto j = threadIdx.x; j < Hist::totbins(); j += blockDim.x) {
51  hist.off[j] = 0;
52  }
53  __syncthreads();
54 
55  if (verbose && 0 == threadIdx.x)
56  printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt);
57 
58  assert((int)nt <= hist.capacity());
59 
60  // fill hist (bin shall be wider than "eps")
61  for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
63  int iz = int(zt[i] * 10.); // valid if eps<=0.1
64  iz = std::clamp(iz, INT8_MIN, INT8_MAX);
65  izt[i] = iz - INT8_MIN;
66  assert(iz - INT8_MIN >= 0);
67  assert(iz - INT8_MIN < 256);
68  hist.count(izt[i]);
69  iv[i] = i;
70  nn[i] = 0;
71  }
72  __syncthreads();
73  if (threadIdx.x < 32)
74  hws[threadIdx.x] = 0; // used by prefix scan...
75  __syncthreads();
76  hist.finalize(hws);
77  __syncthreads();
78  assert(hist.size() == nt);
79  for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
80  hist.fill(izt[i], uint16_t(i));
81  }
82  __syncthreads();
83 
84  // count neighbours
85  for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
86  if (ezt2[i] > er2mx)
87  continue;
88  auto loop = [&](uint32_t j) {
89  if (i == j)
90  return;
91  auto dist = std::abs(zt[i] - zt[j]);
92  if (dist > eps)
93  return;
94  if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
95  return;
96  nn[i]++;
97  };
98 
99  cms::cuda::forEachInBins(hist, izt[i], 1, loop);
100  }
101 
102  __shared__ int nloops;
103  nloops = 0;
104 
105  __syncthreads();
106 
107  // cluster seeds only
108  bool more = true;
110  if (1 == nloops % 2) {
111  for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
112  auto m = iv[i];
113  while (m != iv[m])
114  m = iv[m];
115  iv[i] = m;
116  }
117  } else {
118  more = false;
119  for (auto k = threadIdx.x; k < hist.size(); k += blockDim.x) {
120  auto p = hist.begin() + k;
121  auto i = (*p);
122  auto be = std::min(Hist::bin(izt[i]) + 1, int(hist.nbins() - 1));
123  if (nn[i] < minT)
124  continue; // DBSCAN core rule
125  auto loop = [&](uint32_t j) {
126  assert(i != j);
127  if (nn[j] < minT)
128  return; // DBSCAN core rule
129  auto dist = std::abs(zt[i] - zt[j]);
130  if (dist > eps)
131  return;
132  if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
133  return;
134  auto old = atomicMin(&iv[j], iv[i]);
135  if (old != iv[i]) {
136  // end the loop only if no changes were applied
137  more = true;
138  }
139  atomicMin(&iv[i], old);
140  };
141  ++p;
142  for (; p < hist.end(be); ++p)
143  loop(*p);
144  } // for i
145  }
146  if (threadIdx.x == 0)
147  ++nloops;
148  } // while
149 
150  // collect edges (assign to closest cluster of closest point??? here to closest point)
151  for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
152  // if (nn[i]==0 || nn[i]>=minT) continue; // DBSCAN edge rule
153  if (nn[i] >= minT)
154  continue; // DBSCAN edge rule
155  float mdist = eps;
156  auto loop = [&](int j) {
157  if (nn[j] < minT)
158  return; // DBSCAN core rule
159  auto dist = std::abs(zt[i] - zt[j]);
160  if (dist > mdist)
161  return;
162  if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
163  return; // needed?
164  mdist = dist;
165  iv[i] = iv[j]; // assign to cluster (better be unique??)
166  };
167  cms::cuda::forEachInBins(hist, izt[i], 1, loop);
168  }
169 
170  __shared__ unsigned int foundClusters;
171  foundClusters = 0;
172  __syncthreads();
173 
174  // find the number of different clusters, identified by a tracks with clus[i] == i;
175  // mark these tracks with a negative id.
176  for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
177  if (iv[i] == int(i)) {
178  if (nn[i] >= minT) {
179  auto old = atomicInc(&foundClusters, 0xffffffff);
180  iv[i] = -(old + 1);
181  } else { // noise
182  iv[i] = -9998;
183  }
184  }
185  }
186  __syncthreads();
187 
189 
190  // propagate the negative id to all the tracks in the cluster.
191  for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
192  if (iv[i] >= 0) {
193  // mark each track in a cluster with the same id as the first one
194  iv[i] = iv[iv[i]];
195  }
196  }
197  __syncthreads();
198 
199  // adjust the cluster id to be a positive value starting from 0
200  for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
201  iv[i] = -iv[i] - 1;
202  }
203 
205 
206  if (verbose && 0 == threadIdx.x)
207  printf("found %d proto vertices\n", foundClusters);
208  }
209 
210 } // namespace gpuVertexFinder
211 
212 #endif // RecoPixelVertexing_PixelVertexFinding_plugins_gpuClusterTracksIterative_h
mps_fire.i
i
Definition: mps_fire.py:428
ZVertexSoA::MAXVTX
static constexpr uint32_t MAXVTX
Definition: ZVertexSoA.h:12
gpuVertexFinder::assert
assert(pdata)
min
T min(T a, T b)
Definition: MathUtil.h:58
gpuVertexFinder::iv
int32_t *__restrict__ iv
Definition: gpuClusterTracksDBSCAN.h:42
gpuVertexFinder::eps
WorkSpace int float eps
Definition: gpuClusterTracksDBSCAN.h:18
gpuVertexFinder
Definition: gpuClusterTracksByDensity.h:13
gpuVertexFinder.h
gpuVertexFinder::nloops
__shared__ int nloops
Definition: gpuClusterTracksIterative.h:102
gpuVertexFinder::ws
auto &__restrict__ ws
Definition: gpuClusterTracksDBSCAN.h:32
gpuVertexFinder::zt
float const *__restrict__ zt
Definition: gpuClusterTracksDBSCAN.h:34
cms::cudacompat::__syncthreads_or
bool __syncthreads_or(bool x)
Definition: cudaCompat.h:110
__global__
#define __global__
Definition: cudaCompat.h:19
gpuVertexFinder::data
auto &__restrict__ data
Definition: gpuClusterTracksDBSCAN.h:31
gpuVertexFinder::nvFinal
uint32_t & nvFinal
Definition: gpuClusterTracksDBSCAN.h:37
gpuVertexFinder::more
bool more
Definition: gpuClusterTracksIterative.h:108
cms::cuda::HistoContainer::Counter
typename Base::Counter Counter
Definition: HistoContainer.h:105
gpuVertexFinder::Hist
cms::cuda::HistoContainer< uint8_t, 256, 16000, 8, uint16_t > Hist
Definition: gpuClusterTracksDBSCAN.h:47
visualization-live-secondInstance_cfg.m
m
Definition: visualization-live-secondInstance_cfg.py:78
gpuVertexFinder::izt
uint8_t *__restrict__ izt
Definition: gpuClusterTracksDBSCAN.h:40
verbose
static constexpr int verbose
Definition: HLTExoticaSubAnalysis.cc:25
dqmdumpme.k
k
Definition: dqmdumpme.py:60
gpuVertexFinder::nt
auto nt
Definition: gpuClusterTracksDBSCAN.h:33
gpuVertexFinder::errmax
WorkSpace int float float errmax
Definition: gpuClusterTracksDBSCAN.h:18
AlCaHLTBitMon_ParallelJobs.p
def p
Definition: AlCaHLTBitMon_ParallelJobs.py:153
cms::cudacompat::blockDim
const dim3 blockDim
Definition: cudaCompat.h:30
gpuVertexFinder::hist
__shared__ Hist hist
Definition: gpuClusterTracksDBSCAN.h:48
heppy_loop.loop
loop
Definition: heppy_loop.py:28
gpuVertexFinder::hws
__shared__ Hist::Counter hws[32]
Definition: gpuClusterTracksDBSCAN.h:49
createfilelist.int
int
Definition: createfilelist.py:10
gpuVertexFinder::chi2max
WorkSpace int float float float chi2max
Definition: gpuClusterTracksDBSCAN.h:23
gpuVertexFinder::pws
WorkSpace * pws
Definition: gpuClusterTracksDBSCAN.h:18
gpuVertexFinder::minT
WorkSpace int minT
Definition: gpuClusterTracksDBSCAN.h:18
cms::cudacompat::threadIdx
const dim3 threadIdx
Definition: cudaCompat.h:29
ZVertexSoA::MAXTRACKS
static constexpr uint32_t MAXTRACKS
Definition: ZVertexSoA.h:11
HistoContainer.h
cms::cuda::HistoContainer::nbins
static constexpr uint32_t nbins()
Definition: HistoContainer.h:123
gpuVertexFinder::ezt2
float const *__restrict__ ezt2
Definition: gpuClusterTracksDBSCAN.h:35
gpuVertexFinder::er2mx
auto er2mx
Definition: gpuClusterTracksDBSCAN.h:29
cms::cudacompat::atomicMin
T1 atomicMin(T1 *a, T2 b)
Definition: cudaCompat.h:85
cms::cuda::OneToManyAssoc::capacity
constexpr auto capacity() const
Definition: OneToManyAssoc.h:169
cms::cuda::HistoContainer::bin
static constexpr UT bin(T t)
Definition: HistoContainer.h:132
funct::abs
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
gpuVertexFinder::foundClusters
__shared__ unsigned int foundClusters
Definition: gpuClusterTracksDBSCAN.h:199
cms::cuda::be
int be
Definition: HistoContainer.h:75
cms::cuda::HistoContainer
Definition: HistoContainer.h:101
cuda_assert.h
dqmiolumiharvest.j
j
Definition: dqmiolumiharvest.py:66
cms::cuda::HistoContainer::totbins
static constexpr uint32_t totbins()
Definition: HistoContainer.h:125
gpuVertexFinder::__syncthreads
__syncthreads()
Definition: cudaCompat.h:108
gpuVertexFinder::ZVertices
ZVertexSoA ZVertices
Definition: gpuVertexFinder.h:11
gpuVertexFinder::nn
int32_t *__restrict__ nn
Definition: gpuClusterTracksDBSCAN.h:41
gpuVertexFinder::nvIntermediate
uint32_t & nvIntermediate
Definition: gpuClusterTracksDBSCAN.h:38
cms::cudacompat::atomicInc
T1 atomicInc(T1 *a, T2 b)
Definition: cudaCompat.h:48
cms::cuda::OneToManyAssoc::off
off[c.m]
Definition: OneToManyAssoc.h:236