CMS 3D CMS Logo

gpuSplitVertices.h
Go to the documentation of this file.
1 #ifndef RecoPixelVertexing_PixelVertexFinding_plugins_gpuSplitVertices_h
2 #define RecoPixelVertexing_PixelVertexFinding_plugins_gpuSplitVertices_h
3 
4 #include <algorithm>
5 #include <cmath>
6 #include <cstdint>
7 
10 
11 #include "gpuVertexFinder.h"
12 
13 namespace gpuVertexFinder {
14 
15  __device__ __forceinline__ void splitVertices(ZVertices* pdata, WorkSpace* pws, float maxChi2) {
16  constexpr bool verbose = false; // in principle the compiler should optmize out if false
17 
18  auto& __restrict__ data = *pdata;
19  auto& __restrict__ ws = *pws;
20  auto nt = ws.ntrks;
21  float const* __restrict__ zt = ws.zt;
22  float const* __restrict__ ezt2 = ws.ezt2;
23  float* __restrict__ zv = data.zv;
24  float* __restrict__ wv = data.wv;
25  float const* __restrict__ chi2 = data.chi2;
26  uint32_t& nvFinal = data.nvFinal;
27 
28  int32_t const* __restrict__ nn = data.ndof;
29  int32_t* __restrict__ iv = ws.iv;
30 
31  assert(pdata);
32  assert(zt);
33 
34  // one vertex per block
35  for (auto kv = blockIdx.x; kv < nvFinal; kv += gridDim.x) {
36  if (nn[kv] < 4)
37  continue;
38  if (chi2[kv] < maxChi2 * float(nn[kv]))
39  continue;
40 
41  constexpr int MAXTK = 512;
42  assert(nn[kv] < MAXTK);
43  if (nn[kv] >= MAXTK)
44  continue; // too bad FIXME
45  __shared__ uint32_t it[MAXTK]; // track index
46  __shared__ float zz[MAXTK]; // z pos
47  __shared__ uint8_t newV[MAXTK]; // 0 or 1
48  __shared__ float ww[MAXTK]; // z weight
49 
50  __shared__ uint32_t nq; // number of track for this vertex
51  nq = 0;
52  __syncthreads();
53 
54  // copy to local
55  for (auto k = threadIdx.x; k < nt; k += blockDim.x) {
56  if (iv[k] == int(kv)) {
57  auto old = atomicInc(&nq, MAXTK);
58  zz[old] = zt[k] - zv[kv];
59  newV[old] = zz[old] < 0 ? 0 : 1;
60  ww[old] = 1.f / ezt2[k];
61  it[old] = k;
62  }
63  }
64 
65  __shared__ float znew[2], wnew[2]; // the new vertices
66 
67  __syncthreads();
68  assert(int(nq) == nn[kv] + 1);
69 
70  int maxiter = 20;
71  // kt-min....
72  bool more = true;
73  while (__syncthreads_or(more)) {
74  more = false;
75  if (0 == threadIdx.x) {
76  znew[0] = 0;
77  znew[1] = 0;
78  wnew[0] = 0;
79  wnew[1] = 0;
80  }
81  __syncthreads();
82  for (auto k = threadIdx.x; k < nq; k += blockDim.x) {
83  auto i = newV[k];
84  atomicAdd(&znew[i], zz[k] * ww[k]);
85  atomicAdd(&wnew[i], ww[k]);
86  }
87  __syncthreads();
88  if (0 == threadIdx.x) {
89  znew[0] /= wnew[0];
90  znew[1] /= wnew[1];
91  }
92  __syncthreads();
93  for (auto k = threadIdx.x; k < nq; k += blockDim.x) {
94  auto d0 = fabs(zz[k] - znew[0]);
95  auto d1 = fabs(zz[k] - znew[1]);
96  auto newer = d0 < d1 ? 0 : 1;
97  more |= newer != newV[k];
98  newV[k] = newer;
99  }
100  --maxiter;
101  if (maxiter <= 0)
102  more = false;
103  }
104 
105  // avoid empty vertices
106  if (0 == wnew[0] || 0 == wnew[1])
107  continue;
108 
109  // quality cut
110  auto dist2 = (znew[0] - znew[1]) * (znew[0] - znew[1]);
111 
112  auto chi2Dist = dist2 / (1.f / wnew[0] + 1.f / wnew[1]);
113 
114  if (verbose && 0 == threadIdx.x)
115  printf("inter %d %f %f\n", 20 - maxiter, chi2Dist, dist2 * wv[kv]);
116 
117  if (chi2Dist < 4)
118  continue;
119 
120  // get a new global vertex
121  __shared__ uint32_t igv;
122  if (0 == threadIdx.x)
123  igv = atomicAdd(&ws.nvIntermediate, 1);
124  __syncthreads();
125  for (auto k = threadIdx.x; k < nq; k += blockDim.x) {
126  if (1 == newV[k])
127  iv[it[k]] = igv;
128  }
129 
130  } // loop on vertices
131  }
132 
133  __global__ void splitVerticesKernel(ZVertices* pdata, WorkSpace* pws, float maxChi2) {
134  splitVertices(pdata, pws, maxChi2);
135  }
136 
137 } // namespace gpuVertexFinder
138 
139 #endif // RecoPixelVertexing_PixelVertexFinding_plugins_gpuSplitVertices_h
gpuVertexFinder::WorkSpace
Definition: gpuVertexFinder.h:15
mps_fire.i
i
Definition: mps_fire.py:428
geometryCSVtoXML.zz
zz
Definition: geometryCSVtoXML.py:19
gpuVertexFinder::assert
assert(pdata)
gpuVertexFinder::chi2
float *__restrict__ chi2
Definition: gpuFitVertices.h:28
gpuVertexFinder::iv
int32_t *__restrict__ iv
Definition: gpuClusterTracksDBSCAN.h:42
gpuVertexFinder
Definition: gpuClusterTracksByDensity.h:13
gpuVertexFinder.h
gpuVertexFinder::ws
auto &__restrict__ ws
Definition: gpuClusterTracksDBSCAN.h:32
gpuVertexFinder::zv
float *__restrict__ zv
Definition: gpuFitVertices.h:26
gpuVertexFinder::wv
float *__restrict__ wv
Definition: gpuFitVertices.h:27
gpuVertexFinder::zt
float const *__restrict__ zt
Definition: gpuClusterTracksDBSCAN.h:34
cms::cudacompat::__syncthreads_or
bool __syncthreads_or(bool x)
Definition: cudaCompat.h:110
__global__
#define __global__
Definition: cudaCompat.h:19
gpuVertexFinder::data
auto &__restrict__ data
Definition: gpuClusterTracksDBSCAN.h:31
gpuVertexFinder::nvFinal
uint32_t & nvFinal
Definition: gpuClusterTracksDBSCAN.h:37
gpuVertexFinder::more
bool more
Definition: gpuClusterTracksIterative.h:108
cms::cudacompat::atomicAdd
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
verbose
static constexpr int verbose
Definition: HLTExoticaSubAnalysis.cc:25
dqmdumpme.k
k
Definition: dqmdumpme.py:60
gpuVertexFinder::nt
auto nt
Definition: gpuClusterTracksDBSCAN.h:33
cms::cudacompat::gridDim
const dim3 gridDim
Definition: cudaCompat.h:33
cms::cudacompat::blockDim
const dim3 blockDim
Definition: cudaCompat.h:30
maxiter
static const MaxIter maxiter
Definition: HelixArbitraryPlaneCrossing.cc:30
__device__
#define __device__
Definition: SiPixelGainForHLTonGPU.h:15
gpuVertexFinder::pws
WorkSpace * pws
Definition: gpuClusterTracksDBSCAN.h:18
cms::cudacompat::threadIdx
const dim3 threadIdx
Definition: cudaCompat.h:29
__forceinline__
#define __forceinline__
Definition: cudaCompat.h:22
HistoContainer.h
gpuVertexFinder::maxChi2
__device__ WorkSpace float maxChi2
Definition: gpuSplitVertices.h:15
ZVertexSoA
Definition: ZVertexSoA.h:10
dqmPostProcessing_online.newer
newer
Definition: dqmPostProcessing_online.py:130
gpuVertexFinder::ezt2
float const *__restrict__ ezt2
Definition: gpuClusterTracksDBSCAN.h:35
cuda_assert.h
gpuVertexFinder::__syncthreads
__syncthreads()
Definition: cudaCompat.h:108
d0
static constexpr float d0
Definition: L1EGammaCrystalsEmulatorProducer.cc:85
gpuVertexFinder::ZVertices
ZVertexSoA ZVertices
Definition: gpuVertexFinder.h:11
gpuVertexFinder::nn
int32_t *__restrict__ nn
Definition: gpuClusterTracksDBSCAN.h:41
d1
static constexpr float d1
Definition: L1EGammaCrystalsEmulatorProducer.cc:85
cms::cudacompat::atomicInc
T1 atomicInc(T1 *a, T2 b)
Definition: cudaCompat.h:48
cms::cudacompat::blockIdx
const dim3 blockIdx
Definition: cudaCompat.h:32