CMS 3D CMS Logo

gpuFitVertices.h
Go to the documentation of this file.
1 #ifndef RecoPixelVertexing_PixelVertexFinding_plugins_gpuFitVertices_h
2 #define RecoPixelVertexing_PixelVertexFinding_plugins_gpuFitVertices_h
3 
4 #include <algorithm>
5 #include <cmath>
6 #include <cstdint>
7 
10 
11 #include "gpuVertexFinder.h"
12 
13 namespace gpuVertexFinder {
14 
16  WsSoAView& pws,
17  float chi2Max // for outlier rejection
18  ) {
19  constexpr bool verbose = false; // in principle the compiler should optmize out if false
20 
21  auto& __restrict__ data = pdata;
22  auto& __restrict__ ws = pws;
23  auto nt = ws.ntrks();
24  float const* __restrict__ zt = ws.zt();
25  float const* __restrict__ ezt2 = ws.ezt2();
26  float* __restrict__ zv = data.zv();
27  float* __restrict__ wv = data.wv();
28  float* __restrict__ chi2 = data.chi2();
29  uint32_t& nvFinal = data.nvFinal();
30  uint32_t& nvIntermediate = ws.nvIntermediate();
31 
32  int32_t* __restrict__ nn = data.ndof();
33  int32_t* __restrict__ iv = ws.iv();
34 
37  auto foundClusters = nvFinal;
38 
39  // zero
40  for (auto i = threadIdx.x; i < foundClusters; i += blockDim.x) {
41  zv[i] = 0;
42  wv[i] = 0;
43  chi2[i] = 0;
44  }
45 
46  // only for test
47  __shared__ int noise;
48  if (verbose && 0 == threadIdx.x)
49  noise = 0;
50 
51  __syncthreads();
52 
53  // compute cluster location
54  for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
55  if (iv[i] > 9990) {
56  if (verbose)
57  atomicAdd(&noise, 1);
58  continue;
59  }
60  assert(iv[i] >= 0);
61  assert(iv[i] < int(foundClusters));
62  auto w = 1.f / ezt2[i];
63  atomicAdd_block(&zv[iv[i]], zt[i] * w);
64  atomicAdd_block(&wv[iv[i]], w);
65  }
66 
67  __syncthreads();
68  // reuse nn
69  for (auto i = threadIdx.x; i < foundClusters; i += blockDim.x) {
70  assert(wv[i] > 0.f);
71  zv[i] /= wv[i];
72  nn[i] = -1; // ndof
73  }
74  __syncthreads();
75 
76  // compute chi2
77  for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
78  if (iv[i] > 9990)
79  continue;
80 
81  auto c2 = zv[iv[i]] - zt[i];
82  c2 *= c2 / ezt2[i];
83  if (c2 > chi2Max) {
84  iv[i] = 9999;
85  continue;
86  }
87  atomicAdd_block(&chi2[iv[i]], c2);
88  atomicAdd_block(&nn[iv[i]], 1);
89  }
90  __syncthreads();
91  for (auto i = threadIdx.x; i < foundClusters; i += blockDim.x)
92  if (nn[i] > 0)
93  wv[i] *= float(nn[i]) / chi2[i];
94 
95  if (verbose && 0 == threadIdx.x)
96  printf("found %d proto clusters ", foundClusters);
97  if (verbose && 0 == threadIdx.x)
98  printf("and %d noise\n", noise);
99  }
100 
101  __global__ void fitVerticesKernel(VtxSoAView pdata,
102  WsSoAView pws,
103  float chi2Max // for outlier rejection
104  ) {
105  fitVertices(pdata, pws, chi2Max);
106  }
107 
108 } // namespace gpuVertexFinder
109 
110 #endif // RecoPixelVertexing_PixelVertexFinding_plugins_gpuFitVertices_h
const dim3 threadIdx
Definition: cudaCompat.h:29
float *__restrict__ chi2
#define __forceinline__
Definition: cudaCompat.h:22
int32_t *__restrict__ iv
T w() const
auto &__restrict__ data
fitVertices(pdata, pws, maxChi2ForFirstFit)
#define __global__
Definition: cudaCompat.h:19
float *__restrict__ wv
float *__restrict__ zv
const dim3 blockDim
Definition: cudaCompat.h:30
float const *__restrict__ zt
auto &__restrict__ ws
__device__ WsSoAView & pws
double f[11][100]
float const *__restrict__ ezt2
__shared__ int noise
T1 atomicAdd_block(T1 *a, T2 b)
Definition: cudaCompat.h:68
gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoAView WsSoAView
int32_t *__restrict__ nn
zVertex::ZVertexSoAView VtxSoAView
#define __device__
__shared__ unsigned int foundClusters
__device__ WsSoAView float chi2Max
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61