CMS 3D CMS Logo

gpuFitVertices.h
Go to the documentation of this file.
1 #ifndef RecoPixelVertexing_PixelVertexFinding_plugins_gpuFitVertices_h
2 #define RecoPixelVertexing_PixelVertexFinding_plugins_gpuFitVertices_h
3 
4 #include <algorithm>
5 #include <cmath>
6 #include <cstdint>
7 
10 
11 #include "gpuVertexFinder.h"
12 
13 namespace gpuVertexFinder {
14 
15  __device__ __forceinline__ void fitVertices(ZVertices* pdata,
16  WorkSpace* pws,
17  float chi2Max // for outlier rejection
18  ) {
19  constexpr bool verbose = false; // in principle the compiler should optmize out if false
20 
21  auto& __restrict__ data = *pdata;
22  auto& __restrict__ ws = *pws;
23  auto nt = ws.ntrks;
24  float const* __restrict__ zt = ws.zt;
25  float const* __restrict__ ezt2 = ws.ezt2;
26  float* __restrict__ zv = data.zv;
27  float* __restrict__ wv = data.wv;
28  float* __restrict__ chi2 = data.chi2;
29  uint32_t& nvFinal = data.nvFinal;
30  uint32_t& nvIntermediate = ws.nvIntermediate;
31 
32  int32_t* __restrict__ nn = data.ndof;
33  int32_t* __restrict__ iv = ws.iv;
34 
35  assert(pdata);
36  assert(zt);
37 
40  auto foundClusters = nvFinal;
41 
42  // zero
43  for (auto i = threadIdx.x; i < foundClusters; i += blockDim.x) {
44  zv[i] = 0;
45  wv[i] = 0;
46  chi2[i] = 0;
47  }
48 
49  // only for test
50  __shared__ int noise;
51  if (verbose && 0 == threadIdx.x)
52  noise = 0;
53 
54  __syncthreads();
55 
56  // compute cluster location
57  for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
58  if (iv[i] > 9990) {
59  if (verbose)
60  atomicAdd(&noise, 1);
61  continue;
62  }
63  assert(iv[i] >= 0);
64  assert(iv[i] < int(foundClusters));
65  auto w = 1.f / ezt2[i];
66  atomicAdd_block(&zv[iv[i]], zt[i] * w);
67  atomicAdd_block(&wv[iv[i]], w);
68  }
69 
70  __syncthreads();
71  // reuse nn
72  for (auto i = threadIdx.x; i < foundClusters; i += blockDim.x) {
73  assert(wv[i] > 0.f);
74  zv[i] /= wv[i];
75  nn[i] = -1; // ndof
76  }
77  __syncthreads();
78 
79  // compute chi2
80  for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
81  if (iv[i] > 9990)
82  continue;
83 
84  auto c2 = zv[iv[i]] - zt[i];
85  c2 *= c2 / ezt2[i];
86  if (c2 > chi2Max) {
87  iv[i] = 9999;
88  continue;
89  }
90  atomicAdd_block(&chi2[iv[i]], c2);
91  atomicAdd_block(&nn[iv[i]], 1);
92  }
93  __syncthreads();
94  for (auto i = threadIdx.x; i < foundClusters; i += blockDim.x)
95  if (nn[i] > 0)
96  wv[i] *= float(nn[i]) / chi2[i];
97 
98  if (verbose && 0 == threadIdx.x)
99  printf("found %d proto clusters ", foundClusters);
100  if (verbose && 0 == threadIdx.x)
101  printf("and %d noise\n", noise);
102  }
103 
104  __global__ void fitVerticesKernel(ZVertices* pdata,
105  WorkSpace* pws,
106  float chi2Max // for outlier rejection
107  ) {
108  fitVertices(pdata, pws, chi2Max);
109  }
110 
111 } // namespace gpuVertexFinder
112 
113 #endif // RecoPixelVertexing_PixelVertexFinding_plugins_gpuFitVertices_h
mps_fire.i
i
Definition: mps_fire.py:428
dqmMemoryStats.float
float
Definition: dqmMemoryStats.py:127
f
double f[11][100]
Definition: MuScleFitUtils.cc:78
gpuVertexFinder::assert
assert(pdata)
gpuVertexFinder::chi2
float *__restrict__ chi2
Definition: gpuFitVertices.h:28
gpuVertexFinder::chi2Max
__device__ WorkSpace float chi2Max
Definition: gpuFitVertices.h:18
gpuVertexFinder::iv
int32_t *__restrict__ iv
Definition: gpuClusterTracksDBSCAN.h:42
gpuVertexFinder
Definition: gpuClusterTracksByDensity.h:13
gpuVertexFinder.h
gpuVertexFinder::ws
auto &__restrict__ ws
Definition: gpuClusterTracksDBSCAN.h:32
gpuVertexFinder::zv
float *__restrict__ zv
Definition: gpuFitVertices.h:26
gpuVertexFinder::wv
float *__restrict__ wv
Definition: gpuFitVertices.h:27
gpuVertexFinder::zt
float const *__restrict__ zt
Definition: gpuClusterTracksDBSCAN.h:34
__global__
#define __global__
Definition: cudaCompat.h:19
gpuVertexFinder::data
auto &__restrict__ data
Definition: gpuClusterTracksDBSCAN.h:31
gpuVertexFinder::nvFinal
uint32_t & nvFinal
Definition: gpuClusterTracksDBSCAN.h:37
w
const double w
Definition: UKUtility.cc:23
cms::cudacompat::atomicAdd
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
verbose
static constexpr int verbose
Definition: HLTExoticaSubAnalysis.cc:25
gpuVertexFinder::nt
auto nt
Definition: gpuClusterTracksDBSCAN.h:33
cms::cudacompat::blockDim
const dim3 blockDim
Definition: cudaCompat.h:30
__device__
#define __device__
Definition: SiPixelGainForHLTonGPU.h:15
gpuVertexFinder::pws
WorkSpace * pws
Definition: gpuClusterTracksDBSCAN.h:18
gpuVertexFinder::noise
__shared__ int noise
Definition: gpuFitVertices.h:50
cms::cudacompat::threadIdx
const dim3 threadIdx
Definition: cudaCompat.h:29
__forceinline__
#define __forceinline__
Definition: cudaCompat.h:22
HistoContainer.h
gpuVertexFinder::ezt2
float const *__restrict__ ezt2
Definition: gpuClusterTracksDBSCAN.h:35
cms::cudacompat::atomicAdd_block
T1 atomicAdd_block(T1 *a, T2 b)
Definition: cudaCompat.h:68
gpuVertexFinder::foundClusters
__shared__ unsigned int foundClusters
Definition: gpuClusterTracksDBSCAN.h:199
cuda_assert.h
gpuVertexFinder::__syncthreads
__syncthreads()
Definition: cudaCompat.h:108
gpuVertexFinder::ZVertices
ZVertexSoA ZVertices
Definition: gpuVertexFinder.h:11
gpuVertexFinder::nn
int32_t *__restrict__ nn
Definition: gpuClusterTracksDBSCAN.h:41
gpuVertexFinder::nvIntermediate
uint32_t & nvIntermediate
Definition: gpuClusterTracksDBSCAN.h:38