CMS 3D CMS Logo

gpuVertexFinder.cc
Go to the documentation of this file.
2 
5 
9 
11 #include "gpuClusterTracksDBSCAN.h"
13 #include "gpuFitVertices.h"
14 #include "gpuSortByPt2.h"
15 #include "gpuSplitVertices.h"
16 
17 #undef PIXVERTEX_DEBUG_PRODUCE
18 
19 namespace gpuVertexFinder {
20 
21  // reject outlier tracks that contribute more than this to the chi2 of the vertex fit
23  constexpr float maxChi2ForFinalFit = 5000.f;
24 
25  // split vertices with a chi2/NDoF greater than this
27 
28  template <typename TrackerTraits>
29  __global__ void loadTracks(
31  auto const* quality = tracks_view.quality();
33  auto first = blockIdx.x * blockDim.x + threadIdx.x;
34  for (int idx = first, nt = tracks_view.nTracks(); idx < nt; idx += gridDim.x * blockDim.x) {
36  assert(nHits >= 3);
37 
38  // initialize soa...
39  soa[idx].idv() = -1;
40 
42  continue; // no triplets
44  continue;
45 
46  auto pt = tracks_view[idx].pt();
47 
48  if (pt < ptMin)
49  continue;
50 
51  // clamp pt
52  pt = std::min(pt, ptMax);
53 
54  auto& data = pws;
55  auto it = atomicAdd(&data.ntrks(), 1);
56  data[it].itrk() = idx;
57  data[it].zt() = helper::zip(tracks_view, idx);
58  data[it].ezt2() = tracks_view[idx].covariance()(14);
59  data[it].ptt2() = pt * pt;
60  }
61  }
62 
63 // #define THREE_KERNELS
64 #ifndef THREE_KERNELS
65  __global__ void vertexFinderOneKernel(VtxSoAView pdata,
66  WsSoAView pws,
67  int minT, // min number of neighbours to be "seed"
68  float eps, // max absolute distance to cluster
69  float errmax, // max error to be "seed"
70  float chi2max // max normalized distance to cluster,
71  ) {
72  clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max);
73  __syncthreads();
75  __syncthreads();
77  __syncthreads();
79  __syncthreads();
81  }
82 #else
83  __global__ void vertexFinderKernel1(VtxSoAView pdata,
84  WsSoAView pws,
85  int minT, // min number of neighbours to be "seed"
86  float eps, // max absolute distance to cluster
87  float errmax, // max error to be "seed"
88  float chi2max // max normalized distance to cluster,
89  ) {
90  clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max);
91  __syncthreads();
93  }
94 
95  __global__ void vertexFinderKernel2(VtxSoAView pdata, WsSoAView pws) {
97  __syncthreads();
99  }
100 #endif
101 
102  template <typename TrackerTraits>
103 #ifdef __CUDACC__
106  float ptMin,
107  float ptMax) const {
108 #ifdef PIXVERTEX_DEBUG_PRODUCE
109  std::cout << "producing Vertices on GPU" << std::endl;
110 #endif // PIXVERTEX_DEBUG_PRODUCE
112 #else
114  float ptMin,
115  float ptMax) const {
116 #ifdef PIXVERTEX_DEBUG_PRODUCE
117  std::cout << "producing Vertices on CPU" << std::endl;
118 #endif // PIXVERTEX_DEBUG_PRODUCE
120 #endif
121  auto soa = vertices.view();
122 
123  assert(vertices.buffer());
124 
125 #ifdef __CUDACC__
127 #else
129 #endif
130 
131 #ifdef __CUDACC__
132  init<<<1, 1, 0, stream>>>(soa, ws_d.view());
133  auto blockSize = 128;
134  auto numberOfBlocks = (tracks_view.metadata().size() + blockSize - 1) / blockSize;
135  loadTracks<TrackerTraits><<<numberOfBlocks, blockSize, 0, stream>>>(tracks_view, soa, ws_d.view(), ptMin, ptMax);
136  cudaCheck(cudaGetLastError());
137 #else
138  init(soa, ws_d.view());
139  loadTracks<TrackerTraits>(tracks_view, soa, ws_d.view(), ptMin, ptMax);
140 #endif
141 
142 #ifdef __CUDACC__
143  // Running too many thread lead to problems when printf is enabled.
144  constexpr int maxThreadsForPrint = 1024 - 128;
145  constexpr int numBlocks = 1024;
146  constexpr int threadsPerBlock = 128;
147 
148  if (oneKernel_) {
149  // implemented only for density clustesrs
150 #ifndef THREE_KERNELS
151  vertexFinderOneKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max);
152 #else
153  vertexFinderKernel1<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max);
154  cudaCheck(cudaGetLastError());
155  // one block per vertex...
156  splitVerticesKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(soa, ws_d.view(), maxChi2ForSplit);
157  cudaCheck(cudaGetLastError());
158  vertexFinderKernel2<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view());
159 #endif
160  } else { // five kernels
161  if (useDensity_) {
162  clusterTracksByDensityKernel<<<1, maxThreadsForPrint, 0, stream>>>(
163  soa, ws_d.view(), minT, eps, errmax, chi2max);
164  } else if (useDBSCAN_) {
165  clusterTracksDBSCAN<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max);
166  } else if (useIterative_) {
167  clusterTracksIterative<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max);
168  }
169  cudaCheck(cudaGetLastError());
170  fitVerticesKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), maxChi2ForFirstFit);
171  cudaCheck(cudaGetLastError());
172  if (doSplitting_) {
173  // one block per vertex...
174  splitVerticesKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(soa, ws_d.view(), maxChi2ForSplit);
175  cudaCheck(cudaGetLastError());
176  fitVerticesKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), maxChi2ForFinalFit);
177  cudaCheck(cudaGetLastError());
178  }
179  sortByPt2Kernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view());
180  }
181  cudaCheck(cudaGetLastError());
182 #else // __CUDACC__
183  if (useDensity_) {
184  clusterTracksByDensity(soa, ws_d.view(), minT, eps, errmax, chi2max);
185  } else if (useDBSCAN_) {
186  clusterTracksDBSCAN(soa, ws_d.view(), minT, eps, errmax, chi2max);
187  } else if (useIterative_) {
188  clusterTracksIterative(soa, ws_d.view(), minT, eps, errmax, chi2max);
189  }
190 #ifdef PIXVERTEX_DEBUG_PRODUCE
191  std::cout << "found " << ws_d.view().nvIntermediate() << " vertices " << std::endl;
192 #endif // PIXVERTEX_DEBUG_PRODUCE
193  fitVertices(soa, ws_d.view(), maxChi2ForFirstFit);
194  // one block per vertex!
195  if (doSplitting_) {
196  splitVertices(soa, ws_d.view(), maxChi2ForSplit);
197  fitVertices(soa, ws_d.view(), maxChi2ForFinalFit);
198  }
199  sortByPt2(soa, ws_d.view());
200 #endif
201 
202  return vertices;
203  }
204 
205  template class Producer<pixelTopology::Phase1>;
206  template class Producer<pixelTopology::Phase2>;
208 } // namespace gpuVertexFinder
ZVertexSoAHost make(const TkSoAConstView &tracks_view, float ptMin, float ptMax) const
const dim3 threadIdx
Definition: cudaCompat.h:29
Definition: helper.py:1
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView & pdata
const dim3 gridDim
Definition: cudaCompat.h:33
ZVertexSoADevice makeAsync(cudaStream_t stream, const TkSoAConstView &tracks_view, float ptMin, float ptMax) const
uint32_t const *__restrict__ TkSoAView< TrackerTraits > tracks_view
static constexpr __host__ __device__ float zip(const TrackSoAConstView &tracks, int32_t i)
__device__ WsSoAView int float float float chi2max
auto &__restrict__ data
fitVertices(pdata, pws, maxChi2ForFirstFit)
#define __global__
Definition: cudaCompat.h:19
PixelVertexWorkSpaceSoAHost< zVertex::utilities::MAXTRACKS > PixelVertexWorkSpaceSoAHost
const dim3 blockDim
Definition: cudaCompat.h:30
int init
Definition: HydjetWrapper.h:66
VtxSoAView WsSoAView float ptMin
__device__ WsSoAView int float eps
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
constexpr float maxChi2ForFirstFit
sortByPt2(pdata, pws)
string quality
__device__ WsSoAView & pws
__device__ WsSoAView int minT
VtxSoAView WsSoAView float float ptMax
splitVertices(pdata, pws, maxChi2ForSplit)
const dim3 blockIdx
Definition: cudaCompat.h:32
static constexpr __host__ __device__ int nHits(const TrackSoAConstView &tracks, int i)
__device__ WsSoAView int float float errmax
gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoAView WsSoAView
constexpr float maxChi2ForFinalFit
static constexpr __host__ __device__ bool isTriplet(const TrackSoAConstView &tracks, int i)
typename TrackSoA< TrackerTraits >::template TrackSoALayout<>::ConstView TrackSoAConstView
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69
constexpr float maxChi2ForSplit
TupleMultiplicity< TrackerTraits > const *__restrict__ uint32_t nHits
zVertex::ZVertexSoAView VtxSoAView
PixelVertexWorkSpaceSoADevice< zVertex::utilities::MAXTRACKS > PixelVertexWorkSpaceSoADevice
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61