CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
List of all members | Public Types | Public Member Functions | Private Attributes
gpuVertexFinder::Producer Class Reference

#include <gpuVertexFinder.h>

Public Types

using TkSoA = pixelTrack::TrackSoA
 
using WorkSpace = gpuVertexFinder::WorkSpace
 
using ZVertices = ZVertexSoA
 

Public Member Functions

ZVertexHeterogeneous make (TkSoA const *tksoa, float ptMin, float ptMax) const
 
ZVertexHeterogeneous makeAsync (cudaStream_t stream, TkSoA const *tksoa, float ptMin, float ptMax) const
 
 Producer (bool oneKernel, bool useDensity, bool useDBSCAN, bool useIterative, int iminT, float ieps, float ierrmax, float ichi2max)
 
 ~Producer ()=default
 

Private Attributes

float chi2max
 
float eps
 
float errmax
 
int minT
 
const bool oneKernel_
 
const bool useDBSCAN_
 
const bool useDensity_
 
const bool useIterative_
 

Detailed Description

Definition at line 41 of file gpuVertexFinder.h.

Member Typedef Documentation

Definition at line 45 of file gpuVertexFinder.h.

Definition at line 44 of file gpuVertexFinder.h.

Definition at line 43 of file gpuVertexFinder.h.

Constructor & Destructor Documentation

gpuVertexFinder::Producer::Producer ( bool  oneKernel,
bool  useDensity,
bool  useDBSCAN,
bool  useIterative,
int  iminT,
float  ieps,
float  ierrmax,
float  ichi2max 
)
inline

Definition at line 47 of file gpuVertexFinder.h.

56  : oneKernel_(oneKernel && !(useDBSCAN || useIterative)),
57  useDensity_(useDensity),
58  useDBSCAN_(useDBSCAN),
59  useIterative_(useIterative),
60  minT(iminT),
61  eps(ieps),
62  errmax(ierrmax),
63  chi2max(ichi2max) {}
gpuVertexFinder::Producer::~Producer ( )
default

Member Function Documentation

ZVertexHeterogeneous gpuVertexFinder::Producer::make ( TkSoA const *  tksoa,
float  ptMin,
float  ptMax 
) const

Definition at line 104 of file gpuVertexFinder.cc.

References gpuVertexFinder::assert(), chi2max, gather_cfg::cout, cudaCheck, eps, errmax, HeterogeneousSoA< T >::get(), gpuVertexFinder::init(), gpuVertexFinder::maxChi2ForFinalFit, gpuVertexFinder::maxChi2ForFirstFit, gpuVertexFinder::maxChi2ForSplit, minT, oneKernel_, gpuVertexFinder::ptMax, gpuVertexFinder::ptMin, gpuVertexFinder::soa, cms::cuda::stream, TrackSoAHeterogeneousT< S >::stride(), useDBSCAN_, useDensity_, useIterative_, and beam_dqm_sourceclient-live_cfg::vertices.

Referenced by PixelVertexProducerCUDA::produceOnCPU().

104  {
105 #ifdef PIXVERTEX_DEBUG_PRODUCE
106  std::cout << "producing Vertices on CPU" << std::endl;
107 #endif // PIXVERTEX_DEBUG_PRODUCE
108  ZVertexHeterogeneous vertices(std::make_unique<ZVertexSoA>());
109 #endif
110  assert(tksoa);
111  auto* soa = vertices.get();
112  assert(soa);
113 
114 #ifdef __CUDACC__
115  auto ws_d = cms::cuda::make_device_unique<WorkSpace>(stream);
116 #else
117  auto ws_d = std::make_unique<WorkSpace>();
118 #endif
119 
120 #ifdef __CUDACC__
121  init<<<1, 1, 0, stream>>>(soa, ws_d.get());
122  auto blockSize = 128;
123  auto numberOfBlocks = (TkSoA::stride() + blockSize - 1) / blockSize;
124  loadTracks<<<numberOfBlocks, blockSize, 0, stream>>>(tksoa, soa, ws_d.get(), ptMin, ptMax);
125  cudaCheck(cudaGetLastError());
126 #else
127  init(soa, ws_d.get());
128  loadTracks(tksoa, soa, ws_d.get(), ptMin, ptMax);
129 #endif
130 
131 #ifdef __CUDACC__
132  // Running too many thread lead to problems when printf is enabled.
133  constexpr int maxThreadsForPrint = 1024 - 128;
134  constexpr int numBlocks = 1024;
135  constexpr int threadsPerBlock = 128;
136 
137  if (oneKernel_) {
138  // implemented only for density clustesrs
139 #ifndef THREE_KERNELS
140  vertexFinderOneKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.get(), minT, eps, errmax, chi2max);
141 #else
142  vertexFinderKernel1<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.get(), minT, eps, errmax, chi2max);
143  cudaCheck(cudaGetLastError());
144  // one block per vertex...
145  splitVerticesKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(soa, ws_d.get(), maxChi2ForSplit);
146  cudaCheck(cudaGetLastError());
147  vertexFinderKernel2<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.get());
148 #endif
149  } else { // five kernels
150  if (useDensity_) {
151  clusterTracksByDensityKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.get(), minT, eps, errmax, chi2max);
152  } else if (useDBSCAN_) {
153  clusterTracksDBSCAN<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.get(), minT, eps, errmax, chi2max);
154  } else if (useIterative_) {
155  clusterTracksIterative<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.get(), minT, eps, errmax, chi2max);
156  }
157  cudaCheck(cudaGetLastError());
158  fitVerticesKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.get(), maxChi2ForFirstFit);
159  cudaCheck(cudaGetLastError());
160  // one block per vertex...
161  splitVerticesKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(soa, ws_d.get(), maxChi2ForSplit);
162  cudaCheck(cudaGetLastError());
163  fitVerticesKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.get(), maxChi2ForFinalFit);
164  cudaCheck(cudaGetLastError());
165  sortByPt2Kernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.get());
166  }
167  cudaCheck(cudaGetLastError());
168 #else // __CUDACC__
169  if (useDensity_) {
170  clusterTracksByDensity(soa, ws_d.get(), minT, eps, errmax, chi2max);
171  } else if (useDBSCAN_) {
172  clusterTracksDBSCAN(soa, ws_d.get(), minT, eps, errmax, chi2max);
173  } else if (useIterative_) {
174  clusterTracksIterative(soa, ws_d.get(), minT, eps, errmax, chi2max);
175  }
176 #ifdef PIXVERTEX_DEBUG_PRODUCE
177  std::cout << "found " << (*ws_d).nvIntermediate << " vertices " << std::endl;
178 #endif // PIXVERTEX_DEBUG_PRODUCE
179  fitVertices(soa, ws_d.get(), maxChi2ForFirstFit);
180  // one block per vertex!
181  splitVertices(soa, ws_d.get(), maxChi2ForSplit);
182  fitVertices(soa, ws_d.get(), maxChi2ForFinalFit);
183  sortByPt2(soa, ws_d.get());
184 #endif
185 
186  return vertices;
187  }
static constexpr int32_t stride()
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
constexpr float maxChi2ForFirstFit
ZVertexSoA * soa
constexpr float maxChi2ForFinalFit
tuple cout
Definition: gather_cfg.py:144
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69
constexpr float maxChi2ForSplit
ZVertexSoA WorkSpace float float ptMax
ZVertexSoA WorkSpace float ptMin
ZVertexHeterogeneous gpuVertexFinder::Producer::makeAsync ( cudaStream_t  stream,
TkSoA const *  tksoa,
float  ptMin,
float  ptMax 
) const

Member Data Documentation

float gpuVertexFinder::Producer::chi2max
private

Definition at line 79 of file gpuVertexFinder.h.

Referenced by make().

float gpuVertexFinder::Producer::eps
private

Definition at line 77 of file gpuVertexFinder.h.

Referenced by make().

float gpuVertexFinder::Producer::errmax
private

Definition at line 78 of file gpuVertexFinder.h.

Referenced by make().

int gpuVertexFinder::Producer::minT
private

Definition at line 76 of file gpuVertexFinder.h.

Referenced by make().

const bool gpuVertexFinder::Producer::oneKernel_
private

Definition at line 71 of file gpuVertexFinder.h.

Referenced by make().

const bool gpuVertexFinder::Producer::useDBSCAN_
private

Definition at line 73 of file gpuVertexFinder.h.

Referenced by make().

const bool gpuVertexFinder::Producer::useDensity_
private

Definition at line 72 of file gpuVertexFinder.h.

Referenced by make().

const bool gpuVertexFinder::Producer::useIterative_
private

Definition at line 74 of file gpuVertexFinder.h.

Referenced by make().