CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
List of all members | Public Types | Public Member Functions | Private Attributes
gpuVertexFinder::Producer Class Reference

#include <gpuVertexFinder.h>

Public Types

using TkSoA = pixelTrack::TrackSoA
 
using WorkSpace = gpuVertexFinder::WorkSpace
 
using ZVertices = ZVertexSoA
 

Public Member Functions

ZVertexHeterogeneous make (TkSoA const *tksoa, float ptMin) const
 
ZVertexHeterogeneous makeAsync (cudaStream_t stream, TkSoA const *tksoa, float ptMin) const
 
 Producer (bool oneKernel, bool useDensity, bool useDBSCAN, bool useIterative, int iminT, float ieps, float ierrmax, float ichi2max)
 
 ~Producer ()=default
 

Private Attributes

float chi2max
 
float eps
 
float errmax
 
int minT
 
const bool oneKernel_
 
const bool useDBSCAN_
 
const bool useDensity_
 
const bool useIterative_
 

Detailed Description

Definition at line 40 of file gpuVertexFinder.h.

Member Typedef Documentation

Definition at line 44 of file gpuVertexFinder.h.

Definition at line 43 of file gpuVertexFinder.h.

Definition at line 42 of file gpuVertexFinder.h.

Constructor & Destructor Documentation

gpuVertexFinder::Producer::Producer ( bool  oneKernel,
bool  useDensity,
bool  useDBSCAN,
bool  useIterative,
int  iminT,
float  ieps,
float  ierrmax,
float  ichi2max 
)
inline

Definition at line 46 of file gpuVertexFinder.h.

55  : oneKernel_(oneKernel && !(useDBSCAN || useIterative)),
56  useDensity_(useDensity),
57  useDBSCAN_(useDBSCAN),
58  useIterative_(useIterative),
59  minT(iminT),
60  eps(ieps),
61  errmax(ierrmax),
62  chi2max(ichi2max) {}
gpuVertexFinder::Producer::~Producer ( )
default

Member Function Documentation

ZVertexHeterogeneous gpuVertexFinder::Producer::make ( TkSoA const *  tksoa,
float  ptMin 
) const

Definition at line 102 of file gpuVertexFinder.cc.

References gpuVertexFinder::assert(), chi2max, gather_cfg::cout, cudaCheck, eps, errmax, HeterogeneousSoA< T >::get(), gpuVertexFinder::init(), gpuVertexFinder::maxChi2ForFinalFit, gpuVertexFinder::maxChi2ForFirstFit, gpuVertexFinder::maxChi2ForSplit, minT, oneKernel_, gpuVertexFinder::ptMin, gpuVertexFinder::soa, cms::cuda::stream, TrackSoAHeterogeneousT< S >::stride(), useDBSCAN_, useDensity_, useIterative_, and beam_dqm_sourceclient-live_cfg::vertices.

Referenced by PixelVertexProducerCUDA::produceOnCPU().

102  {
103 #ifdef PIXVERTEX_DEBUG_PRODUCE
104  std::cout << "producing Vertices on CPU" << std::endl;
105 #endif // PIXVERTEX_DEBUG_PRODUCE
106  ZVertexHeterogeneous vertices(std::make_unique<ZVertexSoA>());
107 #endif
108  assert(tksoa);
109  auto* soa = vertices.get();
110  assert(soa);
111 
112 #ifdef __CUDACC__
113  auto ws_d = cms::cuda::make_device_unique<WorkSpace>(stream);
114 #else
115  auto ws_d = std::make_unique<WorkSpace>();
116 #endif
117 
118 #ifdef __CUDACC__
119  init<<<1, 1, 0, stream>>>(soa, ws_d.get());
120  auto blockSize = 128;
121  auto numberOfBlocks = (TkSoA::stride() + blockSize - 1) / blockSize;
122  loadTracks<<<numberOfBlocks, blockSize, 0, stream>>>(tksoa, soa, ws_d.get(), ptMin);
123  cudaCheck(cudaGetLastError());
124 #else
125  init(soa, ws_d.get());
126  loadTracks(tksoa, soa, ws_d.get(), ptMin);
127 #endif
128 
129 #ifdef __CUDACC__
130  // Running too many thread lead to problems when printf is enabled.
131  constexpr int maxThreadsForPrint = 1024 - 128;
132  constexpr int numBlocks = 1024;
133  constexpr int threadsPerBlock = 128;
134 
135  if (oneKernel_) {
136  // implemented only for density clustesrs
137 #ifndef THREE_KERNELS
138  vertexFinderOneKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.get(), minT, eps, errmax, chi2max);
139 #else
140  vertexFinderKernel1<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.get(), minT, eps, errmax, chi2max);
141  cudaCheck(cudaGetLastError());
142  // one block per vertex...
143  splitVerticesKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(soa, ws_d.get(), maxChi2ForSplit);
144  cudaCheck(cudaGetLastError());
145  vertexFinderKernel2<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.get());
146 #endif
147  } else { // five kernels
148  if (useDensity_) {
149  clusterTracksByDensityKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.get(), minT, eps, errmax, chi2max);
150  } else if (useDBSCAN_) {
151  clusterTracksDBSCAN<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.get(), minT, eps, errmax, chi2max);
152  } else if (useIterative_) {
153  clusterTracksIterative<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.get(), minT, eps, errmax, chi2max);
154  }
155  cudaCheck(cudaGetLastError());
156  fitVerticesKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.get(), maxChi2ForFirstFit);
157  cudaCheck(cudaGetLastError());
158  // one block per vertex...
159  splitVerticesKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(soa, ws_d.get(), maxChi2ForSplit);
160  cudaCheck(cudaGetLastError());
161  fitVerticesKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.get(), maxChi2ForFinalFit);
162  cudaCheck(cudaGetLastError());
163  sortByPt2Kernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.get());
164  }
165  cudaCheck(cudaGetLastError());
166 #else // __CUDACC__
167  if (useDensity_) {
168  clusterTracksByDensity(soa, ws_d.get(), minT, eps, errmax, chi2max);
169  } else if (useDBSCAN_) {
170  clusterTracksDBSCAN(soa, ws_d.get(), minT, eps, errmax, chi2max);
171  } else if (useIterative_) {
172  clusterTracksIterative(soa, ws_d.get(), minT, eps, errmax, chi2max);
173  }
174 #ifdef PIXVERTEX_DEBUG_PRODUCE
175  std::cout << "found " << (*ws_d).nvIntermediate << " vertices " << std::endl;
176 #endif // PIXVERTEX_DEBUG_PRODUCE
177  fitVertices(soa, ws_d.get(), maxChi2ForFirstFit);
178  // one block per vertex!
179  splitVertices(soa, ws_d.get(), maxChi2ForSplit);
180  fitVertices(soa, ws_d.get(), maxChi2ForFinalFit);
181  sortByPt2(soa, ws_d.get());
182 #endif
183 
184  return vertices;
185  }
static constexpr int32_t stride()
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
constexpr float maxChi2ForFirstFit
ZVertexSoA * soa
constexpr float maxChi2ForFinalFit
tuple cout
Definition: gather_cfg.py:144
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69
constexpr float maxChi2ForSplit
ZVertexSoA WorkSpace float ptMin
ZVertexHeterogeneous gpuVertexFinder::Producer::makeAsync ( cudaStream_t  stream,
TkSoA const *  tksoa,
float  ptMin 
) const

Member Data Documentation

float gpuVertexFinder::Producer::chi2max
private

Definition at line 78 of file gpuVertexFinder.h.

Referenced by make().

float gpuVertexFinder::Producer::eps
private

Definition at line 76 of file gpuVertexFinder.h.

Referenced by make().

float gpuVertexFinder::Producer::errmax
private

Definition at line 77 of file gpuVertexFinder.h.

Referenced by make().

int gpuVertexFinder::Producer::minT
private

Definition at line 75 of file gpuVertexFinder.h.

Referenced by make().

const bool gpuVertexFinder::Producer::oneKernel_
private

Definition at line 70 of file gpuVertexFinder.h.

Referenced by make().

const bool gpuVertexFinder::Producer::useDBSCAN_
private

Definition at line 72 of file gpuVertexFinder.h.

Referenced by make().

const bool gpuVertexFinder::Producer::useDensity_
private

Definition at line 71 of file gpuVertexFinder.h.

Referenced by make().

const bool gpuVertexFinder::Producer::useIterative_
private

Definition at line 73 of file gpuVertexFinder.h.

Referenced by make().