10 #undef PIXVERTEX_DEBUG_PRODUCE
12 namespace gpuVertexFinder {
51 data.ezt2[it] =
fit.covariance(idx)(14);
65 clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max);
73 sortByPt2(pdata, pws);
83 clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max);
91 sortByPt2(pdata, pws);
97 #ifdef PIXVERTEX_DEBUG_PRODUCE
98 std::cout <<
"producing Vertices on GPU" << std::endl;
99 #endif // PIXVERTEX_DEBUG_PRODUCE
103 #ifdef PIXVERTEX_DEBUG_PRODUCE
104 std::cout <<
"producing Vertices on CPU" << std::endl;
105 #endif // PIXVERTEX_DEBUG_PRODUCE
109 auto* soa = vertices.
get();
113 auto ws_d = cms::cuda::make_device_unique<WorkSpace>(
stream);
115 auto ws_d = std::make_unique<WorkSpace>();
119 init<<<1, 1, 0, stream>>>(
soa, ws_d.get());
120 auto blockSize = 128;
121 auto numberOfBlocks = (
TkSoA::stride() + blockSize - 1) / blockSize;
122 loadTracks<<<numberOfBlocks, blockSize, 0, stream>>>(tksoa,
soa, ws_d.get(),
ptMin);
125 init(soa, ws_d.get());
126 loadTracks(tksoa, soa, ws_d.get(),
ptMin);
131 constexpr
int maxThreadsForPrint = 1024 - 128;
132 constexpr
int numBlocks = 1024;
133 constexpr
int threadsPerBlock = 128;
137 #ifndef THREE_KERNELS
143 splitVerticesKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(
soa, ws_d.get(),
maxChi2ForSplit);
145 vertexFinderKernel2<<<1, maxThreadsForPrint, 0, stream>>>(
soa, ws_d.get());
149 clusterTracksByDensityKernel<<<1, maxThreadsForPrint, 0, stream>>>(
soa, ws_d.get(),
minT,
eps,
errmax,
chi2max);
159 splitVerticesKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(
soa, ws_d.get(),
maxChi2ForSplit);
163 sortByPt2Kernel<<<1, maxThreadsForPrint, 0, stream>>>(
soa, ws_d.get());
174 #ifdef PIXVERTEX_DEBUG_PRODUCE
175 std::cout <<
"found " << (*ws_d).nvIntermediate <<
" vertices " << std::endl;
176 #endif // PIXVERTEX_DEBUG_PRODUCE
181 sortByPt2(soa, ws_d.get());
uint32_t const *__restrict__ TkSoA const *__restrict__ ptracks
static constexpr int32_t stride()
ZVertexHeterogeneous make(TkSoA const *tksoa, float ptMin) const
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
constexpr float maxChi2ForFirstFit
WorkSpace int float float float chi2max
WorkSpace int float float errmax
ZVertexHeterogeneous makeAsync(cudaStream_t stream, TkSoA const *tksoa, float ptMin) const
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell const int32_t nHits
constexpr float maxChi2ForFinalFit
#define cudaCheck(ARG,...)
constexpr float maxChi2ForSplit
T1 atomicAdd(T1 *a, T2 b)
ZVertexSoA WorkSpace float ptMin