1 #ifndef RecoPixelVertexing_PixelVertexFinding_plugins_gpuVertexFinderImpl_h
2 #define RecoPixelVertexing_PixelVertexFinding_plugins_gpuVertexFinderImpl_h
// Switch the debug traces off: every `#ifdef PIXVERTEX_DEBUG_PRODUCE` section further
// down in this header is compiled out unless the macro is defined again after this line.
13 #undef PIXVERTEX_DEBUG_PRODUCE
// First call site: invoke sortByPt2 with the pdata and pws pointers of the
// enclosing function.
// NOTE(review): the enclosing function is outside this excerpt — confirm which
// kernel this belongs to against the full file.
76 sortByPt2(pdata,
pws);
// Second call site with the same arguments.
// NOTE(review): presumably inside a different enclosing function than the call
// above — confirm.
94 sortByPt2(pdata,
pws);
// Optional trace, compiled only when PIXVERTEX_DEBUG_PRODUCE is defined:
// announces on stdout that vertices are being produced on the GPU.
100 #ifdef PIXVERTEX_DEBUG_PRODUCE
101 std::cout <<
"producing Vertices on GPU" << std::endl;
102 #endif // PIXVERTEX_DEBUG_PRODUCE
// Same optional trace with the CPU message.
// NOTE(review): the guard that selects between the GPU and CPU variants is not
// visible in this excerpt.
106 #ifdef PIXVERTEX_DEBUG_PRODUCE
107 std::cout <<
"producing Vertices on CPU" << std::endl;
108 #endif // PIXVERTEX_DEBUG_PRODUCE
// Allocate the WorkSpace owned by ws_d; its raw pointer (ws_d.get()) is what the
// later calls receive. This variant allocates through
// cms::cuda::make_device_unique and passes `stream` to the allocator.
116 auto ws_d = cms::cuda::make_device_unique<WorkSpace>(
stream);
// Alternative variant: a default-constructed WorkSpace held by a std::unique_ptr.
// NOTE(review): both statements declare `ws_d`, so they must live in mutually
// exclusive preprocessor branches that this excerpt omits (presumably GPU vs CPU
// build) — confirm.
118 auto ws_d = std::make_unique<WorkSpace>();
// Launch init on `stream` with one block of one thread and no dynamic shared
// memory, handing it soa and the workspace pointer.
122 init<<<1, 1, 0, stream>>>(
soa, ws_d.get());
// Launch geometry for loadTracks: 128 threads per block, and the number of blocks
// is TkSoA::stride() divided by the block size, rounded up.
123 auto blockSize = 128;
124 auto numberOfBlocks = (
TkSoA::stride() + blockSize - 1) / blockSize;
// Kernel-launch form of loadTracks on `stream`, forwarding tksoa, soa, the
// workspace pointer and ptMin.
125 loadTracks<<<numberOfBlocks, blockSize, 0, stream>>>(tksoa,
soa, ws_d.get(),
ptMin);
// Plain function-call form of loadTracks with the same four arguments.
// NOTE(review): presumably the non-CUDA build path; the guard separating it from
// the launch above is not visible here.
129 loadTracks(tksoa,
soa, ws_d.get(),
ptMin);
// Compile-time launch parameters used by the kernel launches that follow.
// maxThreadsForPrint evaluates to 768 and is the block size of the single-block
// launches (vertexFinderKernel2, clusterTracksByDensityKernel, sortByPt2Kernel).
// NOTE(review): the reason for writing it as "1024 - 256" is not stated in the
// visible code — confirm the intent before changing it.
134 constexpr
int maxThreadsForPrint = 1024 - 256;
// Grid size (number of blocks) used for the splitVerticesKernel launches.
135 constexpr
int numBlocks = 1024;
// Block size (threads per block) used for the splitVerticesKernel launches.
136 constexpr
int threadsPerBlock = 128;
// NOTE(review): the #else/#endif matching this #ifndef, and any run-time branching
// between the launches below, are not visible in this excerpt — do not assume the
// launches run unconditionally or in exactly this order.
140 #ifndef THREE_KERNELS
// splitVerticesKernel on a numBlocks x threadsPerBlock grid, forwarding soa, the
// workspace pointer and maxChi2ForSplit.
146 splitVerticesKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(
soa, ws_d.get(),
maxChi2ForSplit);
// vertexFinderKernel2 in a single block of maxThreadsForPrint threads.
148 vertexFinderKernel2<<<1, maxThreadsForPrint, 0, stream>>>(
soa, ws_d.get());
// clusterTracksByDensityKernel in a single block of maxThreadsForPrint threads,
// forwarding minT, eps, errmax and chi2max.
152 clusterTracksByDensityKernel<<<1, maxThreadsForPrint, 0, stream>>>(
soa, ws_d.get(),
minT,
eps,
errmax,
chi2max);
// Second splitVerticesKernel launch site, with the same configuration and
// arguments as the first one.
162 splitVerticesKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(
soa, ws_d.get(),
maxChi2ForSplit);
// sortByPt2Kernel in a single block of maxThreadsForPrint threads.
166 sortByPt2Kernel<<<1, maxThreadsForPrint, 0, stream>>>(
soa, ws_d.get());
// Optional trace, compiled only when PIXVERTEX_DEBUG_PRODUCE is defined: prints
// the workspace's nvIntermediate counter.
// NOTE(review): this dereferences ws_d in host code, so it is only valid where
// ws_d points to host memory (presumably the std::make_unique path) — confirm the
// enclosing guard.
177 #ifdef PIXVERTEX_DEBUG_PRODUCE
178 std::cout <<
"found " << (*ws_d).nvIntermediate <<
" vertices " << std::endl;
179 #endif // PIXVERTEX_DEBUG_PRODUCE
// Direct (non-kernel-launch) call of sortByPt2 on soa and the workspace pointer.
// NOTE(review): presumably the CPU counterpart of the sortByPt2Kernel launch —
// the selecting guard is not visible in this excerpt.
184 sortByPt2(
soa, ws_d.get());
192 #endif // RecoPixelVertexing_PixelVertexFinding_plugins_gpuVertexFinderImpl_h