10 #undef PIXVERTEX_DEBUG_PRODUCE
12 namespace gpuVertexFinder {
53 data.ezt2[it] =
fit.covariance(idx)(14);
67 clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max);
75 sortByPt2(pdata, pws);
85 clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max);
93 sortByPt2(pdata, pws);
99 #ifdef PIXVERTEX_DEBUG_PRODUCE
100 std::cout <<
"producing Vertices on GPU" << std::endl;
101 #endif // PIXVERTEX_DEBUG_PRODUCE
105 #ifdef PIXVERTEX_DEBUG_PRODUCE
106 std::cout <<
"producing Vertices on CPU" << std::endl;
107 #endif // PIXVERTEX_DEBUG_PRODUCE
111 auto* soa = vertices.
get();
115 auto ws_d = cms::cuda::make_device_unique<WorkSpace>(
stream);
117 auto ws_d = std::make_unique<WorkSpace>();
121 init<<<1, 1, 0, stream>>>(
soa, ws_d.get());
122 auto blockSize = 128;
123 auto numberOfBlocks = (
TkSoA::stride() + blockSize - 1) / blockSize;
124 loadTracks<<<numberOfBlocks, blockSize, 0, stream>>>(tksoa,
soa, ws_d.get(),
ptMin,
ptMax);
127 init(soa, ws_d.get());
128 loadTracks(tksoa, soa, ws_d.get(),
ptMin,
ptMax);
133 constexpr
int maxThreadsForPrint = 1024 - 128;
134 constexpr
int numBlocks = 1024;
135 constexpr
int threadsPerBlock = 128;
139 #ifndef THREE_KERNELS
145 splitVerticesKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(
soa, ws_d.get(),
maxChi2ForSplit);
147 vertexFinderKernel2<<<1, maxThreadsForPrint, 0, stream>>>(
soa, ws_d.get());
151 clusterTracksByDensityKernel<<<1, maxThreadsForPrint, 0, stream>>>(
soa, ws_d.get(),
minT,
eps,
errmax,
chi2max);
161 splitVerticesKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(
soa, ws_d.get(),
maxChi2ForSplit);
165 sortByPt2Kernel<<<1, maxThreadsForPrint, 0, stream>>>(
soa, ws_d.get());
176 #ifdef PIXVERTEX_DEBUG_PRODUCE
177 std::cout <<
"found " << (*ws_d).nvIntermediate <<
" vertices " << std::endl;
178 #endif // PIXVERTEX_DEBUG_PRODUCE
183 sortByPt2(soa, ws_d.get());
uint32_t const *__restrict__ TkSoA const *__restrict__ ptracks
static constexpr int32_t stride()
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
constexpr float maxChi2ForFirstFit
WorkSpace int float float float chi2max
WorkSpace int float float errmax
ZVertexHeterogeneous make(TkSoA const *tksoa, float ptMin, float ptMax) const
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell const int32_t nHits
constexpr float maxChi2ForFinalFit
ZVertexHeterogeneous makeAsync(cudaStream_t stream, TkSoA const *tksoa, float ptMin, float ptMax) const
#define cudaCheck(ARG,...)
constexpr float maxChi2ForSplit
ZVertexSoA WorkSpace float float ptMax
T1 atomicAdd(T1 *a, T2 b)
ZVertexSoA WorkSpace float ptMin