10 #undef PIXVERTEX_DEBUG_PRODUCE 75 sortByPt2(pdata,
pws);
93 sortByPt2(pdata,
pws);
99 #ifdef PIXVERTEX_DEBUG_PRODUCE 100 std::cout <<
"producing Vertices on GPU" << std::endl;
101 #endif // PIXVERTEX_DEBUG_PRODUCE 105 #ifdef PIXVERTEX_DEBUG_PRODUCE 106 std::cout <<
"producing Vertices on CPU" << std::endl;
107 #endif // PIXVERTEX_DEBUG_PRODUCE 115 auto ws_d = cms::cuda::make_device_unique<WorkSpace>(
stream);
117 auto ws_d = std::make_unique<WorkSpace>();
121 init<<<1, 1, 0, stream>>>(
soa, ws_d.get());
122 auto blockSize = 128;
123 auto numberOfBlocks = (
TkSoA::stride() + blockSize - 1) / blockSize;
124 loadTracks<<<numberOfBlocks, blockSize, 0, stream>>>(tksoa,
soa, ws_d.get(),
ptMin,
ptMax);
133 constexpr
int maxThreadsForPrint = 1024 - 128;
134 constexpr
int numBlocks = 1024;
135 constexpr
int threadsPerBlock = 128;
139 #ifndef THREE_KERNELS 145 splitVerticesKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(
soa, ws_d.get(),
maxChi2ForSplit);
147 vertexFinderKernel2<<<1, maxThreadsForPrint, 0, stream>>>(
soa, ws_d.get());
151 clusterTracksByDensityKernel<<<1, maxThreadsForPrint, 0, stream>>>(
soa, ws_d.get(),
minT,
eps,
errmax,
chi2max);
161 splitVerticesKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(
soa, ws_d.get(),
maxChi2ForSplit);
165 sortByPt2Kernel<<<1, maxThreadsForPrint, 0, stream>>>(
soa, ws_d.get());
176 #ifdef PIXVERTEX_DEBUG_PRODUCE 177 std::cout <<
"found " << (*ws_d).nvIntermediate <<
" vertices " << std::endl;
178 #endif // PIXVERTEX_DEBUG_PRODUCE 183 sortByPt2(
soa, ws_d.get());
ZVertexHeterogeneous makeAsync(cudaStream_t stream, TkSoA const *tksoa, float ptMin, float ptMax) const
static constexpr int32_t stride()
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
constexpr float maxChi2ForFirstFit
WorkSpace int float float float chi2max
uint32_t const *__restrict__ TkSoA const *__restrict__ ptracks
WorkSpace int float float errmax
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell const int32_t nHits
constexpr float maxChi2ForFinalFit
ZVertexHeterogeneous make(TkSoA const *tksoa, float ptMin, float ptMax) const
#define cudaCheck(ARG,...)
constexpr float maxChi2ForSplit
ZVertexSoA WorkSpace float float ptMax
T1 atomicAdd(T1 *a, T2 b)
ZVertexSoA WorkSpace float ptMin