2 #ifdef DUMP_GPU_TK_TUPLES 7 #include <alpaka/alpaka.hpp> 27 template <
typename TrackerTraits>
36 counters_{cms::alpakatools::make_device_buffer<Counters>(
queue)},
39 device_hitToTuple_{cms::alpakatools::make_device_buffer<HitToTuple>(
queue)},
40 device_hitToTupleStorage_{
41 cms::alpakatools::make_device_buffer<typename HitToTuple::Counter[]>(
queue,
nhits + 1)},
42 device_tupleMultiplicity_{cms::alpakatools::make_device_buffer<TupleMultiplicity>(
queue)},
46 cms::alpakatools::make_device_buffer<CACell[]>(
queue, m_params.caParams_.maxNumberOfDoublets_)},
48 device_isOuterHitOfCell_{
49 cms::alpakatools::make_device_buffer<OuterHitOfCellContainer[]>(
queue,
std::max(1u,
nhits - offsetBPIX2))},
50 isOuterHitOfCell_{cms::alpakatools::make_device_buffer<OuterHitOfCell>(
queue)},
52 device_theCellNeighbors_{cms::alpakatools::make_device_buffer<CellNeighborsVector>(
queue)},
53 device_theCellTracks_{cms::alpakatools::make_device_buffer<CellTracksVector>(
queue)},
55 cellStorage_{cms::alpakatools::make_device_buffer<unsigned char[]>(
57 TrackerTraits::maxNumOfActiveDoublets *
sizeof(
CellNeighbors) +
58 TrackerTraits::maxNumOfActiveDoublets *
sizeof(
CellTracks))},
59 device_cellCuts_{cms::alpakatools::make_device_buffer<CellCuts>(
queue)},
60 device_theCellNeighborsContainer_{
reinterpret_cast<CellNeighbors *
>(cellStorage_.data())},
61 device_theCellTracksContainer_{
reinterpret_cast<CellTracks *
>(
62 cellStorage_.data() + TrackerTraits::maxNumOfActiveDoublets *
sizeof(
CellNeighbors))},
66 cms::alpakatools::make_device_buffer<cms::alpakatools::AtomicPairCounter::DoubleWord[]>(
queue, 3u)},
70 *reinterpret_cast<uint32_t *>(device_storage_.data() + 2))} {
72 std::cout <<
"Allocation for tuple building. N hits " <<
nhits << std::endl;
75 alpaka::memset(
queue, counters_, 0);
76 alpaka::memset(
queue, device_nCells_, 0);
77 alpaka::memset(
queue, cellStorage_, 0);
80 alpaka::memcpy(
queue, device_cellCuts_, cellCuts_h);
82 [[maybe_unused]]
TupleMultiplicity *tupleMultiplicityDeviceData = device_tupleMultiplicity_.data();
84 TrackerTraits::maxHitsOnTrack + 1,
85 TrackerTraits::maxNumberOfTuples>;
86 TM *tm = device_tupleMultiplicity_.data();
90 device_hitToTupleView_.assoc = device_hitToTuple_.data();
91 device_hitToTupleView_.offStorage = device_hitToTupleStorage_.data();
92 device_hitToTupleView_.offSize =
nhits + 1;
96 std::cout <<
"Allocations for CAHitNtupletGeneratorKernels: done!" << std::endl;
100 template <
typename TrackerTraits>
102 uint32_t offsetBPIX2,
105 using namespace caPixelDoublets;
111 uint32_t
nhits =
hh.metadata().size();
114 std::cout <<
"start tuple building. N hits " <<
nhits << std::endl;
123 const auto nthTot = 64;
125 auto blockSize = nthTot /
stride;
127 const auto rescale = numberOfBlocks / 65536;
128 blockSize *= (rescale + 1);
130 assert(numberOfBlocks < 65536);
131 assert(blockSize > 0 && 0 == blockSize % 16);
132 const Vec2D blks{numberOfBlocks, 1u};
134 const auto kernelConnectWorkDiv = cms::alpakatools::make_workdiv<Acc2D>(blks, thrs);
136 alpaka::exec<Acc2D>(
queue,
137 kernelConnectWorkDiv,
138 Kernel_connect<TrackerTraits>{},
139 this->device_hitTuple_apc_,
140 this->device_hitToTuple_apc_,
142 this->device_theCells_.data(),
143 this->device_nCells_.data(),
144 this->device_theCellNeighbors_.data(),
145 this->isOuterHitOfCell_.data(),
146 this->m_params.caParams_);
149 if (this->m_params.earlyFishbone_ and
nhits > offsetBPIX2) {
150 const auto nthTot = 128;
152 const auto blockSize = nthTot /
stride;
154 const Vec2D blks{numberOfBlocks, 1u};
156 const auto fishboneWorkDiv = cms::alpakatools::make_workdiv<Acc2D>(blks, thrs);
157 alpaka::exec<Acc2D>(
queue,
161 this->device_theCells_.data(),
162 this->device_nCells_.data(),
163 this->isOuterHitOfCell_.data(),
169 auto workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
170 alpaka::exec<Acc1D>(
queue,
172 Kernel_find_ntuplets<TrackerTraits>{},
175 this->device_theCells_.data(),
176 this->device_nCells_.data(),
177 this->device_theCellTracks_.data(),
178 this->device_hitTuple_apc_,
179 this->m_params.caParams_);
184 if (this->m_params.doStats_)
185 alpaka::exec<Acc1D>(
queue,
187 Kernel_mark_used<TrackerTraits>{},
188 this->device_theCells_.data(),
189 this->device_nCells_.data());
197 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
200 queue, workDiv1D,
typename HitContainer::finalizeBulk{}, this->device_hitTuple_apc_, &
tracks_view.hitIndices());
206 alpaka::exec<Acc1D>(
queue, workDiv1D, Kernel_fillHitDetIndices<TrackerTraits>{},
tracks_view,
hh);
211 alpaka::exec<Acc1D>(
queue, workDiv1D, Kernel_fillNLayers<TrackerTraits>{},
tracks_view, this->device_hitTuple_apc_);
219 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
221 alpaka::exec<Acc1D>(
queue,
223 Kernel_earlyDuplicateRemover<TrackerTraits>{},
224 this->device_theCells_.data(),
225 this->device_nCells_.data(),
227 this->m_params.dupPassThrough_);
234 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
236 alpaka::exec<Acc1D>(
queue,
238 Kernel_countMultiplicity<TrackerTraits>{},
240 this->device_tupleMultiplicity_.data());
243 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
245 queue, workDiv1D, Kernel_fillMultiplicity<TrackerTraits>{},
tracks_view, this->device_tupleMultiplicity_.data());
250 if (this->m_params.lateFishbone_ and
nhits > offsetBPIX2) {
251 const auto nthTot = 128;
253 const auto blockSize = nthTot /
stride;
255 const Vec2D blks{numberOfBlocks, 1u};
257 const auto workDiv2D = cms::alpakatools::make_workdiv<Acc2D>(blks, thrs);
259 alpaka::exec<Acc2D>(
queue,
263 this->device_theCells_.data(),
264 this->device_nCells_.data(),
265 this->isOuterHitOfCell_.data(),
275 template <
typename TrackerTraits>
277 uint32_t offsetBPIX2,
279 using namespace caPixelDoublets;
286 auto nhits =
hh.metadata().size();
288 std::cout <<
"building Doublets out of " <<
nhits <<
" Hits" << std::endl;
300 cms::alpakatools::make_workdiv<Acc1D>(1, 1),
301 [] ALPAKA_FN_ACC(Acc1D
const &acc,
309 this->isOuterHitOfCell_.data(),
310 this->device_isOuterHitOfCell_.data(),
314 int threadsPerBlock = 128;
317 const auto workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(
blocks, threadsPerBlock);
319 alpaka::exec<Acc1D>(
queue,
322 this->isOuterHitOfCell_.data(),
324 this->device_theCellNeighbors_.data(),
325 this->device_theCellNeighborsContainer_,
326 this->device_theCellTracks_.data(),
327 this->device_theCellTracksContainer_);
342 int blocks = (4 *
nhits + threadsPerBlock - 1) / threadsPerBlock;
345 const auto workDiv2D = cms::alpakatools::make_workdiv<Acc2D>(blks, thrs);
347 alpaka::exec<Acc2D>(
queue,
350 this->device_theCells_.data(),
351 this->device_nCells_.data(),
352 this->device_theCellNeighbors_.data(),
353 this->device_theCellTracks_.data(),
355 this->isOuterHitOfCell_.data(),
357 this->m_params.caParams_.maxNumberOfDoublets_,
358 this->m_params.cellCuts_);
365 template <
typename TrackerTraits>
371 uint32_t
nhits =
hh.metadata().size();
377 auto workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
379 queue, workDiv1D, Kernel_classifyTracks<TrackerTraits>{},
tracks_view, this->m_params.qualityCuts_);
381 if (this->m_params.lateFishbone_) {
384 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
385 alpaka::exec<Acc1D>(
queue,
387 Kernel_fishboneCleaner<TrackerTraits>{},
388 this->device_theCells_.data(),
389 this->device_nCells_.data(),
395 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
396 alpaka::exec<Acc1D>(
queue,
398 Kernel_fastDuplicateRemover<TrackerTraits>{},
399 this->device_theCells_.data(),
400 this->device_nCells_.data(),
402 this->m_params.dupPassThrough_);
407 if (this->m_params.doSharedHitCut_ || this->m_params.doStats_) {
410 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
411 alpaka::exec<Acc1D>(
queue,
413 Kernel_countHitInTracks<TrackerTraits>{},
415 this->device_hitToTuple_.data());
419 queue, workDiv1D, Kernel_fillHitInTracks<TrackerTraits>{},
tracks_view, this->device_hitToTuple_.data());
425 if (this->m_params.doSharedHitCut_) {
429 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
430 alpaka::exec<Acc1D>(
queue,
432 Kernel_rejectDuplicate<TrackerTraits>{},
434 this->m_params.minHitsForSharingCut_,
435 this->m_params.dupPassThrough_,
436 this->device_hitToTuple_.data());
438 alpaka::exec<Acc1D>(
queue,
440 Kernel_sharedHitCleaner<TrackerTraits>{},
443 this->m_params.minHitsForSharingCut_,
444 this->m_params.dupPassThrough_,
445 this->device_hitToTuple_.data());
447 if (this->m_params.useSimpleTripletCleaner_) {
450 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
451 alpaka::exec<Acc1D>(
queue,
453 Kernel_simpleTripletCleaner<TrackerTraits>{},
455 this->m_params.minHitsForSharingCut_,
456 this->m_params.dupPassThrough_,
457 this->device_hitToTuple_.data());
460 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
461 alpaka::exec<Acc1D>(
queue,
463 Kernel_tripletCleaner<TrackerTraits>{},
465 this->m_params.minHitsForSharingCut_,
466 this->m_params.dupPassThrough_,
467 this->device_hitToTuple_.data());
474 if (this->m_params.doStats_) {
477 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
479 alpaka::exec<Acc1D>(
queue,
481 Kernel_checkOverflows<TrackerTraits>{},
483 this->device_tupleMultiplicity_.data(),
484 this->device_hitToTuple_.data(),
485 this->device_hitTuple_apc_,
486 this->device_theCells_.data(),
487 this->device_nCells_.data(),
488 this->device_theCellNeighbors_.data(),
489 this->device_theCellTracks_.data(),
490 this->isOuterHitOfCell_.data(),
492 this->m_params.caParams_.maxNumberOfDoublets_,
493 this->counters_.data());
496 if (this->m_params.doStats_) {
500 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
501 alpaka::exec<Acc1D>(
queue,
503 Kernel_doStatsForHitInTracks<TrackerTraits>{},
504 this->device_hitToTuple_.data(),
505 this->counters_.data());
508 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
510 queue, workDiv1D, Kernel_doStatsForTracks<TrackerTraits>{},
tracks_view, this->counters_.data());
516 #ifdef DUMP_GPU_TK_TUPLES 517 static std::atomic<int>
iev(0);
519 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(1u, 32u);
521 std::lock_guard<std::mutex> guard(
lock);
523 for (
int k = 0;
k < 20000;
k += 500) {
524 alpaka::exec<Acc1D>(
queue,
526 Kernel_print_found_ntuplets<TrackerTraits>{},
529 this->device_hitToTuple_.data(),
535 alpaka::exec<Acc1D>(
queue,
537 Kernel_print_found_ntuplets<TrackerTraits>{},
540 this->device_hitToTuple_.data(),
reco::TrackSoAView< TrackerTraits > TkSoAView
uint32_t const *__restrict__ TkSoAView< TrackerTraits > tracks_view
caStructures::CellNeighborsT< TrackerTraits > CellNeighbors
CAHitNtupletGeneratorKernels(Params const ¶ms, uint32_t nhits, uint32_t offsetBPIX2, Queue &queue)
TkSoAView< TrackerTraits > HitToTuple< TrackerTraits > const *__restrict__ int32_t int32_t int iev
caStructures::template HitToTupleT< TrackerTraits > HitToTuple
typename reco::TrackSoA< TrackerTraits >::HitContainer HitContainer
constexpr uint32_t stride
TrackingRecHitSoAConstView< TrackerTraits > HitsConstView
def template(fileName, svg, replaceme="REPLACEME")
uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > OuterHitOfCell< TrackerTraits > int nActualPairs
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > hh
void buildDoublets(const HitsConstView &hh, uint32_t offsetBPIX2, Queue &queue)
caStructures::OuterHitOfCellT< TrackerTraits > OuterHitOfCell
constexpr auto getDoubletsFromHistoMaxBlockSize
caStructures::CellTracksT< TrackerTraits > CellTracks
void launchKernels(const HitsConstView &hh, uint32_t offsetBPIX2, TkSoAView &track_view, Queue &queue)
ALPAKA_ASSERT_ACC(offsets)
void classifyTuples(const HitsConstView &hh, TkSoAView &track_view, Queue &queue)
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > OuterHitOfCell< TrackerTraits > isOuterHitOfCell