2 #ifdef DUMP_GPU_TK_TUPLES 7 #include <alpaka/alpaka.hpp> 27 template <
typename TrackerTraits>
36 counters_{cms::alpakatools::make_device_buffer<Counters>(
queue)},
39 device_hitToTuple_{cms::alpakatools::make_device_buffer<HitToTuple>(
queue)},
40 device_tupleMultiplicity_{cms::alpakatools::make_device_buffer<TupleMultiplicity>(
queue)},
44 cms::alpakatools::make_device_buffer<CACell[]>(
queue, m_params.caParams_.maxNumberOfDoublets_)},
46 device_isOuterHitOfCell_{
47 cms::alpakatools::make_device_buffer<OuterHitOfCellContainer[]>(
queue,
std::max(1u,
nhits - offsetBPIX2))},
48 isOuterHitOfCell_{cms::alpakatools::make_device_buffer<OuterHitOfCell>(
queue)},
50 device_theCellNeighbors_{cms::alpakatools::make_device_buffer<CellNeighborsVector>(
queue)},
51 device_theCellTracks_{cms::alpakatools::make_device_buffer<CellTracksVector>(
queue)},
53 cellStorage_{cms::alpakatools::make_device_buffer<unsigned char[]>(
55 TrackerTraits::maxNumOfActiveDoublets *
sizeof(
CellNeighbors) +
56 TrackerTraits::maxNumOfActiveDoublets *
sizeof(
CellTracks))},
57 device_cellCuts_{cms::alpakatools::make_device_buffer<CellCuts>(
queue)},
58 device_theCellNeighborsContainer_{
reinterpret_cast<CellNeighbors *
>(cellStorage_.data())},
59 device_theCellTracksContainer_{
reinterpret_cast<CellTracks *
>(
60 cellStorage_.data() + TrackerTraits::maxNumOfActiveDoublets *
sizeof(
CellNeighbors))},
64 cms::alpakatools::make_device_buffer<cms::alpakatools::AtomicPairCounter::DoubleWord[]>(
queue, 3u)},
68 *reinterpret_cast<uint32_t *>(device_storage_.data() + 2))} {
69 alpaka::memset(
queue, counters_, 0);
70 alpaka::memset(
queue, device_nCells_, 0);
71 alpaka::memset(
queue, cellStorage_, 0);
74 alpaka::memcpy(
queue, device_cellCuts_, cellCuts_h);
76 [[maybe_unused]]
TupleMultiplicity *tupleMultiplicityDeviceData = device_tupleMultiplicity_.data();
77 [[maybe_unused]]
HitToTuple *hitToTupleDeviceData = device_hitToTuple_.data();
79 TrackerTraits::maxHitsOnTrack + 1,
80 TrackerTraits::maxNumberOfTuples>;
81 TM *tm = device_tupleMultiplicity_.data();
87 template <
typename TrackerTraits>
92 using namespace caPixelDoublets;
98 uint32_t
nhits =
hh.metadata().size();
101 std::cout <<
"start tuple building. N hits " <<
nhits << std::endl;
110 const auto nthTot = 64;
112 auto blockSize = nthTot /
stride;
114 const auto rescale = numberOfBlocks / 65536;
115 blockSize *= (rescale + 1);
117 assert(numberOfBlocks < 65536);
118 assert(blockSize > 0 && 0 == blockSize % 16);
119 const Vec2D blks{numberOfBlocks, 1u};
121 const auto kernelConnectWorkDiv = cms::alpakatools::make_workdiv<Acc2D>(blks, thrs);
123 alpaka::exec<Acc2D>(
queue,
124 kernelConnectWorkDiv,
125 Kernel_connect<TrackerTraits>{},
126 this->device_hitTuple_apc_,
127 this->device_hitToTuple_apc_,
129 this->device_theCells_.data(),
130 this->device_nCells_.data(),
131 this->device_theCellNeighbors_.data(),
132 this->isOuterHitOfCell_.data(),
133 this->m_params.caParams_);
136 if (this->m_params.earlyFishbone_ and
nhits > offsetBPIX2) {
137 const auto nthTot = 128;
139 const auto blockSize = nthTot /
stride;
141 const Vec2D blks{numberOfBlocks, 1u};
143 const auto fishboneWorkDiv = cms::alpakatools::make_workdiv<Acc2D>(blks, thrs);
144 alpaka::exec<Acc2D>(
queue,
148 this->device_theCells_.data(),
149 this->device_nCells_.data(),
150 this->isOuterHitOfCell_.data(),
156 auto workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
157 alpaka::exec<Acc1D>(
queue,
159 Kernel_find_ntuplets<TrackerTraits>{},
162 this->device_theCells_.data(),
163 this->device_nCells_.data(),
164 this->device_theCellTracks_.data(),
165 this->device_hitTuple_apc_,
166 this->m_params.caParams_);
171 if (this->m_params.doStats_)
172 alpaka::exec<Acc1D>(
queue,
174 Kernel_mark_used<TrackerTraits>{},
175 this->device_theCells_.data(),
176 this->device_nCells_.data());
184 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
187 queue, workDiv1D,
typename HitContainer::finalizeBulk{}, this->device_hitTuple_apc_, &
tracks_view.hitIndices());
193 alpaka::exec<Acc1D>(
queue, workDiv1D, Kernel_fillHitDetIndices<TrackerTraits>{},
tracks_view,
hh);
198 alpaka::exec<Acc1D>(
queue, workDiv1D, Kernel_fillNLayers<TrackerTraits>{},
tracks_view, this->device_hitTuple_apc_);
206 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
208 alpaka::exec<Acc1D>(
queue,
210 Kernel_earlyDuplicateRemover<TrackerTraits>{},
211 this->device_theCells_.data(),
212 this->device_nCells_.data(),
214 this->m_params.dupPassThrough_);
221 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
223 alpaka::exec<Acc1D>(
queue,
225 Kernel_countMultiplicity<TrackerTraits>{},
227 this->device_tupleMultiplicity_.data());
230 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
232 queue, workDiv1D, Kernel_fillMultiplicity<TrackerTraits>{},
tracks_view, this->device_tupleMultiplicity_.data());
237 if (this->m_params.lateFishbone_ and
nhits > offsetBPIX2) {
238 const auto nthTot = 128;
240 const auto blockSize = nthTot /
stride;
242 const Vec2D blks{numberOfBlocks, 1u};
244 const auto workDiv2D = cms::alpakatools::make_workdiv<Acc2D>(blks, thrs);
246 alpaka::exec<Acc2D>(
queue,
250 this->device_theCells_.data(),
251 this->device_nCells_.data(),
252 this->isOuterHitOfCell_.data(),
262 template <
typename TrackerTraits>
264 uint32_t offsetBPIX2,
266 using namespace caPixelDoublets;
273 auto nhits =
hh.metadata().size();
275 std::cout <<
"building Doublets out of " <<
nhits <<
" Hits" << std::endl;
287 cms::alpakatools::make_workdiv<Acc1D>(1, 1),
288 [] ALPAKA_FN_ACC(Acc1D
const &acc,
296 this->isOuterHitOfCell_.data(),
297 this->device_isOuterHitOfCell_.data(),
301 int threadsPerBlock = 128;
304 const auto workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(
blocks, threadsPerBlock);
306 alpaka::exec<Acc1D>(
queue,
309 this->isOuterHitOfCell_.data(),
311 this->device_theCellNeighbors_.data(),
312 this->device_theCellNeighborsContainer_,
313 this->device_theCellTracks_.data(),
314 this->device_theCellTracksContainer_);
329 int blocks = (4 *
nhits + threadsPerBlock - 1) / threadsPerBlock;
332 const auto workDiv2D = cms::alpakatools::make_workdiv<Acc2D>(blks, thrs);
334 alpaka::exec<Acc2D>(
queue,
337 this->device_theCells_.data(),
338 this->device_nCells_.data(),
339 this->device_theCellNeighbors_.data(),
340 this->device_theCellTracks_.data(),
342 this->isOuterHitOfCell_.data(),
344 this->m_params.caParams_.maxNumberOfDoublets_,
345 this->m_params.cellCuts_);
352 template <
typename TrackerTraits>
358 uint32_t
nhits =
hh.metadata().size();
364 auto workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
366 queue, workDiv1D, Kernel_classifyTracks<TrackerTraits>{},
tracks_view, this->m_params.qualityCuts_);
368 if (this->m_params.lateFishbone_) {
371 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
372 alpaka::exec<Acc1D>(
queue,
374 Kernel_fishboneCleaner<TrackerTraits>{},
375 this->device_theCells_.data(),
376 this->device_nCells_.data(),
382 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
383 alpaka::exec<Acc1D>(
queue,
385 Kernel_fastDuplicateRemover<TrackerTraits>{},
386 this->device_theCells_.data(),
387 this->device_nCells_.data(),
389 this->m_params.dupPassThrough_);
394 if (this->m_params.doSharedHitCut_ || this->m_params.doStats_) {
397 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
398 alpaka::exec<Acc1D>(
queue,
400 Kernel_countHitInTracks<TrackerTraits>{},
402 this->device_hitToTuple_.data());
406 queue, workDiv1D, Kernel_fillHitInTracks<TrackerTraits>{},
tracks_view, this->device_hitToTuple_.data());
412 if (this->m_params.doSharedHitCut_) {
416 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
417 alpaka::exec<Acc1D>(
queue,
419 Kernel_rejectDuplicate<TrackerTraits>{},
421 this->m_params.minHitsForSharingCut_,
422 this->m_params.dupPassThrough_,
423 this->device_hitToTuple_.data());
425 alpaka::exec<Acc1D>(
queue,
427 Kernel_sharedHitCleaner<TrackerTraits>{},
430 this->m_params.minHitsForSharingCut_,
431 this->m_params.dupPassThrough_,
432 this->device_hitToTuple_.data());
434 if (this->m_params.useSimpleTripletCleaner_) {
437 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
438 alpaka::exec<Acc1D>(
queue,
440 Kernel_simpleTripletCleaner<TrackerTraits>{},
442 this->m_params.minHitsForSharingCut_,
443 this->m_params.dupPassThrough_,
444 this->device_hitToTuple_.data());
447 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
448 alpaka::exec<Acc1D>(
queue,
450 Kernel_tripletCleaner<TrackerTraits>{},
452 this->m_params.minHitsForSharingCut_,
453 this->m_params.dupPassThrough_,
454 this->device_hitToTuple_.data());
461 if (this->m_params.doStats_) {
464 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
466 alpaka::exec<Acc1D>(
queue,
468 Kernel_checkOverflows<TrackerTraits>{},
470 this->device_tupleMultiplicity_.data(),
471 this->device_hitToTuple_.data(),
472 this->device_hitTuple_apc_,
473 this->device_theCells_.data(),
474 this->device_nCells_.data(),
475 this->device_theCellNeighbors_.data(),
476 this->device_theCellTracks_.data(),
477 this->isOuterHitOfCell_.data(),
479 this->m_params.caParams_.maxNumberOfDoublets_,
480 this->counters_.data());
483 if (this->m_params.doStats_) {
487 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
488 alpaka::exec<Acc1D>(
queue,
490 Kernel_doStatsForHitInTracks<TrackerTraits>{},
491 this->device_hitToTuple_.data(),
492 this->counters_.data());
495 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
497 queue, workDiv1D, Kernel_doStatsForTracks<TrackerTraits>{},
tracks_view, this->counters_.data());
503 #ifdef DUMP_GPU_TK_TUPLES 504 static std::atomic<int>
iev(0);
506 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(1u, 32u);
508 std::lock_guard<std::mutex> guard(
lock);
510 for (
int k = 0;
k < 20000;
k += 500) {
511 alpaka::exec<Acc1D>(
queue,
513 Kernel_print_found_ntuplets<TrackerTraits>{},
516 this->device_hitToTuple_.data(),
522 alpaka::exec<Acc1D>(
queue,
524 Kernel_print_found_ntuplets<TrackerTraits>{},
527 this->device_hitToTuple_.data(),
reco::TrackSoAView< TrackerTraits > TkSoAView
uint32_t const *__restrict__ TkSoAView< TrackerTraits > tracks_view
caStructures::CellNeighborsT< TrackerTraits > CellNeighbors
CAHitNtupletGeneratorKernels(Params const ¶ms, uint32_t nhits, uint32_t offsetBPIX2, Queue &queue)
TkSoAView< TrackerTraits > HitToTuple< TrackerTraits > const *__restrict__ int32_t int32_t int iev
caStructures::template HitToTupleT< TrackerTraits > HitToTuple
typename reco::TrackSoA< TrackerTraits >::HitContainer HitContainer
constexpr uint32_t stride
TrackingRecHitSoAConstView< TrackerTraits > HitsConstView
def template(fileName, svg, replaceme="REPLACEME")
uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > OuterHitOfCell< TrackerTraits > int nActualPairs
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > hh
void buildDoublets(const HitsConstView &hh, uint32_t offsetBPIX2, Queue &queue)
caStructures::OuterHitOfCellT< TrackerTraits > OuterHitOfCell
constexpr auto getDoubletsFromHistoMaxBlockSize
caStructures::CellTracksT< TrackerTraits > CellTracks
caStructures::HitToTupleT< TrackerTraits > HitToTuple
void launchKernels(const HitsConstView &hh, uint32_t offsetBPIX2, TkSoAView &track_view, Queue &queue)
ALPAKA_ASSERT_ACC(offsets)
void classifyTuples(const HitsConstView &hh, TkSoAView &track_view, Queue &queue)
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > OuterHitOfCell< TrackerTraits > isOuterHitOfCell