db/d55/CAHitNtupletGeneratorKernels_8cc_source.html

 #include "RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsImpl.h"


 #include <mutex>


 namespace {

   // File-local mutex; classifyTuples below holds it (through std::lock_guard) while the

   // statistics kernels update the counters.

   // cuda atomics are NOT atomics on CPU so protect stat update with a mutex

   // waiting for a more general solution (including multiple devices) to be proposed and implemented

   std::mutex lock_stat;

 }  // namespace


 // Explicit specialisation of printCounters for CAHitNtupletGeneratorKernelsCPU:
 // simply forwards the given counters to kernel_printCounters.
 template <>

 void CAHitNtupletGeneratorKernelsCPU::printCounters(Counters const *counters) {

   kernel_printCounters(counters);

 }


 // Explicit specialisation of buildDoublets for CAHitNtupletGeneratorKernelsCPU.
 //
 // Allocates the per-event work buffers with plain std::make_unique, passes them to
 // gpuPixelDoublets::initDoublets and then, unless the event has no hits, calls
 // gpuPixelDoublets::getDoubletsFromHisto for the selected number of layer pairs.
 //
 //   hh     : hits of the event; nHits(), offsetBPIX2() and view() are read from it
 //   stream : not used in this specialisation (only the commented-out Traits allocations took it)
 template <>
 void CAHitNtupletGeneratorKernelsCPU::buildDoublets(HitsOnCPU const &hh, cudaStream_t stream) {
   auto const numHits = hh.nHits();

 #ifdef NTUPLE_DEBUG
   std::cout << "building Doublets out of " << numHits << " Hits. BPIX2 offset is " << hh.offsetBPIX2() << std::endl;
 #endif

   // The workspace is dimensioned heuristically from the number of hits.
   // Since this specialisation is compiled for the CPU only, every buffer below comes from
   // std::make_unique rather than from the Traits allocators.

   // one container per hit, but never an empty array
   device_isOuterHitOfCell_ = std::make_unique<GPUCACell::OuterHitOfCellContainer[]>(std::max(1U, numHits));
   assert(device_isOuterHitOfCell_.get());
   isOuterHitOfCell_ = GPUCACell::OuterHitOfCell{device_isOuterHitOfCell_.get(), hh.offsetBPIX2()};

   // a single byte buffer stores the CellNeighbors containers first, the CellTracks containers right after
   auto const neighborsBytes = caConstants::maxNumOfActiveDoublets * sizeof(GPUCACell::CellNeighbors);
   auto const tracksBytes = caConstants::maxNumOfActiveDoublets * sizeof(GPUCACell::CellTracks);
   cellStorage_ = std::make_unique<unsigned char[]>(neighborsBytes + tracksBytes);
   unsigned char *const storageBegin = cellStorage_.get();
   device_theCellNeighborsContainer_ = reinterpret_cast<GPUCACell::CellNeighbors *>(storageBegin);
   device_theCellTracksContainer_ = reinterpret_cast<GPUCACell::CellTracks *>(storageBegin + neighborsBytes);

   gpuPixelDoublets::initDoublets(isOuterHitOfCell_,
                                  numHits,
                                  device_theCellNeighbors_.get(),
                                  device_theCellNeighborsContainer_,
                                  device_theCellTracks_.get(),
                                  device_theCellTracksContainer_);

   // the cell array is allocated even for an empty event; only the doublet search is skipped
   device_theCells_ = std::make_unique<GPUCACell[]>(params_.maxNumberOfDoublets_);
   if (numHits == 0) {
     return;  // protect against empty events
   }

   // Number of layer pairs to process:
   //  - by default all of them, minus the forward "jumping" pairs unless those are explicitly included
   //  - for quadruplets (minHitsPerNtuplet_ > 3) every "jumping" pair is excluded, whatever the flag says
   auto nActualPairs =
       params_.includeJumpingForwardDoublets_ ? gpuPixelDoublets::nPairs : gpuPixelDoublets::nPairsForTriplets;
   if (params_.minHitsPerNtuplet_ > 3) {
     nActualPairs = gpuPixelDoublets::nPairsForQuadruplets;
   }
   assert(nActualPairs <= gpuPixelDoublets::nPairs);

   gpuPixelDoublets::getDoubletsFromHisto(device_theCells_.get(),
                                          device_nCells_,
                                          device_theCellNeighbors_.get(),
                                          device_theCellTracks_.get(),
                                          hh.view(),
                                          isOuterHitOfCell_,
                                          nActualPairs,
                                          params_.idealConditions_,
                                          params_.doClusterCut_,
                                          params_.doZ0Cut_,
                                          params_.doPtCut_,
                                          params_.maxNumberOfDoublets_);
 }


 // Explicit specialisation of launchKernels for CAHitNtupletGeneratorKernelsCPU.
 //
 // Calls, in this order: kernel_connect, the optional early fishbone, kernel_find_ntuplets,
 // the optional kernel_mark_used (statistics only), the bulk finalisation of the tuple container,
 // the det-index / layer-count fillers, the early duplicate remover, the tuple-multiplicity
 // count/finalize/fill sequence and the optional late fishbone.
 //
 //   hh         : hits of the event
 //   tracks_d   : track SoA; its hitIndices, detIndices and quality data are handed to the kernels
 //   cudaStream : forwarded to cms::cuda::launchZero and cms::cuda::launchFinalize
 template <>
 void CAHitNtupletGeneratorKernelsCPU::launchKernels(HitsOnCPU const &hh, TkSoA *tracks_d, cudaStream_t cudaStream) {
   auto *const tuples = &tracks_d->hitIndices;
   auto *const detIds = &tracks_d->detIndices;
   auto *const qualities = tracks_d->qualityData();
   assert(tuples && qualities);

   // start from an empty tuple container
   cms::cuda::launchZero(tuples, cudaStream);

   auto const nhits = hh.nHits();
   // the fishbone step is only run when there is more than one hit
   bool const moreThanOneHit = nhits > 1;

   // fetched once and reused by every kernel call below
   auto *const hitsView = hh.view();
   auto *const cells = device_theCells_.get();

   // NOTE: combinatorial cleaning such as fishbone is too expensive to apply before the cells are connected

   kernel_connect(device_hitTuple_apc_,
                  device_hitToTuple_apc_,  // passed only so that it gets reset for the next kernel
                  hitsView,
                  cells,
                  device_nCells_,
                  device_theCellNeighbors_.get(),
                  isOuterHitOfCell_,
                  params_.hardCurvCut_,
                  params_.ptmin_,
                  params_.CAThetaCutBarrel_,
                  params_.CAThetaCutForward_,
                  params_.dcaCutInnerTriplet_,
                  params_.dcaCutOuterTriplet_);

   if (moreThanOneHit && params_.earlyFishbone_) {
     gpuPixelDoublets::fishbone(hitsView, cells, device_nCells_, isOuterHitOfCell_, nhits, false);
   }

   kernel_find_ntuplets(hitsView,
                        cells,
                        device_nCells_,
                        device_theCellTracks_.get(),
                        tuples,
                        device_hitTuple_apc_,
                        qualities,
                        params_.minHitsPerNtuplet_);

   if (params_.doStats_) {
     kernel_mark_used(cells, device_nCells_);
   }

   cms::cuda::finalizeBulk(device_hitTuple_apc_, tuples);

   kernel_fillHitDetIndices(tuples, hitsView, detIds);
   kernel_fillNLayers(tracks_d);

   // drop duplicates, i.e. tracks that share a doublet
   kernel_earlyDuplicateRemover(cells, device_nCells_, tracks_d, qualities, params_.dupPassThrough_);

   // tuple multiplicity: count, finalize, fill
   auto *const multiplicity = device_tupleMultiplicity_.get();
   kernel_countMultiplicity(tuples, qualities, multiplicity);
   cms::cuda::launchFinalize(multiplicity, cudaStream);
   kernel_fillMultiplicity(tuples, qualities, multiplicity);

   if (moreThanOneHit && params_.lateFishbone_) {
     gpuPixelDoublets::fishbone(hitsView, cells, device_nCells_, isOuterHitOfCell_, nhits, true);
   }
 }


 // Explicit specialisation of classifyTuples for CAHitNtupletGeneratorKernelsCPU.
 //
 // Calls, in this order: kernel_classifyTracks, the optional fishbone cleaner (lateFishbone_),
 // kernel_fastDuplicateRemover, the hit->track "map" construction (when doSharedHitCut_ or
 // doStats_ is set), the shared-hit based cleaners (doSharedHitCut_) and finally the statistics
 // kernels (doStats_), the latter under the file-local mutex lock_stat.
 //
 //   hh         : hits of the event
 //   tracks_d   : track SoA; its hitIndices and quality data are handed to the kernels
 //   cudaStream : forwarded to cms::cuda::launchFinalize
 template <>
 void CAHitNtupletGeneratorKernelsCPU::classifyTuples(HitsOnCPU const &hh, TkSoA *tracks_d, cudaStream_t cudaStream) {
   // the hit count is narrowed to int32_t on purpose, exactly as the original implicit conversion did
   auto const nhits = static_cast<int32_t>(hh.nHits());

   auto const *tuples_d = &tracks_d->hitIndices;
   auto *quality_d = tracks_d->qualityData();

   // classify tracks based on kinematics
   kernel_classifyTracks(tuples_d, tracks_d, params_.cuts_, quality_d);

   if (params_.lateFishbone_) {
     // apply fishbone cleaning to good tracks
     kernel_fishboneCleaner(device_theCells_.get(), device_nCells_, quality_d);
   }

   // remove duplicates (tracks that share a doublet)
   kernel_fastDuplicateRemover(device_theCells_.get(), device_nCells_, tracks_d, params_.dupPassThrough_);

   // fill hit->track "map": needed by the shared-hit cleaners and by the statistics kernels
   if (params_.doSharedHitCut_ || params_.doStats_) {
     kernel_countHitInTracks(tuples_d, quality_d, device_hitToTuple_.get());
     cms::cuda::launchFinalize(hitToTupleView_, cudaStream);
     kernel_fillHitInTracks(tuples_d, quality_d, device_hitToTuple_.get());
   }

   // remove duplicates (tracks that share at least one hit)
   if (params_.doSharedHitCut_) {
     kernel_rejectDuplicate(
         tracks_d, quality_d, params_.minHitsForSharingCut_, params_.dupPassThrough_, device_hitToTuple_.get());

     kernel_sharedHitCleaner(hh.view(),
                             tracks_d,
                             quality_d,
                             params_.minHitsForSharingCut_,
                             params_.dupPassThrough_,
                             device_hitToTuple_.get());

     if (params_.useSimpleTripletCleaner_) {
       kernel_simpleTripletCleaner(
           tracks_d, quality_d, params_.minHitsForSharingCut_, params_.dupPassThrough_, device_hitToTuple_.get());
     } else {
       kernel_tripletCleaner(
           tracks_d, quality_d, params_.minHitsForSharingCut_, params_.dupPassThrough_, device_hitToTuple_.get());
     }
   }

   if (params_.doStats_) {
     // All statistics kernels of one call run inside a single critical section: the mutex is
     // taken once instead of being released and immediately re-acquired between them.
     std::lock_guard guard(lock_stat);

     kernel_checkOverflows(tuples_d,
                           device_tupleMultiplicity_.get(),
                           device_hitToTuple_.get(),
                           device_hitTuple_apc_,
                           device_theCells_.get(),
                           device_nCells_,
                           device_theCellNeighbors_.get(),
                           device_theCellTracks_.get(),
                           isOuterHitOfCell_,
                           nhits,
                           params_.maxNumberOfDoublets_,
                           counters_);

     // counters (add flag???)
     kernel_doStatsForHitInTracks(device_hitToTuple_.get(), counters_);
     kernel_doStatsForTracks(tuples_d, quality_d, counters_);
   }

 #ifdef DUMP_GPU_TK_TUPLES
   // Use the value returned by the increment itself: reading the atomic again afterwards could
   // pick up an increment made by another thread, so two calls could print the same number.
   static std::atomic<int> iev(0);
   int const thisEvent = ++iev;
   kernel_print_found_ntuplets(hh.view(), tuples_d, tracks_d, quality_d, device_hitToTuple_.get(), 100, thisEvent);
 #endif
 }

CAHitNtupletGeneratorKernels::launchKernels
void launchKernels(HitsOnCPU const &hh, TkSoA *tuples_d, cudaStream_t cudaStream)
Definition: CAHitNtupletGeneratorKernels.cc:81

cAHitNtupletGenerator::Params::includeJumpingForwardDoublets_
const bool includeJumpingForwardDoublets_
Definition: CAHitNtupletGeneratorKernels.h:113

caConstants::maxNumOfActiveDoublets
constexpr uint32_t maxNumOfActiveDoublets
Definition: CAConstants.h:41

CAHitNtupletGeneratorKernels::device_hitToTuple_
unique_ptr< HitToTuple > device_hitToTuple_
Definition: CAHitNtupletGeneratorKernels.h:210

cAHitNtupletGenerator::Params::doSharedHitCut_
const bool doSharedHitCut_
Definition: CAHitNtupletGeneratorKernels.h:121

CAHitNtupletGeneratorKernels::buildDoublets
void buildDoublets(HitsOnCPU const &hh, cudaStream_t stream)
Definition: CAHitNtupletGeneratorKernels.cc:17

CAHitNtupletGeneratorKernels::hitToTupleView_
HitToTuple::View hitToTupleView_
Definition: CAHitNtupletGeneratorKernels.h:212

cAHitNtupletGenerator::Params::hardCurvCut_
const float hardCurvCut_
Definition: CAHitNtupletGeneratorKernels.h:127

CAHitNtupletGeneratorKernels::classifyTuples
void classifyTuples(HitsOnCPU const &hh, TkSoA *tuples_d, cudaStream_t cudaStream)
Definition: CAHitNtupletGeneratorKernels.cc:147

mutex
static std::mutex mutex
Definition: Proxy.cc:8

cAHitNtupletGenerator::Params::doClusterCut_
const bool doClusterCut_
Definition: CAHitNtupletGeneratorKernels.h:118

TrackSoAHeterogeneousT::qualityData
constexpr Quality const * qualityData() const
Definition: TrackSoAHeterogeneousT.h:40

cAHitNtupletGenerator::Params::doStats_
const bool doStats_
Definition: CAHitNtupletGeneratorKernels.h:117

cms::cuda::stream
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
Definition: HistoContainer.h:51

TrackingRecHit2DHeterogeneous
Definition: TrackingRecHit2DHeterogeneous.h:8

gpuPixelDoublets::nPairs
constexpr int nPairs
Definition: gpuPixelDoublets.h:12

cms::cuda::assert
assert(be >=bs)

CAHitNtupletGeneratorKernels::counters_
Counters * counters_
Definition: CAHitNtupletGeneratorKernels.h:196

cAHitNtupletGenerator::Counters
Definition: CAHitNtupletGeneratorKernels.h:14

CAHitNtupletGeneratorKernels::printCounters
static void printCounters(Counters const *counters)
Definition: CAHitNtupletGeneratorKernels.cc:12

cAHitNtupletGenerator::Params::doZ0Cut_
const bool doZ0Cut_
Definition: CAHitNtupletGeneratorKernels.h:119

nhits
Definition: HIMultiTrackSelector.h:42

caConstants::OuterHitOfCell
Definition: CAConstants.h:82

counters
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell const int32_t uint32_t CAHitNtupletGeneratorKernelsGPU::Counters * counters
Definition: CAHitNtupletGeneratorKernelsImpl.h:53

cAHitNtupletGenerator::Params::minHitsPerNtuplet_
const uint32_t minHitsPerNtuplet_
Definition: CAHitNtupletGeneratorKernels.h:108

cAHitNtupletGenerator::Params::useSimpleTripletCleaner_
const bool useSimpleTripletCleaner_
Definition: CAHitNtupletGeneratorKernels.h:123

cAHitNtupletGenerator::Params::dcaCutInnerTriplet_
const float dcaCutInnerTriplet_
Definition: CAHitNtupletGeneratorKernels.h:128

CAHitNtupletGeneratorKernels::device_theCells_
unique_ptr< GPUCACell[]> device_theCells_
Definition: CAHitNtupletGeneratorKernels.h:205

cAHitNtupletGenerator::Params::earlyFishbone_
const bool earlyFishbone_
Definition: CAHitNtupletGeneratorKernels.h:114

cAHitNtupletGenerator::Params::minHitsForSharingCut_
const uint16_t minHitsForSharingCut_
Definition: CAHitNtupletGeneratorKernels.h:110

cAHitNtupletGenerator::Params::idealConditions_
const bool idealConditions_
Definition: CAHitNtupletGeneratorKernels.h:116

CAHitNtupletGeneratorKernels::device_theCellNeighbors_
unique_ptr< caConstants::CellNeighborsVector > device_theCellNeighbors_
Definition: CAHitNtupletGeneratorKernels.h:200

SiStripPI::max
Definition: SiStripPayloadInspectorHelper.h:169

CAHitNtupletGeneratorKernels::cellStorage_
unique_ptr< unsigned char[]> cellStorage_
Definition: CAHitNtupletGeneratorKernels.h:199

GPUCACell::CellNeighbors
caConstants::CellNeighbors CellNeighbors
Definition: GPUCACell.h:27

cAHitNtupletGenerator::Params::CAThetaCutForward_
const float CAThetaCutForward_
Definition: CAHitNtupletGeneratorKernels.h:126

gpuPixelDoublets::nPairsForTriplets
constexpr int nPairsForTriplets
Definition: gpuPixelDoublets.h:11

cAHitNtupletGenerator::Params::dcaCutOuterTriplet_
const float dcaCutOuterTriplet_
Definition: CAHitNtupletGeneratorKernels.h:129

CAHitNtupletGeneratorKernels::device_theCellNeighborsContainer_
caConstants::CellNeighbors * device_theCellNeighborsContainer_
Definition: CAHitNtupletGeneratorKernels.h:201

CAHitNtupletGeneratorKernels::device_nCells_
uint32_t * device_nCells_
Definition: CAHitNtupletGeneratorKernels.h:208

cms::cuda::VecArray
Definition: VecArray.h:14

gpuPixelDoublets::nActualPairs
uint32_t CellNeighborsVector CellTracksVector TrackingRecHit2DSOAView const *__restrict__ GPUCACell::OuterHitOfCell int nActualPairs
Definition: gpuPixelDoublets.h:99

cAHitNtupletGenerator::Params::doPtCut_
const bool doPtCut_
Definition: CAHitNtupletGeneratorKernels.h:120

TrackingRecHit2DHeterogeneous::nHits
auto nHits() const
Definition: TrackingRecHit2DHeterogeneous.h:35

cAHitNtupletGenerator::Params::ptmin_
const float ptmin_
Definition: CAHitNtupletGeneratorKernels.h:124

CAHitNtupletGeneratorKernels::device_hitTuple_apc_
cms::cuda::AtomicPairCounter * device_hitTuple_apc_
Definition: CAHitNtupletGeneratorKernels.h:216

CAHitNtupletGeneratorKernels::device_isOuterHitOfCell_
unique_ptr< GPUCACell::OuterHitOfCellContainer[]> device_isOuterHitOfCell_
Definition: CAHitNtupletGeneratorKernels.h:206

CAHitNtupletGeneratorKernels::isOuterHitOfCell_
GPUCACell::OuterHitOfCell isOuterHitOfCell_
Definition: CAHitNtupletGeneratorKernels.h:207

CAHitNtupletGeneratorKernels::device_theCellTracksContainer_
caConstants::CellTracks * device_theCellTracksContainer_
Definition: CAHitNtupletGeneratorKernels.h:203

TrackSoAHeterogeneousT::detIndices
HitContainer detIndices
Definition: TrackSoAHeterogeneousT.h:82

TrackSoAHeterogeneousT
Definition: TrackSoAHeterogeneousT.h:24

cAHitNtupletGenerator::Params::CAThetaCutBarrel_
const float CAThetaCutBarrel_
Definition: CAHitNtupletGeneratorKernels.h:125

cAHitNtupletGenerator::Params::lateFishbone_
const bool lateFishbone_
Definition: CAHitNtupletGeneratorKernels.h:115

TrackingRecHit2DHeterogeneous::offsetBPIX2
auto offsetBPIX2() const
Definition: TrackingRecHit2DHeterogeneous.h:36

gpuPixelDoublets::nPairsForQuadruplets
constexpr int nPairsForQuadruplets
Definition: gpuPixelDoublets.h:10

cAHitNtupletGenerator::Params::dupPassThrough_
const bool dupPassThrough_
Definition: CAHitNtupletGeneratorKernels.h:122

cAHitNtupletGenerator::Params::maxNumberOfDoublets_
const uint32_t maxNumberOfDoublets_
Definition: CAHitNtupletGeneratorKernels.h:109

CAHitNtupletGeneratorKernelsImpl.h

gather_cfg.cout
tuple cout
Definition: gather_cfg.py:144

TrackingRecHit2DHeterogeneous::view
TrackingRecHit2DSOAView * view()
Definition: TrackingRecHit2DHeterogeneous.h:32

iev
HitContainer const *__restrict__ TkSoA const *__restrict__ Quality const *__restrict__ CAHitNtupletGeneratorKernelsGPU::HitToTuple const *__restrict__ int32_t int iev
Definition: CAHitNtupletGeneratorKernelsImpl.h:864

cAHitNtupletGenerator::Params::cuts_
QualityCuts cuts_
Definition: CAHitNtupletGeneratorKernels.h:132

TrackSoAHeterogeneousT::hitIndices
HitContainer hitIndices
Definition: TrackSoAHeterogeneousT.h:81

CAHitNtupletGeneratorKernels::params_
Params const & params_
Definition: CAHitNtupletGeneratorKernels.h:222

CAHitNtupletGeneratorKernels::device_theCellTracks_
unique_ptr< caConstants::CellTracksVector > device_theCellTracks_
Definition: CAHitNtupletGeneratorKernels.h:202

hh
auto const & hh
Definition: CAHitNtupletGeneratorKernelsImpl.h:552

CAHitNtupletGeneratorKernels::device_tupleMultiplicity_
unique_ptr< TupleMultiplicity > device_tupleMultiplicity_
Definition: CAHitNtupletGeneratorKernels.h:218

CAHitNtupletGeneratorKernels::device_hitToTuple_apc_
cms::cuda::AtomicPairCounter * device_hitToTuple_apc_
Definition: CAHitNtupletGeneratorKernels.h:214