CMS 3D CMS Logo

List of all members | Public Types | Public Member Functions | Static Public Member Functions | Private Member Functions | Private Attributes
CAHitNtupletGeneratorKernels< TTraits > Class Template Reference

#include <CAHitNtupletGeneratorKernels.h>

Public Types

using Counters = cAHitNtupletGenerator::Counters
 
using HitContainer = pixelTrack::HitContainer
 
using HitsOnCPU = TrackingRecHit2DHeterogeneous< Traits >
 
using HitsOnGPU = TrackingRecHit2DSOAView
 
using HitsView = TrackingRecHit2DSOAView
 
using HitToTuple = caConstants::HitToTuple
 
using Params = cAHitNtupletGenerator::Params
 
using Quality = pixelTrack::Quality
 
using QualityCuts = cAHitNtupletGenerator::QualityCuts
 
using TkSoA = pixelTrack::TrackSoA
 
using Traits = TTraits
 
using TupleMultiplicity = caConstants::TupleMultiplicity
 
template<typename T >
using unique_ptr = typename Traits::template unique_ptr< T >
 

Public Member Functions

void allocateOnGPU (int32_t nHits, cudaStream_t stream)
 
void buildDoublets (HitsOnCPU const &hh, cudaStream_t stream)
 
 CAHitNtupletGeneratorKernels (Params const &params)
 
void classifyTuples (HitsOnCPU const &hh, TkSoA *tuples_d, cudaStream_t cudaStream)
 
void cleanup (cudaStream_t cudaStream)
 
void launchKernels (HitsOnCPU const &hh, TkSoA *tuples_d, cudaStream_t cudaStream)
 
void setCounters (Counters *counters)
 
TupleMultiplicity const * tupleMultiplicity () const
 
 ~CAHitNtupletGeneratorKernels ()=default
 

Static Public Member Functions

static void printCounters (Counters const *counters)
 

Private Member Functions

uint32_t nDoubletBlocks (uint32_t blockSize)
 Compute the number of doublet blocks for block size. More...
 
uint32_t nQuadrupletBlocks (uint32_t blockSize)
 Compute the number of quadruplet blocks for block size. More...
 

Private Attributes

unique_ptr< unsigned char[]> cellStorage_
 
Counterscounters_ = nullptr
 
unique_ptr< HitToTupledevice_hitToTuple_
 
cms::cuda::AtomicPairCounterdevice_hitToTuple_apc_ = nullptr
 
unique_ptr< HitToTuple::Counter[]> device_hitToTupleStorage_
 
cms::cuda::AtomicPairCounterdevice_hitTuple_apc_ = nullptr
 
unique_ptr< GPUCACell::OuterHitOfCellContainer[]> device_isOuterHitOfCell_
 
uint32_t * device_nCells_ = nullptr
 
unique_ptr< cms::cuda::AtomicPairCounter::c_type[]> device_storage_
 
unique_ptr< caConstants::CellNeighborsVectordevice_theCellNeighbors_
 
caConstants::CellNeighborsdevice_theCellNeighborsContainer_
 
unique_ptr< GPUCACell[]> device_theCells_
 
unique_ptr< caConstants::CellTracksVectordevice_theCellTracks_
 
caConstants::CellTracksdevice_theCellTracksContainer_
 
unique_ptr< TupleMultiplicitydevice_tupleMultiplicity_
 
HitToTuple::View hitToTupleView_
 
GPUCACell::OuterHitOfCell isOuterHitOfCell_
 
Params const & params_
 
const uint32_t paramsMaxDoubletes3Quarters_
 Intermediate result avoiding repeated computations. More...
 

Detailed Description

template<typename TTraits>
class CAHitNtupletGeneratorKernels< TTraits >

Definition at line 157 of file CAHitNtupletGeneratorKernels.h.

Member Typedef Documentation

◆ Counters

template<typename TTraits >
using CAHitNtupletGeneratorKernels< TTraits >::Counters = cAHitNtupletGenerator::Counters

Definition at line 163 of file CAHitNtupletGeneratorKernels.h.

◆ HitContainer

template<typename TTraits >
using CAHitNtupletGeneratorKernels< TTraits >::HitContainer = pixelTrack::HitContainer

Definition at line 177 of file CAHitNtupletGeneratorKernels.h.

◆ HitsOnCPU

template<typename TTraits >
using CAHitNtupletGeneratorKernels< TTraits >::HitsOnCPU = TrackingRecHit2DHeterogeneous<Traits>

Definition at line 170 of file CAHitNtupletGeneratorKernels.h.

◆ HitsOnGPU

template<typename TTraits >
using CAHitNtupletGeneratorKernels< TTraits >::HitsOnGPU = TrackingRecHit2DSOAView

Definition at line 169 of file CAHitNtupletGeneratorKernels.h.

◆ HitsView

template<typename TTraits >
using CAHitNtupletGeneratorKernels< TTraits >::HitsView = TrackingRecHit2DSOAView

Definition at line 168 of file CAHitNtupletGeneratorKernels.h.

◆ HitToTuple

template<typename TTraits >
using CAHitNtupletGeneratorKernels< TTraits >::HitToTuple = caConstants::HitToTuple

Definition at line 172 of file CAHitNtupletGeneratorKernels.h.

◆ Params

template<typename TTraits >
using CAHitNtupletGeneratorKernels< TTraits >::Params = cAHitNtupletGenerator::Params

Definition at line 162 of file CAHitNtupletGeneratorKernels.h.

◆ Quality

template<typename TTraits >
using CAHitNtupletGeneratorKernels< TTraits >::Quality = pixelTrack::Quality

Definition at line 175 of file CAHitNtupletGeneratorKernels.h.

◆ QualityCuts

template<typename TTraits >
using CAHitNtupletGeneratorKernels< TTraits >::QualityCuts = cAHitNtupletGenerator::QualityCuts

Definition at line 161 of file CAHitNtupletGeneratorKernels.h.

◆ TkSoA

template<typename TTraits >
using CAHitNtupletGeneratorKernels< TTraits >::TkSoA = pixelTrack::TrackSoA

Definition at line 176 of file CAHitNtupletGeneratorKernels.h.

◆ Traits

template<typename TTraits >
using CAHitNtupletGeneratorKernels< TTraits >::Traits = TTraits

Definition at line 159 of file CAHitNtupletGeneratorKernels.h.

◆ TupleMultiplicity

template<typename TTraits >
using CAHitNtupletGeneratorKernels< TTraits >::TupleMultiplicity = caConstants::TupleMultiplicity

Definition at line 173 of file CAHitNtupletGeneratorKernels.h.

◆ unique_ptr

template<typename TTraits >
template<typename T >
using CAHitNtupletGeneratorKernels< TTraits >::unique_ptr = typename Traits::template unique_ptr<T>

Definition at line 166 of file CAHitNtupletGeneratorKernels.h.

Constructor & Destructor Documentation

◆ CAHitNtupletGeneratorKernels()

template<typename TTraits >
CAHitNtupletGeneratorKernels< TTraits >::CAHitNtupletGeneratorKernels ( Params const &  params)
inline

Definition at line 179 of file CAHitNtupletGeneratorKernels.h.

180  : params_(params), paramsMaxDoubletes3Quarters_(3 * params.maxNumberOfDoublets_ / 4) {}
const uint32_t paramsMaxDoubletes3Quarters_
Intermediate result avoiding repeated computations.

◆ ~CAHitNtupletGeneratorKernels()

template<typename TTraits >
CAHitNtupletGeneratorKernels< TTraits >::~CAHitNtupletGeneratorKernels ( )
default

Member Function Documentation

◆ allocateOnGPU()

void CAHitNtupletGeneratorKernelsCPU::allocateOnGPU ( int32_t  nHits,
cudaStream_t  stream 
)

Definition at line 9 of file CAHitNtupletGeneratorKernelsAlloc.cc.

References cms::cuda::assert(), cms::cuda::OneToManyAssocView< Assoc >::assoc, gather_cfg::cout, cudaCheck, CAHitNtupletGeneratorKernels< TTraits >::device_hitToTuple_, CAHitNtupletGeneratorKernels< TTraits >::device_hitToTuple_apc_, CAHitNtupletGeneratorKernels< TTraits >::device_hitToTupleStorage_, CAHitNtupletGeneratorKernels< TTraits >::device_hitTuple_apc_, CAHitNtupletGeneratorKernels< TTraits >::device_nCells_, CAHitNtupletGeneratorKernels< TTraits >::device_storage_, CAHitNtupletGeneratorKernels< TTraits >::device_theCellNeighbors_, CAHitNtupletGeneratorKernels< TTraits >::device_theCellTracks_, CAHitNtupletGeneratorKernels< TTraits >::device_tupleMultiplicity_, CAHitNtupletGeneratorKernels< TTraits >::hitToTupleView_, nHits, cms::cuda::OneToManyAssocView< Assoc >::offSize, cms::cuda::OneToManyAssocView< Assoc >::offStorage, cms::cuda::stream, svgfig::template(), and relativeConstraints::value.

Referenced by CAHitNtupletGeneratorOnGPU::makeTuples(), and CAHitNtupletGeneratorOnGPU::makeTuplesAsync().

9  {
10 #endif
11  // ALLOCATIONS FOR THE INTERMEDIATE RESULTS (STAYS ON WORKER)
14 
15  device_theCellNeighbors_ = Traits::template make_unique<caConstants::CellNeighborsVector>(stream);
16  device_theCellTracks_ = Traits::template make_unique<caConstants::CellTracksVector>(stream);
17 
18 #ifdef GPU_DEBUG
19  std::cout << "Allocation for tuple building. N hits " << nHits << std::endl;
20 #endif
21 
22  nHits++; // storage requires one more counter;
23  assert(nHits > 0);
24  device_hitToTuple_ = Traits::template make_unique<HitToTuple>(stream);
25  device_hitToTupleStorage_ = Traits::template make_unique<HitToTuple::Counter[]>(nHits, stream);
29 
30  device_tupleMultiplicity_ = Traits::template make_unique<TupleMultiplicity>(stream);
31 
32  device_storage_ = Traits::template make_unique<cms::cuda::AtomicPairCounter::c_type[]>(3, stream);
33 
36  device_nCells_ = (uint32_t*)(device_storage_.get() + 2);
37 
38  // FIXME: consider collapsing these 3 in one adhoc kernel
40  cudaCheck(cudaMemsetAsync(device_nCells_, 0, sizeof(uint32_t), stream));
41  } else {
42  *device_nCells_ = 0;
43  }
44  cms::cuda::launchZero(device_tupleMultiplicity_.get(), stream);
45  cms::cuda::launchZero(hitToTupleView_, stream); // we may wish to keep it in the edm
46 #ifdef GPU_DEBUG
47  cudaDeviceSynchronize();
48  cudaCheck(cudaGetLastError());
49 #endif
50 }
unique_ptr< HitToTuple > device_hitToTuple_
unique_ptr< HitToTuple::Counter[]> device_hitToTupleStorage_
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
assert(be >=bs)
def template(fileName, svg, replaceme="REPLACEME")
Definition: svgfig.py:521
unique_ptr< caConstants::CellNeighborsVector > device_theCellNeighbors_
cms::cuda::AtomicPairCounter * device_hitTuple_apc_
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell const int32_t nHits
unique_ptr< cms::cuda::AtomicPairCounter::c_type[]> device_storage_
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69
unique_ptr< caConstants::CellTracksVector > device_theCellTracks_
unique_ptr< TupleMultiplicity > device_tupleMultiplicity_
cms::cuda::AtomicPairCounter * device_hitToTuple_apc_

◆ buildDoublets()

void CAHitNtupletGeneratorKernelsCPU::buildDoublets ( HitsOnCPU const &  hh,
cudaStream_t  stream 
)

Definition at line 17 of file CAHitNtupletGeneratorKernels.cc.

References cms::cuda::assert(), CAHitNtupletGeneratorKernels< TTraits >::cellStorage_, gather_cfg::cout, CAHitNtupletGeneratorKernels< TTraits >::device_isOuterHitOfCell_, CAHitNtupletGeneratorKernels< TTraits >::device_nCells_, CAHitNtupletGeneratorKernels< TTraits >::device_theCellNeighbors_, CAHitNtupletGeneratorKernels< TTraits >::device_theCellNeighborsContainer_, CAHitNtupletGeneratorKernels< TTraits >::device_theCells_, CAHitNtupletGeneratorKernels< TTraits >::device_theCellTracks_, CAHitNtupletGeneratorKernels< TTraits >::device_theCellTracksContainer_, cAHitNtupletGenerator::Params::doClusterCut_, cAHitNtupletGenerator::Params::doPtCut_, cAHitNtupletGenerator::Params::doZ0Cut_, hh, cAHitNtupletGenerator::Params::idealConditions_, cAHitNtupletGenerator::Params::includeJumpingForwardDoublets_, CAHitNtupletGeneratorKernels< TTraits >::isOuterHitOfCell_, SiStripPI::max, cAHitNtupletGenerator::Params::maxNumberOfDoublets_, caConstants::maxNumOfActiveDoublets, cAHitNtupletGenerator::Params::minHitsPerNtuplet_, gpuPixelDoublets::nActualPairs, nhits, gpuPixelDoublets::nPairs, gpuPixelDoublets::nPairsForQuadruplets, gpuPixelDoublets::nPairsForTriplets, CAHitNtupletGeneratorKernels< TTraits >::params_, and mitigatedMETSequence_cff::U.

Referenced by CAHitNtupletGeneratorOnGPU::makeTuples(), and CAHitNtupletGeneratorOnGPU::makeTuplesAsync().

17  {
18  auto nhits = hh.nHits();
19 
20 #ifdef NTUPLE_DEBUG
21  std::cout << "building Doublets out of " << nhits << " Hits. BPIX2 offset is " << hh.offsetBPIX2() << std::endl;
22 #endif
23 
24  // use "nhits" to heuristically dimension the workspace
25 
26  // no need to use the Traits allocations, since we know this is being compiled for the CPU
27  //device_isOuterHitOfCell_ = Traits::template make_unique<GPUCACell::OuterHitOfCell[]>(std::max(1U, nhits), stream);
28  device_isOuterHitOfCell_ = std::make_unique<GPUCACell::OuterHitOfCellContainer[]>(std::max(1U, nhits));
31 
32  auto cellStorageSize = caConstants::maxNumOfActiveDoublets * sizeof(GPUCACell::CellNeighbors) +
34  // no need to use the Traits allocations, since we know this is being compiled for the CPU
35  //cellStorage_ = Traits::template make_unique<unsigned char[]>(cellStorageSize, stream);
36  cellStorage_ = std::make_unique<unsigned char[]>(cellStorageSize);
39  sizeof(GPUCACell::CellNeighbors));
40 
41  gpuPixelDoublets::initDoublets(isOuterHitOfCell_,
42  nhits,
47 
48  // no need to use the Traits allocations, since we know this is being compiled for the CPU
49  //device_theCells_ = Traits::template make_unique<GPUCACell[]>(params_.maxNumberOfDoublets_, stream);
50  device_theCells_ = std::make_unique<GPUCACell[]>(params_.maxNumberOfDoublets_);
51  if (0 == nhits)
52  return; // protect against empty events
53 
54  // take all layer pairs into account
57  // exclude forward "jumping" layer pairs
59  }
60  if (params_.minHitsPerNtuplet_ > 3) {
61  // for quadruplets, exclude all "jumping" layer pairs
63  }
64 
66  gpuPixelDoublets::getDoubletsFromHisto(device_theCells_.get(),
70  hh.view(),
78 }
constexpr uint32_t maxNumOfActiveDoublets
Definition: CAConstants.h:41
auto const & hh
constexpr int nPairs
assert(be >=bs)
unique_ptr< caConstants::CellNeighborsVector > device_theCellNeighbors_
unique_ptr< unsigned char[]> cellStorage_
caConstants::CellNeighbors CellNeighbors
Definition: GPUCACell.h:28
constexpr int nPairsForTriplets
caConstants::CellNeighbors * device_theCellNeighborsContainer_
uint32_t CellNeighborsVector CellTracksVector TrackingRecHit2DSOAView const *__restrict__ GPUCACell::OuterHitOfCell int nActualPairs
unique_ptr< GPUCACell::OuterHitOfCellContainer[]> device_isOuterHitOfCell_
GPUCACell::OuterHitOfCell isOuterHitOfCell_
caConstants::CellTracks * device_theCellTracksContainer_
constexpr int nPairsForQuadruplets
unique_ptr< caConstants::CellTracksVector > device_theCellTracks_

◆ classifyTuples()

void CAHitNtupletGeneratorKernelsCPU::classifyTuples ( HitsOnCPU const &  hh,
TkSoA tuples_d,
cudaStream_t  cudaStream 
)

Definition at line 147 of file CAHitNtupletGeneratorKernels.cc.

References CAHitNtupletGeneratorKernels< TTraits >::counters_, cAHitNtupletGenerator::Params::cuts_, CAHitNtupletGeneratorKernels< TTraits >::device_hitToTuple_, CAHitNtupletGeneratorKernels< TTraits >::device_hitTuple_apc_, CAHitNtupletGeneratorKernels< TTraits >::device_nCells_, CAHitNtupletGeneratorKernels< TTraits >::device_theCellNeighbors_, CAHitNtupletGeneratorKernels< TTraits >::device_theCells_, CAHitNtupletGeneratorKernels< TTraits >::device_theCellTracks_, CAHitNtupletGeneratorKernels< TTraits >::device_tupleMultiplicity_, cAHitNtupletGenerator::Params::doSharedHitCut_, cAHitNtupletGenerator::Params::doStats_, cAHitNtupletGenerator::Params::dupPassThrough_, hh, TrackSoAHeterogeneousT< S >::hitIndices, CAHitNtupletGeneratorKernels< TTraits >::hitToTupleView_, iev, CAHitNtupletGeneratorKernels< TTraits >::isOuterHitOfCell_, cAHitNtupletGenerator::Params::lateFishbone_, CommonMethods::lock(), cAHitNtupletGenerator::Params::maxNumberOfDoublets_, cAHitNtupletGenerator::Params::minHitsForSharingCut_, mutex, nhits, CAHitNtupletGeneratorKernels< TTraits >::params_, TrackSoAHeterogeneousT< S >::qualityData(), and cAHitNtupletGenerator::Params::useSimpleTripletCleaner_.

Referenced by CAHitNtupletGeneratorOnGPU::makeTuples(), and CAHitNtupletGeneratorOnGPU::makeTuplesAsync().

147  {
148  int32_t nhits = hh.nHits();
149 
150  auto const *tuples_d = &tracks_d->hitIndices;
151  auto *quality_d = tracks_d->qualityData();
152 
153  // classify tracks based on kinematics
154  kernel_classifyTracks(tuples_d, tracks_d, params_.cuts_, quality_d);
155 
156  if (params_.lateFishbone_) {
157  // apply fishbone cleaning to good tracks
158  kernel_fishboneCleaner(device_theCells_.get(), device_nCells_, quality_d);
159  }
160 
161  // remove duplicates (tracks that share a doublet)
162  kernel_fastDuplicateRemover(device_theCells_.get(), device_nCells_, tracks_d, params_.dupPassThrough_);
163 
164  // fill hit->track "map"
166  kernel_countHitInTracks(tuples_d, quality_d, device_hitToTuple_.get());
167  cms::cuda::launchFinalize(hitToTupleView_, cudaStream);
168  kernel_fillHitInTracks(tuples_d, quality_d, device_hitToTuple_.get());
169  }
170 
171  // remove duplicates (tracks that share at least one hit)
172  if (params_.doSharedHitCut_) {
173  kernel_rejectDuplicate(
175 
176  kernel_sharedHitCleaner(hh.view(),
177  tracks_d,
178  quality_d,
181  device_hitToTuple_.get());
183  kernel_simpleTripletCleaner(
185  } else {
186  kernel_tripletCleaner(
188  }
189  }
190 
191  if (params_.doStats_) {
192  std::lock_guard guard(lock_stat);
193  kernel_checkOverflows(tuples_d,
195  device_hitToTuple_.get(),
197  device_theCells_.get(),
200  device_theCellTracks_.get(),
202  nhits,
204  counters_);
205  }
206 
207  if (params_.doStats_) {
208  // counters (add flag???)
209  std::lock_guard guard(lock_stat);
210  kernel_doStatsForHitInTracks(device_hitToTuple_.get(), counters_);
211  kernel_doStatsForTracks(tuples_d, quality_d, counters_);
212  }
213 
214 #ifdef DUMP_GPU_TK_TUPLES
215  static std::atomic<int> iev(0);
216  static std::mutex lock;
217  {
218  std::lock_guard<std::mutex> guard(lock);
219  ++iev;
220  kernel_print_found_ntuplets(hh.view(), tuples_d, tracks_d, quality_d, device_hitToTuple_.get(), 0, 1000000, iev);
221  }
222 #endif
223 }
unique_ptr< HitToTuple > device_hitToTuple_
static std::mutex mutex
Definition: Proxy.cc:8
auto const & hh
unique_ptr< caConstants::CellNeighborsVector > device_theCellNeighbors_
cms::cuda::AtomicPairCounter * device_hitTuple_apc_
HitContainer const *__restrict__ TkSoA const *__restrict__ Quality const *__restrict__ CAHitNtupletGeneratorKernelsGPU::HitToTuple const *__restrict__ int32_t int32_t int iev
GPUCACell::OuterHitOfCell isOuterHitOfCell_
unique_ptr< caConstants::CellTracksVector > device_theCellTracks_
unique_ptr< TupleMultiplicity > device_tupleMultiplicity_

◆ cleanup()

template<typename TTraits >
void CAHitNtupletGeneratorKernels< TTraits >::cleanup ( cudaStream_t  cudaStream)

◆ launchKernels()

void CAHitNtupletGeneratorKernelsCPU::launchKernels ( HitsOnCPU const &  hh,
TkSoA tuples_d,
cudaStream_t  cudaStream 
)

Definition at line 81 of file CAHitNtupletGeneratorKernels.cc.

References cms::cuda::assert(), cAHitNtupletGenerator::Params::CAThetaCutBarrel_, cAHitNtupletGenerator::Params::CAThetaCutForward_, cAHitNtupletGenerator::Params::dcaCutInnerTriplet_, cAHitNtupletGenerator::Params::dcaCutOuterTriplet_, TrackSoAHeterogeneousT< S >::detIndices, CAHitNtupletGeneratorKernels< TTraits >::device_hitToTuple_apc_, CAHitNtupletGeneratorKernels< TTraits >::device_hitTuple_apc_, CAHitNtupletGeneratorKernels< TTraits >::device_nCells_, CAHitNtupletGeneratorKernels< TTraits >::device_theCellNeighbors_, CAHitNtupletGeneratorKernels< TTraits >::device_theCells_, CAHitNtupletGeneratorKernels< TTraits >::device_theCellTracks_, CAHitNtupletGeneratorKernels< TTraits >::device_tupleMultiplicity_, cAHitNtupletGenerator::Params::doStats_, cAHitNtupletGenerator::Params::dupPassThrough_, cAHitNtupletGenerator::Params::earlyFishbone_, cAHitNtupletGenerator::Params::hardCurvCut_, hh, TrackSoAHeterogeneousT< S >::hitIndices, CAHitNtupletGeneratorKernels< TTraits >::isOuterHitOfCell_, cAHitNtupletGenerator::Params::lateFishbone_, cAHitNtupletGenerator::Params::minHitsPerNtuplet_, nhits, CAHitNtupletGeneratorKernels< TTraits >::params_, cAHitNtupletGenerator::Params::ptmin_, and TrackSoAHeterogeneousT< S >::qualityData().

Referenced by CAHitNtupletGeneratorOnGPU::makeTuples(), and CAHitNtupletGeneratorOnGPU::makeTuplesAsync().

81  {
82  auto *tuples_d = &tracks_d->hitIndices;
83  auto *detId_d = &tracks_d->detIndices;
84  auto *quality_d = tracks_d->qualityData();
85 
86  assert(tuples_d && quality_d);
87 
88  // zero tuples
89  cms::cuda::launchZero(tuples_d, cudaStream);
90 
91  auto nhits = hh.nHits();
92 
93  // std::cout << "N hits " << nhits << std::endl;
94  // if (nhits<2) std::cout << "too few hits " << nhits << std::endl;
95 
96  //
97  // applying conbinatoric cleaning such as fishbone at this stage is too expensive
98  //
99 
100  kernel_connect(device_hitTuple_apc_,
101  device_hitToTuple_apc_, // needed only to be reset, ready for next kernel
102  hh.view(),
103  device_theCells_.get(),
108  params_.ptmin_,
113 
114  if (nhits > 1 && params_.earlyFishbone_) {
115  gpuPixelDoublets::fishbone(hh.view(), device_theCells_.get(), device_nCells_, isOuterHitOfCell_, nhits, false);
116  }
117 
118  kernel_find_ntuplets(hh.view(),
119  device_theCells_.get(),
121  device_theCellTracks_.get(),
122  tuples_d,
124  quality_d,
126  if (params_.doStats_)
127  kernel_mark_used(device_theCells_.get(), device_nCells_);
128 
129  cms::cuda::finalizeBulk(device_hitTuple_apc_, tuples_d);
130 
131  kernel_fillHitDetIndices(tuples_d, hh.view(), detId_d);
132  kernel_fillNLayers(tracks_d, device_hitTuple_apc_);
133 
134  // remove duplicates (tracks that share a doublet)
135  kernel_earlyDuplicateRemover(device_theCells_.get(), device_nCells_, tracks_d, quality_d, params_.dupPassThrough_);
136 
137  kernel_countMultiplicity(tuples_d, quality_d, device_tupleMultiplicity_.get());
138  cms::cuda::launchFinalize(device_tupleMultiplicity_.get(), cudaStream);
139  kernel_fillMultiplicity(tuples_d, quality_d, device_tupleMultiplicity_.get());
140 
141  if (nhits > 1 && params_.lateFishbone_) {
142  gpuPixelDoublets::fishbone(hh.view(), device_theCells_.get(), device_nCells_, isOuterHitOfCell_, nhits, true);
143  }
144 }
auto const & hh
assert(be >=bs)
unique_ptr< caConstants::CellNeighborsVector > device_theCellNeighbors_
cms::cuda::AtomicPairCounter * device_hitTuple_apc_
GPUCACell::OuterHitOfCell isOuterHitOfCell_
unique_ptr< caConstants::CellTracksVector > device_theCellTracks_
unique_ptr< TupleMultiplicity > device_tupleMultiplicity_
cms::cuda::AtomicPairCounter * device_hitToTuple_apc_

◆ nDoubletBlocks()

template<typename TTraits >
uint32_t CAHitNtupletGeneratorKernels< TTraits >::nDoubletBlocks ( uint32_t  blockSize)
inlineprivate

Compute the number of doublet blocks for block size.

Definition at line 227 of file CAHitNtupletGeneratorKernels.h.

References CAHitNtupletGeneratorKernels< TTraits >::paramsMaxDoubletes3Quarters_.

227  {
228  // We want (3 * params_.maxNumberOfDoublets_ / 4 + blockSize - 1) / blockSize, but first part is pre-computed.
229  return (paramsMaxDoubletes3Quarters_ + blockSize - 1) / blockSize;
230  }
const uint32_t paramsMaxDoubletes3Quarters_
Intermediate result avoiding repeated computations.

◆ nQuadrupletBlocks()

template<typename TTraits >
uint32_t CAHitNtupletGeneratorKernels< TTraits >::nQuadrupletBlocks ( uint32_t  blockSize)
inlineprivate

Compute the number of quadruplet blocks for block size.

Definition at line 233 of file CAHitNtupletGeneratorKernels.h.

References caConstants::maxNumberOfQuadruplets.

233  {
234  // caConstants::maxNumberOfQuadruplets is a constexpr, so the compiler will pre compute the 3*max/4
235  return (3 * caConstants::maxNumberOfQuadruplets / 4 + blockSize - 1) / blockSize;
236  }
constexpr uint32_t maxNumberOfQuadruplets
Definition: CAConstants.h:42

◆ printCounters()

void CAHitNtupletGeneratorKernelsCPU::printCounters ( Counters const *  counters)
static

Definition at line 12 of file CAHitNtupletGeneratorKernels.cc.

References counters.

Referenced by CAHitNtupletGeneratorOnGPU::endJob().

12  {
13  kernel_printCounters(counters);
14 }
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell const int32_t uint32_t CAHitNtupletGeneratorKernelsGPU::Counters * counters

◆ setCounters()

template<typename TTraits >
void CAHitNtupletGeneratorKernels< TTraits >::setCounters ( Counters counters)
inline

Definition at line 194 of file CAHitNtupletGeneratorKernels.h.

References counters, and CAHitNtupletGeneratorKernels< TTraits >::counters_.

Referenced by CAHitNtupletGeneratorOnGPU::makeTuples(), and CAHitNtupletGeneratorOnGPU::makeTuplesAsync().

194 { counters_ = counters; }
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell const int32_t uint32_t CAHitNtupletGeneratorKernelsGPU::Counters * counters

◆ tupleMultiplicity()

template<typename TTraits >
TupleMultiplicity const* CAHitNtupletGeneratorKernels< TTraits >::tupleMultiplicity ( ) const
inline

Member Data Documentation

◆ cellStorage_

template<typename TTraits >
unique_ptr<unsigned char[]> CAHitNtupletGeneratorKernels< TTraits >::cellStorage_
private

◆ counters_

template<typename TTraits >
Counters* CAHitNtupletGeneratorKernels< TTraits >::counters_ = nullptr
private

◆ device_hitToTuple_

template<typename TTraits >
unique_ptr<HitToTuple> CAHitNtupletGeneratorKernels< TTraits >::device_hitToTuple_
private

◆ device_hitToTuple_apc_

template<typename TTraits >
cms::cuda::AtomicPairCounter* CAHitNtupletGeneratorKernels< TTraits >::device_hitToTuple_apc_ = nullptr
private

◆ device_hitToTupleStorage_

template<typename TTraits >
unique_ptr<HitToTuple::Counter[]> CAHitNtupletGeneratorKernels< TTraits >::device_hitToTupleStorage_
private

◆ device_hitTuple_apc_

template<typename TTraits >
cms::cuda::AtomicPairCounter* CAHitNtupletGeneratorKernels< TTraits >::device_hitTuple_apc_ = nullptr
private

◆ device_isOuterHitOfCell_

template<typename TTraits >
unique_ptr<GPUCACell::OuterHitOfCellContainer[]> CAHitNtupletGeneratorKernels< TTraits >::device_isOuterHitOfCell_
private

◆ device_nCells_

template<typename TTraits >
uint32_t* CAHitNtupletGeneratorKernels< TTraits >::device_nCells_ = nullptr
private

◆ device_storage_

template<typename TTraits >
unique_ptr<cms::cuda::AtomicPairCounter::c_type[]> CAHitNtupletGeneratorKernels< TTraits >::device_storage_
private

◆ device_theCellNeighbors_

template<typename TTraits >
unique_ptr<caConstants::CellNeighborsVector> CAHitNtupletGeneratorKernels< TTraits >::device_theCellNeighbors_
private

◆ device_theCellNeighborsContainer_

template<typename TTraits >
caConstants::CellNeighbors* CAHitNtupletGeneratorKernels< TTraits >::device_theCellNeighborsContainer_
private

◆ device_theCells_

template<typename TTraits >
unique_ptr<GPUCACell[]> CAHitNtupletGeneratorKernels< TTraits >::device_theCells_
private

◆ device_theCellTracks_

template<typename TTraits >
unique_ptr<caConstants::CellTracksVector> CAHitNtupletGeneratorKernels< TTraits >::device_theCellTracks_
private

◆ device_theCellTracksContainer_

template<typename TTraits >
caConstants::CellTracks* CAHitNtupletGeneratorKernels< TTraits >::device_theCellTracksContainer_
private

◆ device_tupleMultiplicity_

template<typename TTraits >
unique_ptr<TupleMultiplicity> CAHitNtupletGeneratorKernels< TTraits >::device_tupleMultiplicity_
private

◆ hitToTupleView_

template<typename TTraits >
HitToTuple::View CAHitNtupletGeneratorKernels< TTraits >::hitToTupleView_
private

◆ isOuterHitOfCell_

template<typename TTraits >
GPUCACell::OuterHitOfCell CAHitNtupletGeneratorKernels< TTraits >::isOuterHitOfCell_
private

◆ params_

template<typename TTraits >
Params const& CAHitNtupletGeneratorKernels< TTraits >::params_
private

◆ paramsMaxDoubletes3Quarters_

template<typename TTraits >
const uint32_t CAHitNtupletGeneratorKernels< TTraits >::paramsMaxDoubletes3Quarters_
private

Intermediate result avoiding repeated computations.

Definition at line 225 of file CAHitNtupletGeneratorKernels.h.

Referenced by CAHitNtupletGeneratorKernels< TTraits >::nDoubletBlocks().