CMS 3D CMS Logo

CAHitNtupletGeneratorKernels.cc
Go to the documentation of this file.
2 
// Explicit specialization whose body forwards `counters` to kernel_printCounters.
// NOTE(review): the signature line is missing from this listing (the embedded
// numbering jumps from 3 to 5); presumably this is
// `printCounters(Counters const *counters)` — confirm against the real file.
3 template <>
5  kernel_printCounters(counters);
6 }
7 
// CPU specialization of fillHitDetIndices.
//
// Forwards to kernel_fillHitDetIndices, passing the address of the track hit
// indices, the hits view `hv`, and the address of the track detector indices.
// The cudaStream_t parameter is unnamed and not used by this body.
8 template <>
9 void CAHitNtupletGeneratorKernelsCPU::fillHitDetIndices(HitsView const *hv, TkSoA *tracks_d, cudaStream_t) {
10  kernel_fillHitDetIndices(&tracks_d->hitIndices, hv, &tracks_d->detIndices);
11 }
12 
// Explicit specialization that sizes the doublet workspace from the number of
// hits and then runs the doublet-building helpers.
//
// Visible steps, in order:
//   - allocate device_isOuterHitOfCell_ with std::max(1U, nhits) entries;
//   - allocate cellStorage_ as a byte buffer of cellStorageSize bytes;
//   - call gpuPixelDoublets::initDoublets;
//   - allocate device_theCells_ with params_.maxNumberOfDoublets_ entries;
//   - return early when there are no hits;
//   - call gpuPixelDoublets::getDoubletsFromHisto.
//
// NOTE(review): the signature line and several statement/argument lines are
// missing from this listing (the embedded numbering has gaps), so the
// cellStorageSize expression, the layer-pair selection and both helper calls
// are only partially shown. Presumably this is
// `buildDoublets(HitsOnCPU const &hh, cudaStream_t stream)` — confirm against
// the real file.
13 template <>
15  auto nhits = hh.nHits();
16 
17 #ifdef NTUPLE_DEBUG
18  std::cout << "building Doublets out of " << nhits << " Hits" << std::endl;
19 #endif
20 
21  // use "nhits" to heuristically dimension the workspace
22 
23  // no need to use the Traits allocations, since we know this is being compiled for the CPU
24  //device_isOuterHitOfCell_ = Traits::template make_unique<GPUCACell::OuterHitOfCell[]>(std::max(1U, nhits), stream);
    // std::max(1U, nhits) keeps the array length at least 1 even when nhits is 0
25  device_isOuterHitOfCell_ = std::make_unique<GPUCACell::OuterHitOfCell[]>(std::max(1U, nhits));
27 
28  auto cellStorageSize = caConstants::maxNumOfActiveDoublets * sizeof(GPUCACell::CellNeighbors) +
30  // no need to use the Traits allocations, since we know this is being compiled for the CPU
31  //cellStorage_ = Traits::template make_unique<unsigned char[]>(cellStorageSize, stream);
32  cellStorage_ = std::make_unique<unsigned char[]>(cellStorageSize);
35  sizeof(GPUCACell::CellNeighbors));
36 
37  gpuPixelDoublets::initDoublets(device_isOuterHitOfCell_.get(),
38  nhits,
43 
44  // no need to use the Traits allocations, since we know this is being compiled for the CPU
45  //device_theCells_ = Traits::template make_unique<GPUCACell[]>(params_.maxNumberOfDoublets_, stream);
46  device_theCells_ = std::make_unique<GPUCACell[]>(params_.maxNumberOfDoublets_);
    // the allocations above happen before this early return, so they are made even for empty events
47  if (0 == nhits)
48  return; // protect against empty events
49 
50  // take all layer pairs into account
53  // exclude forward "jumping" layer pairs
55  }
56  if (params_.minHitsPerNtuplet_ > 3) {
57  // for quadruplets, exclude all "jumping" layer pairs
59  }
60 
62  gpuPixelDoublets::getDoubletsFromHisto(device_theCells_.get(),
66  hh.view(),
74 }
75 
// CPU specialization of launchKernels: runs the ntuplet-building steps in
// sequence as plain function calls.
//
// Parameters:
//   hh         - hit collection; supplies nHits() and view().
//   tracks_d   - track SoA; its hitIndices and qualityData() are the outputs
//                filled by the steps below.
//   cudaStream - passed through to cms::cuda::launchZero and
//                cms::cuda::launchFinalize.
//
// Visible steps, in order: zero the tuple container, kernel_connect, optional
// early fishbone, kernel_find_ntuplets, optional kernel_mark_used,
// finalizeBulk, early duplicate removal, tuple-multiplicity count / finalize /
// fill, optional late fishbone, optional overflow checks.
//
// NOTE(review): several argument lines of the multi-line calls are missing
// from this listing (the embedded numbering has gaps), so those calls are
// shown incomplete here.
76 template <>
77 void CAHitNtupletGeneratorKernelsCPU::launchKernels(HitsOnCPU const &hh, TkSoA *tracks_d, cudaStream_t cudaStream) {
78  auto *tuples_d = &tracks_d->hitIndices;
79  auto *quality_d = tracks_d->qualityData();
80 
81  assert(tuples_d && quality_d);
82 
83  // zero tuples
84  cms::cuda::launchZero(tuples_d, cudaStream);
85 
86  auto nhits = hh.nHits();
87 
88  // std::cout << "N hits " << nhits << std::endl;
89  // if (nhits<2) std::cout << "too few hits " << nhits << std::endl;
90 
91  //
92  // applying combinatorial cleaning such as fishbone at this stage is too expensive
93  //
94 
95  kernel_connect(device_hitTuple_apc_,
96  device_hitToTuple_apc_, // needed only to be reset, ready for next kernel
97  hh.view(),
98  device_theCells_.get(),
103  params_.ptmin_,
108 
    // early fishbone: only when enabled and there are at least two hits
109  if (nhits > 1 && params_.earlyFishbone_) {
110  gpuPixelDoublets::fishbone(
111  hh.view(), device_theCells_.get(), device_nCells_, device_isOuterHitOfCell_.get(), nhits, false);
112  }
113 
114  kernel_find_ntuplets(hh.view(),
115  device_theCells_.get(),
117  device_theCellTracks_.get(),
118  tuples_d,
120  quality_d,
    // the kernel_mark_used call below runs only when statistics are enabled
122  if (params_.doStats_)
123  kernel_mark_used(hh.view(), device_theCells_.get(), device_nCells_);
124 
125  cms::cuda::finalizeBulk(device_hitTuple_apc_, tuples_d);
126 
127  // remove duplicates (tracks that share a doublet)
128  kernel_earlyDuplicateRemover(device_theCells_.get(), device_nCells_, tuples_d, quality_d, params_.dupPassThrough_);
129 
    // tuple multiplicity is built in three calls: count, finalize, then fill
130  kernel_countMultiplicity(tuples_d, quality_d, device_tupleMultiplicity_.get());
131  cms::cuda::launchFinalize(device_tupleMultiplicity_.get(), cudaStream);
132  kernel_fillMultiplicity(tuples_d, quality_d, device_tupleMultiplicity_.get());
133 
    // late fishbone: same call as the early one, but with `true` as the last argument
134  if (nhits > 1 && params_.lateFishbone_) {
135  gpuPixelDoublets::fishbone(
136  hh.view(), device_theCells_.get(), device_nCells_, device_isOuterHitOfCell_.get(), nhits, true);
137  }
138 
139  if (params_.doStats_) {
140  kernel_checkOverflows(tuples_d,
142  device_hitToTuple_.get(),
144  device_theCells_.get(),
147  device_theCellTracks_.get(),
149  nhits,
151  counters_);
152  }
153 }
154 
// CPU specialization of classifyTuples: classifies the tuples found earlier
// and applies the cleaning / duplicate-removal steps, updating quality_d.
//
// Parameters:
//   hh         - hit collection; only hh.view() is used here.
//   tracks_d   - track SoA; supplies hitIndices (read through a const pointer)
//                and qualityData().
//   cudaStream - passed through to cms::cuda::launchFinalize.
//
// Visible steps, in order: kernel_classifyTracks, optional fishbone cleaner,
// fast duplicate removal, hit->track map fill (count / finalize / fill),
// optional shared-hit cleaning, optional statistics, and an optional dump of
// the found ntuplets when DUMP_GPU_TK_TUPLES is defined.
//
// NOTE(review): some lines are missing from this listing (the embedded
// numbering has gaps), including the lines that open two of the conditional
// blocks below and some call arguments.
155 template <>
156 void CAHitNtupletGeneratorKernelsCPU::classifyTuples(HitsOnCPU const &hh, TkSoA *tracks_d, cudaStream_t cudaStream) {
157  auto const *tuples_d = &tracks_d->hitIndices;
158  auto *quality_d = tracks_d->qualityData();
159 
160  // classify tracks based on kinematics
161  kernel_classifyTracks(tuples_d, tracks_d, params_.cuts_, quality_d);
162 
163  if (params_.lateFishbone_) {
164  // apply fishbone cleaning to good tracks
165  kernel_fishboneCleaner(device_theCells_.get(), device_nCells_, quality_d);
166  }
167 
168  // remove duplicates (tracks that share a doublet)
169  kernel_fastDuplicateRemover(device_theCells_.get(), device_nCells_, tuples_d, tracks_d, params_.dupPassThrough_);
170 
171  // fill hit->track "map"
    // NOTE(review): the line that opens this block (closed at the brace below) is missing from this listing
173  kernel_countHitInTracks(tuples_d, quality_d, device_hitToTuple_.get());
174  cms::cuda::launchFinalize(hitToTupleView_, cudaStream);
175  kernel_fillHitInTracks(tuples_d, quality_d, device_hitToTuple_.get());
176  }
177 
178  // remove duplicates (tracks that share at least one hit)
179  if (params_.doSharedHitCut_) {
180  kernel_rejectDuplicate(hh.view(),
181  tuples_d,
182  tracks_d,
183  quality_d,
186  device_hitToTuple_.get());
187 
188  kernel_sharedHitCleaner(hh.view(),
189  tuples_d,
190  tracks_d,
191  quality_d,
194  device_hitToTuple_.get());
    // NOTE(review): the condition choosing between the two triplet cleaners is missing from this listing;
    // presumably it tests params_.useSimpleTripletCleaner_ — confirm against the real file
196  kernel_simpleTripletCleaner(hh.view(),
197  tuples_d,
198  tracks_d,
199  quality_d,
202  device_hitToTuple_.get());
203  } else {
204  kernel_tripletCleaner(hh.view(),
205  tuples_d,
206  tracks_d,
207  quality_d,
210  device_hitToTuple_.get());
211  }
212  }
213  if (params_.doStats_) {
214  // counters (add flag???)
215  kernel_doStatsForHitInTracks(device_hitToTuple_.get(), counters_);
216  kernel_doStatsForTracks(tuples_d, quality_d, counters_);
217  }
218 
219 #ifdef DUMP_GPU_TK_TUPLES
    // iev is a function-local static atomic, incremented once per call and passed to the print helper
220  static std::atomic<int> iev(0);
221  ++iev;
222  kernel_print_found_ntuplets(hh.view(), tuples_d, tracks_d, quality_d, device_hitToTuple_.get(), 100, iev);
223 #endif
224 }
cAHitNtupletGenerator::Params::earlyFishbone_
const bool earlyFishbone_
Definition: CAHitNtupletGeneratorKernels.h:114
CAHitNtupletGeneratorKernels::device_theCellTracksContainer_
caConstants::CellTracks * device_theCellTracksContainer_
Definition: CAHitNtupletGeneratorKernels.h:205
CAHitNtupletGeneratorKernels::device_hitTuple_apc_
cms::cuda::AtomicPairCounter * device_hitTuple_apc_
Definition: CAHitNtupletGeneratorKernels.h:217
iev
const HitContainer *__restrict__ const TkSoA *__restrict__ const Quality *__restrict__ const CAHitNtupletGeneratorKernelsGPU::HitToTuple *__restrict__ int32_t int iev
Definition: CAHitNtupletGeneratorKernelsImpl.h:862
cAHitNtupletGenerator::Params::minHitsForSharingCut_
const uint16_t minHitsForSharingCut_
Definition: CAHitNtupletGeneratorKernels.h:110
GPUCACell::CellNeighbors
caConstants::CellNeighbors CellNeighbors
Definition: GPUCACell.h:26
gather_cfg.cout
cout
Definition: gather_cfg.py:144
CAHitNtupletGeneratorKernels::params_
Params const & params_
Definition: CAHitNtupletGeneratorKernels.h:223
TrackingRecHit2DHeterogeneous
Definition: TrackingRecHit2DHeterogeneous.h:8
cms::cuda::stream
uint32_t const T *__restrict__ const uint32_t *__restrict__ int32_t int Histo::index_type cudaStream_t stream
Definition: HistoContainer.h:51
cms::cuda::assert
assert(be >=bs)
CAHitNtupletGeneratorKernels::device_theCellTracks_
unique_ptr< caConstants::CellTracksVector > device_theCellTracks_
Definition: CAHitNtupletGeneratorKernels.h:204
cAHitNtupletGenerator::Params::dcaCutInnerTriplet_
const float dcaCutInnerTriplet_
Definition: CAHitNtupletGeneratorKernels.h:128
CAHitNtupletGeneratorKernels::device_theCellNeighbors_
unique_ptr< caConstants::CellNeighborsVector > device_theCellNeighbors_
Definition: CAHitNtupletGeneratorKernels.h:202
TrackingRecHit2DSOAView
Definition: TrackingRecHit2DSOAView.h:16
CAHitNtupletGeneratorKernels::device_theCells_
unique_ptr< GPUCACell[]> device_theCells_
Definition: CAHitNtupletGeneratorKernels.h:207
CAHitNtupletGeneratorKernels::launchKernels
void launchKernels(HitsOnCPU const &hh, TkSoA *tuples_d, cudaStream_t cudaStream)
Definition: CAHitNtupletGeneratorKernels.cc:77
CAHitNtupletGeneratorKernels::device_nCells_
uint32_t * device_nCells_
Definition: CAHitNtupletGeneratorKernels.h:209
CAHitNtupletGeneratorKernels::device_tupleMultiplicity_
unique_ptr< TupleMultiplicity > device_tupleMultiplicity_
Definition: CAHitNtupletGeneratorKernels.h:219
CAHitNtupletGeneratorKernels::device_hitToTuple_apc_
cms::cuda::AtomicPairCounter * device_hitToTuple_apc_
Definition: CAHitNtupletGeneratorKernels.h:215
cAHitNtupletGenerator::Params::idealConditions_
const bool idealConditions_
Definition: CAHitNtupletGeneratorKernels.h:116
cAHitNtupletGenerator::Params::lateFishbone_
const bool lateFishbone_
Definition: CAHitNtupletGeneratorKernels.h:115
cAHitNtupletGenerator::Params::dupPassThrough_
const bool dupPassThrough_
Definition: CAHitNtupletGeneratorKernels.h:122
cAHitNtupletGenerator::Params::doSharedHitCut_
const bool doSharedHitCut_
Definition: CAHitNtupletGeneratorKernels.h:121
counters
const caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple cms::cuda::AtomicPairCounter const GPUCACell *__restrict__ const uint32_t *__restrict__ const gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell *__restrict__ int32_t uint32_t CAHitNtupletGeneratorKernelsGPU::Counters * counters
Definition: CAHitNtupletGeneratorKernelsImpl.h:53
cAHitNtupletGenerator::Params::maxNumberOfDoublets_
const uint32_t maxNumberOfDoublets_
Definition: CAHitNtupletGeneratorKernels.h:109
CAHitNtupletGeneratorKernels::device_theCellNeighborsContainer_
caConstants::CellNeighbors * device_theCellNeighborsContainer_
Definition: CAHitNtupletGeneratorKernels.h:203
cAHitNtupletGenerator::Params::minHitsPerNtuplet_
const uint32_t minHitsPerNtuplet_
Definition: CAHitNtupletGeneratorKernels.h:108
cAHitNtupletGenerator::Params::useSimpleTripletCleaner_
const bool useSimpleTripletCleaner_
Definition: CAHitNtupletGeneratorKernels.h:123
CAHitNtupletGeneratorKernels::device_hitToTuple_
unique_ptr< HitToTuple > device_hitToTuple_
Definition: CAHitNtupletGeneratorKernels.h:211
cAHitNtupletGenerator::Params::hardCurvCut_
const float hardCurvCut_
Definition: CAHitNtupletGeneratorKernels.h:127
CAHitNtupletGeneratorKernels::classifyTuples
void classifyTuples(HitsOnCPU const &hh, TkSoA *tuples_d, cudaStream_t cudaStream)
Definition: CAHitNtupletGeneratorKernels.cc:156
cAHitNtupletGenerator::Params::dcaCutOuterTriplet_
const float dcaCutOuterTriplet_
Definition: CAHitNtupletGeneratorKernels.h:129
caConstants::maxNumOfActiveDoublets
constexpr uint32_t maxNumOfActiveDoublets
Definition: CAConstants.h:40
nhits
Definition: HIMultiTrackSelector.h:42
gpuPixelDoublets::nPairsForQuadruplets
constexpr int nPairsForQuadruplets
Definition: gpuPixelDoublets.h:10
mitigatedMETSequence_cff.U
U
Definition: mitigatedMETSequence_cff.py:36
gpuPixelDoublets::nActualPairs
uint32_t CellNeighborsVector CellTracksVector TrackingRecHit2DSOAView const *__restrict__ GPUCACell::OuterHitOfCell int nActualPairs
Definition: gpuPixelDoublets.h:99
TrackSoAHeterogeneousT
Definition: TrackSoAHeterogeneousT.h:23
CAHitNtupletGeneratorKernels::device_isOuterHitOfCell_
unique_ptr< GPUCACell::OuterHitOfCell[]> device_isOuterHitOfCell_
Definition: CAHitNtupletGeneratorKernels.h:208
hh
const auto & hh
Definition: CAHitNtupletGeneratorKernelsImpl.h:552
SiStripPI::max
Definition: SiStripPayloadInspectorHelper.h:169
cAHitNtupletGenerator::Params::cuts_
QualityCuts cuts_
Definition: CAHitNtupletGeneratorKernels.h:132
cAHitNtupletGenerator::Params::doPtCut_
const bool doPtCut_
Definition: CAHitNtupletGeneratorKernels.h:120
cAHitNtupletGenerator::Params::includeJumpingForwardDoublets_
const bool includeJumpingForwardDoublets_
Definition: CAHitNtupletGeneratorKernels.h:113
cAHitNtupletGenerator::Params::CAThetaCutForward_
const float CAThetaCutForward_
Definition: CAHitNtupletGeneratorKernels.h:126
cms::cuda::VecArray
Definition: VecArray.h:14
CAHitNtupletGeneratorKernels::fillHitDetIndices
void fillHitDetIndices(HitsView const *hv, TkSoA *tuples_d, cudaStream_t cudaStream)
Definition: CAHitNtupletGeneratorKernels.cc:9
cAHitNtupletGenerator::Params::doZ0Cut_
const bool doZ0Cut_
Definition: CAHitNtupletGeneratorKernels.h:119
CAHitNtupletGeneratorKernels::printCounters
static void printCounters(Counters const *counters)
Definition: CAHitNtupletGeneratorKernels.cc:4
gpuPixelDoublets::nPairsForTriplets
constexpr int nPairsForTriplets
Definition: gpuPixelDoublets.h:11
cAHitNtupletGenerator::Counters
Definition: CAHitNtupletGeneratorKernels.h:14
gpuPixelDoublets::nPairs
constexpr int nPairs
Definition: gpuPixelDoublets.h:12
CAHitNtupletGeneratorKernelsImpl.h
CAHitNtupletGeneratorKernels::hitToTupleView_
HitToTuple::View hitToTupleView_
Definition: CAHitNtupletGeneratorKernels.h:213
TrackSoAHeterogeneousT::hitIndices
HitContainer hitIndices
Definition: TrackSoAHeterogeneousT.h:62
TrackSoAHeterogeneousT::qualityData
constexpr const Quality * qualityData() const
Definition: TrackSoAHeterogeneousT.h:39
CAHitNtupletGeneratorKernels::cellStorage_
unique_ptr< unsigned char[]> cellStorage_
Definition: CAHitNtupletGeneratorKernels.h:201
TrackSoAHeterogeneousT::detIndices
HitContainer detIndices
Definition: TrackSoAHeterogeneousT.h:63
CAHitNtupletGeneratorKernels::buildDoublets
void buildDoublets(HitsOnCPU const &hh, cudaStream_t stream)
Definition: CAHitNtupletGeneratorKernels.cc:14
cAHitNtupletGenerator::Params::doClusterCut_
const bool doClusterCut_
Definition: CAHitNtupletGeneratorKernels.h:118
CAHitNtupletGeneratorKernels::counters_
Counters * counters_
Definition: CAHitNtupletGeneratorKernels.h:198
cAHitNtupletGenerator::Params::doStats_
const bool doStats_
Definition: CAHitNtupletGeneratorKernels.h:117
cAHitNtupletGenerator::Params::ptmin_
const float ptmin_
Definition: CAHitNtupletGeneratorKernels.h:124
cAHitNtupletGenerator::Params::CAThetaCutBarrel_
const float CAThetaCutBarrel_
Definition: CAHitNtupletGeneratorKernels.h:125