CMS 3D CMS Logo

CAHitNtupletGeneratorKernels.cc
Go to the documentation of this file.
2 
3 #include <mutex>
4 
5 namespace {
6  // CUDA atomics are NOT atomic on the CPU, so the statistics update is protected with a mutex
7  // while waiting for a more general solution (including multiple devices) to be proposed and implemented.
 // lock_stat is acquired through std::lock_guard around the params_.doStats_ sections of classifyTuples in this file.
8  std::mutex lock_stat;
9 } // namespace
10 
// Explicit specialization whose visible body forwards `counters` to kernel_printCounters.
// NOTE(review): the signature line (listing line 12) is absent from this view — confirm it against the original file.
11 template <>
13  kernel_printCounters(counters);
14 }
15 
// Explicit specialization (CPU build, per the comments below) that prepares the doublet workspace and fills it.
// Visible steps: read the hit count from `hh`, allocate device_isOuterHitOfCell_, cellStorage_ and
// device_theCells_ with std::make_unique, call gpuPixelDoublets::initDoublets, then — unless the event has
// no hits — call gpuPixelDoublets::getDoubletsFromHisto.
// NOTE(review): the signature line and several interior lines (listing lines 17, 29-30, 33, 37-38, 43-46,
// 55-56, 58, 62, 65, 67-69, 71-77) are absent from this view — confirm details against the original file.
16 template <>
18  auto nhits = hh.nHits();
19 
20 #ifdef NTUPLE_DEBUG
21  std::cout << "building Doublets out of " << nhits << " Hits. BPIX2 offset is " << hh.offsetBPIX2() << std::endl;
22 #endif
23 
24  // use "nhits" to heuristically dimension the workspace
25 
26  // no need to use the Traits allocations, since we know this is being compiled for the CPU
27  //device_isOuterHitOfCell_ = Traits::template make_unique<GPUCACell::OuterHitOfCell[]>(std::max(1U, nhits), stream);
 // std::max(1U, nhits) keeps the array length at least 1 even when nhits is 0
28  device_isOuterHitOfCell_ = std::make_unique<GPUCACell::OuterHitOfCellContainer[]>(std::max(1U, nhits));
31 
32  auto cellStorageSize = caConstants::maxNumOfActiveDoublets * sizeof(GPUCACell::CellNeighbors) +
34  // no need to use the Traits allocations, since we know this is being compiled for the CPU
35  //cellStorage_ = Traits::template make_unique<unsigned char[]>(cellStorageSize, stream);
 // raw byte buffer of cellStorageSize bytes
36  cellStorage_ = std::make_unique<unsigned char[]>(cellStorageSize);
39  sizeof(GPUCACell::CellNeighbors));
40 
41  gpuPixelDoublets::initDoublets(isOuterHitOfCell_,
42  nhits,
47 
48  // no need to use the Traits allocations, since we know this is being compiled for the CPU
49  //device_theCells_ = Traits::template make_unique<GPUCACell[]>(params_.maxNumberOfDoublets_, stream);
50  device_theCells_ = std::make_unique<GPUCACell[]>(params_.maxNumberOfDoublets_);
 // the early return comes after all allocations above, so the buffers exist even for empty events
51  if (0 == nhits)
52  return; // protect against empty events
53 
54  // take all layer pairs into account
57  // exclude forward "jumping" layer pairs
59  }
60  if (params_.minHitsPerNtuplet_ > 3) {
61  // for quadruplets, exclude all "jumping" layer pairs
63  }
64 
66  gpuPixelDoublets::getDoubletsFromHisto(device_theCells_.get(),
70  hh.view(),
78 }
79 
// Runs the n-tuplet building sequence as plain function calls (no <<<...>>> launch syntax appears here).
// Parameters:
//   hh         - hit collection; provides nHits() and view()
//   tracks_d   - output track SoA; its hitIndices, detIndices and qualityData() are written through
//   cudaStream - forwarded to cms::cuda::launchZero and cms::cuda::launchFinalize
// Visible order: zero the tuples, kernel_connect, optional early fishbone, kernel_find_ntuplets,
// optional kernel_mark_used, finalizeBulk, fill det indices / layer counts, early duplicate removal,
// tuple-multiplicity count/finalize/fill, optional late fishbone.
// NOTE(review): several argument lines (listing lines 104-107, 109-112, 120, 123, 125) are absent from
// this view — confirm the full call signatures against the original file.
80 template <>
81 void CAHitNtupletGeneratorKernelsCPU::launchKernels(HitsOnCPU const &hh, TkSoA *tracks_d, cudaStream_t cudaStream) {
82  auto *tuples_d = &tracks_d->hitIndices;
83  auto *detId_d = &tracks_d->detIndices;
84  auto *quality_d = tracks_d->qualityData();
85 
 // NOTE(review): tuples_d is the address of a member of *tracks_d, so this check is not a reliable
 // null test for tracks_d itself — confirm intent
86  assert(tuples_d && quality_d);
87 
88  // zero tuples
89  cms::cuda::launchZero(tuples_d, cudaStream);
90 
91  auto nhits = hh.nHits();
92 
93  // std::cout << "N hits " << nhits << std::endl;
94  // if (nhits<2) std::cout << "too few hits " << nhits << std::endl;
95 
96  //
97  // applying combinatorial cleaning such as fishbone at this stage is too expensive
98  //
99 
100  kernel_connect(device_hitTuple_apc_,
101  device_hitToTuple_apc_, // needed only to be reset, ready for next kernel
102  hh.view(),
103  device_theCells_.get(),
108  params_.ptmin_,
113 
 // early fishbone pass: only with more than one hit and when enabled; last argument is false here
114  if (nhits > 1 && params_.earlyFishbone_) {
115  gpuPixelDoublets::fishbone(hh.view(), device_theCells_.get(), device_nCells_, isOuterHitOfCell_, nhits, false);
116  }
117 
118  kernel_find_ntuplets(hh.view(),
119  device_theCells_.get(),
121  device_theCellTracks_.get(),
122  tuples_d,
124  quality_d,
 // cells are marked as used only when statistics collection is enabled
126  if (params_.doStats_)
127  kernel_mark_used(device_theCells_.get(), device_nCells_);
128 
129  cms::cuda::finalizeBulk(device_hitTuple_apc_, tuples_d);
130 
131  kernel_fillHitDetIndices(tuples_d, hh.view(), detId_d);
132  kernel_fillNLayers(tracks_d, device_hitTuple_apc_);
133 
134  // remove duplicates (tracks that share a doublet)
135  kernel_earlyDuplicateRemover(device_theCells_.get(), device_nCells_, tracks_d, quality_d, params_.dupPassThrough_);
136 
 // count, finalize, then fill device_tupleMultiplicity_ (three passes over the same container)
137  kernel_countMultiplicity(tuples_d, quality_d, device_tupleMultiplicity_.get());
138  cms::cuda::launchFinalize(device_tupleMultiplicity_.get(), cudaStream);
139  kernel_fillMultiplicity(tuples_d, quality_d, device_tupleMultiplicity_.get());
140 
 // late fishbone pass: same call as the early one but with the last argument set to true
141  if (nhits > 1 && params_.lateFishbone_) {
142  gpuPixelDoublets::fishbone(hh.view(), device_theCells_.get(), device_nCells_, isOuterHitOfCell_, nhits, true);
143  }
144 }
145 
// Classifies the tracks built by launchKernels and removes duplicates.
// Parameters:
//   hh         - hit collection; provides nHits() and view()
//   tracks_d   - track SoA; hitIndices is read, qualityData() is passed to the cleaning steps
//   cudaStream - forwarded to cms::cuda::launchFinalize
// Visible order: kinematic classification, optional fishbone cleaning, fast duplicate removal,
// hit->track map filling, shared-hit based cleaning, optional statistics, optional debug dump.
// NOTE(review): several lines (listing lines 165, 174, 179-180, 182, 184, 187, 194, 196, 198-199, 201, 203)
// are absent from this view — confirm conditions and call arguments against the original file.
146 template <>
147 void CAHitNtupletGeneratorKernelsCPU::classifyTuples(HitsOnCPU const &hh, TkSoA *tracks_d, cudaStream_t cudaStream) {
 // NOTE(review): stored as int32_t here, whereas launchKernels keeps the result of nHits() as `auto` — confirm the conversion is intended
148  int32_t nhits = hh.nHits();
149 
150  auto const *tuples_d = &tracks_d->hitIndices;
151  auto *quality_d = tracks_d->qualityData();
152 
153  // classify tracks based on kinematics
154  kernel_classifyTracks(tuples_d, tracks_d, params_.cuts_, quality_d);
155 
156  if (params_.lateFishbone_) {
157  // apply fishbone cleaning to good tracks
158  kernel_fishboneCleaner(device_theCells_.get(), device_nCells_, quality_d);
159  }
160 
161  // remove duplicates (tracks that share a doublet)
162  kernel_fastDuplicateRemover(device_theCells_.get(), device_nCells_, tracks_d, params_.dupPassThrough_);
163 
164  // fill hit->track "map"
 // NOTE(review): the line opening this scope (listing line 165) is not visible; the closing brace is at listing line 169
166  kernel_countHitInTracks(tuples_d, quality_d, device_hitToTuple_.get());
167  cms::cuda::launchFinalize(hitToTupleView_, cudaStream);
168  kernel_fillHitInTracks(tuples_d, quality_d, device_hitToTuple_.get());
169  }
170 
171  // remove duplicates (tracks that share at least one hit)
172  if (params_.doSharedHitCut_) {
173  kernel_rejectDuplicate(
175 
176  kernel_sharedHitCleaner(hh.view(),
177  tracks_d,
178  quality_d,
181  device_hitToTuple_.get());
183  kernel_simpleTripletCleaner(
185  } else {
186  kernel_tripletCleaner(
188  }
189  }
190 
 // statistics: overflow checks, serialized across callers by lock_stat
191  if (params_.doStats_) {
192  std::lock_guard guard(lock_stat);
193  kernel_checkOverflows(tuples_d,
195  device_hitToTuple_.get(),
197  device_theCells_.get(),
200  device_theCellTracks_.get(),
202  nhits,
204  counters_);
205  }
206 
 // NOTE(review): same condition as the block above; lock_stat is released and re-acquired between the two sections
207  if (params_.doStats_) {
208  // counters (add flag???)
209  std::lock_guard guard(lock_stat);
210  kernel_doStatsForHitInTracks(device_hitToTuple_.get(), counters_);
211  kernel_doStatsForTracks(tuples_d, quality_d, counters_);
212  }
213 
214 #ifdef DUMP_GPU_TK_TUPLES
 // debug dump: function-local static event counter and mutex; the increment and the print happen under the same lock
215  static std::atomic<int> iev(0);
216  static std::mutex lock;
217  {
218  std::lock_guard<std::mutex> guard(lock);
219  ++iev;
220  kernel_print_found_ntuplets(hh.view(), tuples_d, tracks_d, quality_d, device_hitToTuple_.get(), 0, 1000000, iev);
221  }
222 #endif
223 }
void launchKernels(HitsOnCPU const &hh, TkSoA *tuples_d, cudaStream_t cudaStream)
constexpr uint32_t maxNumOfActiveDoublets
Definition: CAConstants.h:41
unique_ptr< HitToTuple > device_hitToTuple_
void buildDoublets(HitsOnCPU const &hh, cudaStream_t stream)
void classifyTuples(HitsOnCPU const &hh, TkSoA *tuples_d, cudaStream_t cudaStream)
static std::mutex mutex
Definition: Proxy.cc:8
auto const & hh
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell const int32_t uint32_t CAHitNtupletGeneratorKernelsGPU::Counters * counters
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
constexpr int nPairs
assert(be >=bs)
static void printCounters(Counters const *counters)
constexpr Quality const * qualityData() const
unique_ptr< caConstants::CellNeighborsVector > device_theCellNeighbors_
unique_ptr< unsigned char[]> cellStorage_
caConstants::CellNeighbors CellNeighbors
Definition: GPUCACell.h:28
constexpr int nPairsForTriplets
caConstants::CellNeighbors * device_theCellNeighborsContainer_
uint32_t CellNeighborsVector CellTracksVector TrackingRecHit2DSOAView const *__restrict__ GPUCACell::OuterHitOfCell int nActualPairs
cms::cuda::AtomicPairCounter * device_hitTuple_apc_
HitContainer const *__restrict__ TkSoA const *__restrict__ Quality const *__restrict__ CAHitNtupletGeneratorKernelsGPU::HitToTuple const *__restrict__ int32_t int32_t int iev
unique_ptr< GPUCACell::OuterHitOfCellContainer[]> device_isOuterHitOfCell_
GPUCACell::OuterHitOfCell isOuterHitOfCell_
caConstants::CellTracks * device_theCellTracksContainer_
constexpr int nPairsForQuadruplets
unique_ptr< caConstants::CellTracksVector > device_theCellTracks_
unique_ptr< TupleMultiplicity > device_tupleMultiplicity_
cms::cuda::AtomicPairCounter * device_hitToTuple_apc_