CMS 3D CMS Logo

CAHitNtupletGeneratorKernels.cc
Go to the documentation of this file.
1 #include <mutex>
2 
4 
5 namespace {
6  // CUDA atomics are NOT atomic on the CPU, so protect the stat update with a mutex
7  // while waiting for a more general solution (including multiple devices) to be proposed and implemented
8  std::mutex lock_stat;
9 } // namespace
10 
11 template <typename TrackerTraits>
13  caHitNtupletGeneratorKernels::kernel_printCounters(counters);
14 }
15 
16 template <typename TrackerTraits>
18  int32_t offsetBPIX2,
19  cudaStream_t stream) {
20  using namespace gpuPixelDoublets;
21 
22  using GPUCACell = GPUCACellT<TrackerTraits>;
24  using CellNeighbors = typename GPUCACell::CellNeighbors;
25  using CellTracks = typename GPUCACell::CellTracks;
26  using OuterHitOfCellContainer = typename GPUCACell::OuterHitOfCellContainer;
27 
28  auto nhits = hh.nHits();
29 
30 #ifdef NTUPLE_DEBUG
31  std::cout << "building Doublets out of " << nhits << " Hits. BPIX2 offset is " << offsetBPIX2 << std::endl;
32 #endif
33 
34  // use "nhits" to heuristically dimension the workspace
35 
36  // no need to use the Traits allocations, since we know this is being compiled for the CPU
37  //this->device_isOuterHitOfCell_ = Traits::template make_unique<GPUCACell::OuterHitOfCell[]>(std::max(1U, nhits), stream);
38  this->device_isOuterHitOfCell_ = std::make_unique<OuterHitOfCellContainer[]>(std::max(1U, nhits));
39  assert(this->device_isOuterHitOfCell_.get());
40  this->isOuterHitOfCell_ = OuterHitOfCell{this->device_isOuterHitOfCell_.get(), offsetBPIX2};
41 
42  auto cellStorageSize = TrackerTraits::maxNumOfActiveDoublets * sizeof(CellNeighbors) +
43  TrackerTraits::maxNumOfActiveDoublets * sizeof(CellTracks);
44  // no need to use the Traits allocations, since we know this is being compiled for the CPU
45  //cellStorage_ = Traits::template make_unique<unsigned char[]>(cellStorageSize, stream);
46  this->cellStorage_ = std::make_unique<unsigned char[]>(cellStorageSize);
47  this->device_theCellNeighborsContainer_ = (CellNeighbors *)this->cellStorage_.get();
48  this->device_theCellTracksContainer_ =
49  (CellTracks *)(this->cellStorage_.get() + TrackerTraits::maxNumOfActiveDoublets * sizeof(CellNeighbors));
50 
51  initDoublets<TrackerTraits>(this->isOuterHitOfCell_,
52  nhits,
53  this->device_theCellNeighbors_.get(),
54  this->device_theCellNeighborsContainer_,
55  this->device_theCellTracks_.get(),
56  this->device_theCellTracksContainer_);
57 
58  // no need to use the Traits allocations, since we know this is being compiled for the CPU
59  this->device_theCells_ = std::make_unique<GPUCACell[]>(this->params_.caParams_.maxNumberOfDoublets_);
60  if (0 == nhits)
61  return; // protect against empty events
62 
63  // take all layer pairs into account
64  auto nActualPairs = this->params_.nPairs();
65 
67 
68  getDoubletsFromHisto<TrackerTraits>(this->device_theCells_.get(),
69  this->device_nCells_,
70  this->device_theCellNeighbors_.get(),
71  this->device_theCellTracks_.get(),
72  hh,
73  this->isOuterHitOfCell_,
75  this->params_.caParams_.maxNumberOfDoublets_,
76  this->device_cellCuts_.get());
77 }
78 
79 template <typename TrackerTraits>
82  cudaStream_t cudaStream) {
83  using namespace caHitNtupletGeneratorKernels;
84 
85  // zero tuples
86  cms::cuda::launchZero(&tracks_view.hitIndices(), cudaStream);
87 
88  uint32_t nhits = hh.metadata().size();
89 
90 #ifdef NTUPLE_DEBUG
91  std::cout << "start tuple building. N hits " << nhits << std::endl;
92  if (nhits < 2)
93  std::cout << "too few hits " << nhits << std::endl;
94 #endif
95 
96  //
97  // applying combinatorial cleaning such as fishbone at this stage is too expensive
98  //
99 
100  kernel_connect<TrackerTraits>(this->device_hitTuple_apc_,
101  this->device_hitToTuple_apc_, // needed only to be reset, ready for next kernel
102  hh,
103  this->device_theCells_.get(),
104  this->device_nCells_,
105  this->device_theCellNeighbors_.get(),
106  this->isOuterHitOfCell_,
107  this->params_.caParams_);
108 
109  if (nhits > 1 && this->params_.earlyFishbone_) {
110  gpuPixelDoublets::fishbone<TrackerTraits>(
111  hh, this->device_theCells_.get(), this->device_nCells_, this->isOuterHitOfCell_, nhits, false);
112  }
113 
114  kernel_find_ntuplets<TrackerTraits>(hh,
115  tracks_view,
116  this->device_theCells_.get(),
117  this->device_nCells_,
118  this->device_theCellTracks_.get(),
119  this->device_hitTuple_apc_,
120  this->params_.caParams_);
121  if (this->params_.doStats_)
122  kernel_mark_used(this->device_theCells_.get(), this->device_nCells_);
123 
124  cms::cuda::finalizeBulk(this->device_hitTuple_apc_, &tracks_view.hitIndices());
125 
126  kernel_fillHitDetIndices<TrackerTraits>(tracks_view, hh);
127  kernel_fillNLayers<TrackerTraits>(tracks_view, this->device_hitTuple_apc_);
128 
129  // remove duplicates (tracks that share a doublet)
130  kernel_earlyDuplicateRemover<TrackerTraits>(
131  this->device_theCells_.get(), this->device_nCells_, tracks_view, this->params_.dupPassThrough_);
132 
133  kernel_countMultiplicity<TrackerTraits>(tracks_view, this->device_tupleMultiplicity_.get());
134  cms::cuda::launchFinalize(this->device_tupleMultiplicity_.get(), cudaStream);
135  kernel_fillMultiplicity<TrackerTraits>(tracks_view, this->device_tupleMultiplicity_.get());
136 
137  if (nhits > 1 && this->params_.lateFishbone_) {
138  gpuPixelDoublets::fishbone<TrackerTraits>(
139  hh, this->device_theCells_.get(), this->device_nCells_, this->isOuterHitOfCell_, nhits, true);
140  }
141 }
142 
143 template <typename TrackerTraits>
146  cudaStream_t cudaStream) {
147  using namespace caHitNtupletGeneratorKernels;
148 
149  int32_t nhits = hh.metadata().size();
150 
151  // classify tracks based on kinematics
152  kernel_classifyTracks<TrackerTraits>(tracks_view, this->params_.qualityCuts_);
153  if (this->params_.lateFishbone_) {
154  // apply fishbone cleaning to good tracks
155  kernel_fishboneCleaner<TrackerTraits>(this->device_theCells_.get(), this->device_nCells_, tracks_view);
156  }
157 
158  // remove duplicates (tracks that share a doublet)
159  kernel_fastDuplicateRemover<TrackerTraits>(
160  this->device_theCells_.get(), this->device_nCells_, tracks_view, this->params_.dupPassThrough_);
161 
162  // fill hit->track "map"
163  if (this->params_.doSharedHitCut_ || this->params_.doStats_) {
164  kernel_countHitInTracks<TrackerTraits>(tracks_view, this->device_hitToTuple_.get());
165  cms::cuda::launchFinalize(this->hitToTupleView_, cudaStream);
166  kernel_fillHitInTracks<TrackerTraits>(tracks_view, this->device_hitToTuple_.get());
167  }
168 
169  // remove duplicates (tracks that share at least one hit)
170  if (this->params_.doSharedHitCut_) {
171  kernel_rejectDuplicate<TrackerTraits>(tracks_view,
172  this->params_.minHitsForSharingCut_,
173  this->params_.dupPassThrough_,
174  this->device_hitToTuple_.get());
175 
176  kernel_sharedHitCleaner<TrackerTraits>(hh,
177  tracks_view,
178  this->params_.minHitsForSharingCut_,
179  this->params_.dupPassThrough_,
180  this->device_hitToTuple_.get());
181  if (this->params_.useSimpleTripletCleaner_) {
182  kernel_simpleTripletCleaner<TrackerTraits>(tracks_view,
183  this->params_.minHitsForSharingCut_,
184  this->params_.dupPassThrough_,
185  this->device_hitToTuple_.get());
186  } else {
187  kernel_tripletCleaner<TrackerTraits>(tracks_view,
188  this->params_.minHitsForSharingCut_,
189  this->params_.dupPassThrough_,
190  this->device_hitToTuple_.get());
191  }
192  }
193 
194  if (this->params_.doStats_) {
195  std::lock_guard guard(lock_stat);
196  kernel_checkOverflows<TrackerTraits>(tracks_view,
197  this->device_tupleMultiplicity_.get(),
198  this->device_hitToTuple_.get(),
199  this->device_hitTuple_apc_,
200  this->device_theCells_.get(),
201  this->device_nCells_,
202  this->device_theCellNeighbors_.get(),
203  this->device_theCellTracks_.get(),
204  this->isOuterHitOfCell_,
205  nhits,
206  this->params_.caParams_.maxNumberOfDoublets_,
207  this->counters_);
208  }
209 
210  if (this->params_.doStats_) {
211  // counters (add flag???)
212  std::lock_guard guard(lock_stat);
213  kernel_doStatsForHitInTracks<TrackerTraits>(this->device_hitToTuple_.get(), this->counters_);
214  kernel_doStatsForTracks<TrackerTraits>(tracks_view, this->counters_);
215  }
216 
217 #ifdef DUMP_GPU_TK_TUPLES
218  static std::atomic<int> iev(0);
219  static std::mutex lock;
220  {
221  std::lock_guard<std::mutex> guard(lock);
222  ++iev;
223  kernel_print_found_ntuplets<TrackerTraits>(hh, tracks_view, this->device_hitToTuple_.get(), 0, 1000000, iev);
224  }
225 #endif
226 }
227 
static void printCounters(Counters const *counters)
void classifyTuples(const HitsConstView &hh, TkSoAView &track_view, cudaStream_t cudaStream)
uint32_t const *__restrict__ TkSoAView< TrackerTraits > tracks_view
caStructures::CellNeighborsT< TrackerTraits > CellNeighbors
Definition: CAFishbone.h:24
static std::mutex mutex
Definition: Proxy.cc:8
caStructures::CellNeighborsT< TrackerTraits > CellNeighbors
Definition: gpuFishbone.h:20
TkSoAView< TrackerTraits > HitToTuple< TrackerTraits > const *__restrict__ int32_t int32_t int iev
TrackSoAView< TrackerTraits > TkSoAView
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ uint32_t const *__restrict__ CellNeighborsVector< TrackerTraits > const CellTracksVector< TrackerTraits > const OuterHitOfCell< TrackerTraits > const int32_t uint32_t Counters * counters
uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > OuterHitOfCell< TrackerTraits > int nActualPairs
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > hh
void launchKernels(const HitsConstView &hh, TkSoAView &track_view, cudaStream_t cudaStream)
uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > hh
TrackingRecHitSoAConstView< TrackerTraits > HitsConstView
caStructures::OuterHitOfCellT< TrackerTraits > OuterHitOfCell
Definition: CAFishbone.h:32
caStructures::CellTracksT< TrackerTraits > CellTracks
Definition: CAFishbone.h:26
void buildDoublets(const HitsConstView &hh, int32_t offsetBPIX2, cudaStream_t stream)