CMS 3D CMS Logo

CAHitNtupletGeneratorKernels.h
Go to the documentation of this file.
1 #ifndef RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorKernels_h
2 #define RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorKernels_h
3 
4 // #define GPU_DEBUG
5 
7 #include "GPUCACell.h"
8 
9 // #define DUMP_GPU_TK_TUPLES
10 
12 
13  // counters
14  struct Counters {  // plain aggregate of thirteen unsigned long long counters; printCounters() takes a Counters const*
15  unsigned long long nEvents;  // NOTE(review): per-field meanings are not stated in this header; confirm against the code that fills them
16  unsigned long long nHits;
17  unsigned long long nCells;
18  unsigned long long nTuples;
19  unsigned long long nFitTracks;
20  unsigned long long nLooseTracks;
21  unsigned long long nGoodTracks;
22  unsigned long long nUsedHits;
23  unsigned long long nDupHits;
24  unsigned long long nFishCells;
25  unsigned long long nKilledCells;
26  unsigned long long nEmptyCells;
27  unsigned long long nZeroTrackCells;
28  };
29 
32 
35 
39 
40  struct QualityCuts {  // parameters of the pT-dependent chi2 cut plus a nested per-region cut type
41  // chi2 cut = chi2Scale * (chi2Coeff[0] + pT/GeV * (chi2Coeff[1] + pT/GeV * (chi2Coeff[2] + pT/GeV * chi2Coeff[3])))
42  float chi2Coeff[4];  // polynomial coefficients in pT, constant term first (see the formula above)
43  float chi2MaxPt; // GeV; max pT used to determine the chi2 cut
44  float chi2Scale;  // overall factor multiplying the polynomial (see the formula above)
45 
46  struct Region {
47  float maxTip; // cm; bound on |Tip|
48  float minPt; // GeV; lower bound on pT
49  float maxZip; // cm; bound on |Zip|
50  };
51  // NOTE(review): listing lines 52-53 are missing from this extract; Params::cuts_ is initialised with two Region aggregates (triplets, quadruplets), so two Region members presumably belong here — confirm against the original header
54  };
55 
56  // params (FIXME: this is a POD: so no constructor, no trailing _ and no const, as params_ is already const)
57  struct Params {  // immutable configuration: const data members set once by the constructor
58  Params(bool onGPU,  // NOTE(review): listing lines 64 and 74 are missing here; the full signature in this page's symbol index also has includeJumpingForwardDoublets (after fitNas4) and useSimpleTripletCleaner (after dupPassThrough)
59  uint32_t minHitsPerNtuplet,
60  uint32_t maxNumberOfDoublets,
61  uint16_t minHitsForSharingCuts,
62  bool useRiemannFit,
63  bool fitNas4,
65  bool earlyFishbone,
66  bool lateFishbone,
67  bool idealConditions,
68  bool doStats,
69  bool doClusterCut,
70  bool doZ0Cut,
71  bool doPtCut,
72  bool doSharedHitCut,
73  bool dupPassThrough,
75  float ptmin,
76  float CAThetaCutBarrel,
77  float CAThetaCutForward,
78  float hardCurvCut,
79  float dcaCutInnerTriplet,
80  float dcaCutOuterTriplet,
81  // NOTE(review): only five mem-initializers survive in this extract (listing lines 84-85, 87-92, 94-99, 101-105 are missing); every const member below needs one — confirm against the original header
82  QualityCuts const& cuts)
83  : onGPU_(onGPU),
86  minHitsForSharingCut_(minHitsForSharingCuts),
93  doStats_(doStats),
100  ptmin_(ptmin),
106  cuts_(cuts) {}
107  // data members: one per constructor argument, named with a trailing underscore
108  const bool onGPU_;
109  const uint32_t minHitsPerNtuplet_;
110  const uint32_t maxNumberOfDoublets_;
111  const uint16_t minHitsForSharingCut_;
112  const bool useRiemannFit_;
113  const bool fitNas4_;
115  const bool earlyFishbone_;
116  const bool lateFishbone_;
117  const bool idealConditions_;
118  const bool doStats_;
119  const bool doClusterCut_;
120  const bool doZ0Cut_;
121  const bool doPtCut_;
122  const bool doSharedHitCut_;
123  const bool dupPassThrough_;
125  const float ptmin_;
126  const float CAThetaCutBarrel_;
127  const float CAThetaCutForward_;
128  const float hardCurvCut_;
129  const float dcaCutInnerTriplet_;
130  const float dcaCutOuterTriplet_;
131 
132  // quality cuts (the values below are a default member initializer; the constructor's cuts_(cuts) initializer is used instead of them)
133  QualityCuts cuts_{// polynomial coefficients for the pT-dependent chi2 cut
134  {0.68177776, 0.74609577, -0.08035491, 0.00315399},
135  // max pT used to determine the chi2 cut
136  10.,
137  // chi2 scale factor: 30 for broken line fit, 45 for Riemann fit
138  30.,
139  // regional cuts for triplets
140  {
141  0.3, // |Tip| < 0.3 cm
142  0.5, // pT > 0.5 GeV
143  12.0 // |Zip| < 12.0 cm
144  },
145  // regional cuts for quadruplets
146  {
147  0.5, // |Tip| < 0.5 cm
148  0.3, // pT > 0.3 GeV
149  12.0 // |Zip| < 12.0 cm
150  }};
151 
152  }; // Params
153 
154 } // namespace cAHitNtupletGenerator
155 
156 template <typename TTraits>
158 public:
159  using Traits = TTraits;
160 
164 
165  template <typename T>
167 
171 
174 
178 
180  : params_(params), paramsMaxDoubletes3Quarters_(3 * params.maxNumberOfDoublets_ / 4) {}
181  ~CAHitNtupletGeneratorKernels() = default;
182 
184 
185  void launchKernels(HitsOnCPU const& hh, TkSoA* tuples_d, cudaStream_t cudaStream);
186 
187  void classifyTuples(HitsOnCPU const& hh, TkSoA* tuples_d, cudaStream_t cudaStream);
188 
189  void buildDoublets(HitsOnCPU const& hh, cudaStream_t stream);
190  void allocateOnGPU(int32_t nHits, cudaStream_t stream);
191  void cleanup(cudaStream_t cudaStream);
192 
193  static void printCounters(Counters const* counters);
195 
196 private:
197  Counters* counters_ = nullptr;
198 
199  // workspace
205 
209  uint32_t* device_nCells_ = nullptr;
210 
214 
216 
218 
220 
222  // params
223  Params const& params_;
227  inline uint32_t nDoubletBlocks(uint32_t blockSize) {
228  // Number of blocks of blockSize needed to cover 3 * params_.maxNumberOfDoublets_ / 4, rounded up: (x + blockSize - 1) / blockSize, where x is pre-computed in paramsMaxDoubletes3Quarters_. blockSize must be non-zero (it is the divisor).
229  return (paramsMaxDoubletes3Quarters_ + blockSize - 1) / blockSize;
230  }
231 
233  inline uint32_t nQuadrupletBlocks(uint32_t blockSize) {
234  // Number of blocks of blockSize needed to cover 3 * caConstants::maxNumberOfQuadruplets / 4, rounded up. maxNumberOfQuadruplets is a constexpr, so the compiler can pre-compute the 3*max/4 part; blockSize must be non-zero (it is the divisor).
235  return (3 * caConstants::maxNumberOfQuadruplets / 4 + blockSize - 1) / blockSize;
236  }
237 };
238 
241 
242 #endif // RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorKernels_h
void launchKernels(HitsOnCPU const &hh, TkSoA *tuples_d, cudaStream_t cudaStream)
unique_ptr< HitToTuple > device_hitToTuple_
Params(bool onGPU, uint32_t minHitsPerNtuplet, uint32_t maxNumberOfDoublets, uint16_t minHitsForSharingCuts, bool useRiemannFit, bool fitNas4, bool includeJumpingForwardDoublets, bool earlyFishbone, bool lateFishbone, bool idealConditions, bool doStats, bool doClusterCut, bool doZ0Cut, bool doPtCut, bool doSharedHitCut, bool dupPassThrough, bool useSimpleTripletCleaner, float ptmin, float CAThetaCutBarrel, float CAThetaCutForward, float hardCurvCut, float dcaCutInnerTriplet, float dcaCutOuterTriplet, QualityCuts const &cuts)
uint32_t nQuadrupletBlocks(uint32_t blockSize)
Compute the number of quadruplet blocks for block size.
const uint32_t paramsMaxDoubletes3Quarters_
Intermediate result avoiding repeated computations.
void buildDoublets(HitsOnCPU const &hh, cudaStream_t stream)
unique_ptr< HitToTuple::Counter[]> device_hitToTupleStorage_
constexpr uint32_t maxNumberOfQuadruplets
Definition: CAConstants.h:42
void classifyTuples(HitsOnCPU const &hh, TkSoA *tuples_d, cudaStream_t cudaStream)
auto const & hh
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell const int32_t uint32_t CAHitNtupletGeneratorKernelsGPU::Counters * counters
typename Traits::template unique_ptr< T > unique_ptr
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
static void printCounters(Counters const *counters)
TkSoA const *__restrict__ CAHitNtupletGeneratorKernelsGPU::QualityCuts cuts
void allocateOnGPU(int32_t nHits, cudaStream_t stream)
TupleMultiplicity const * tupleMultiplicity() const
uint32_t nDoubletBlocks(uint32_t blockSize)
Compute the number of doublet blocks for block size.
TrackSoA::HitContainer HitContainer
def template(fileName, svg, replaceme="REPLACEME")
Definition: svgfig.py:521
unique_ptr< caConstants::CellNeighborsVector > device_theCellNeighbors_
TrackSoAHeterogeneousT< maxNumber()> TrackSoA
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell const int32_t uint32_t maxNumberOfDoublets
unique_ptr< unsigned char[]> cellStorage_
CAHitNtupletGeneratorKernels(Params const &params)
caConstants::CellNeighbors * device_theCellNeighborsContainer_
cms::cuda::AtomicPairCounter * device_hitTuple_apc_
unique_ptr< GPUCACell::OuterHitOfCellContainer[]> device_isOuterHitOfCell_
void cleanup(cudaStream_t cudaStream)
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell const int32_t nHits
GPUCACell::OuterHitOfCell isOuterHitOfCell_
caConstants::CellTracks * device_theCellTracksContainer_
uint32_t const *__restrict__ TkSoA const *__restrict__ Quality bool dupPassThrough
unique_ptr< cms::cuda::AtomicPairCounter::c_type[]> device_storage_
double ptmin
Definition: HydjetWrapper.h:84
cAHitNtupletGenerator::Counters Counters
cms::cuda::OneToManyAssoc< tindex_type, -1, 4 *maxTuples > HitToTuple
Definition: CAConstants.h:79
cms::cuda::OneToManyAssoc< tindex_type, maxHitsOnTrack+1, maxTuples > TupleMultiplicity
Definition: CAConstants.h:80
unique_ptr< caConstants::CellTracksVector > device_theCellTracks_
unique_ptr< TupleMultiplicity > device_tupleMultiplicity_
cms::cuda::AtomicPairCounter * device_hitToTuple_apc_