CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
CAHitNtupletGeneratorKernels.h
Go to the documentation of this file.
1 #ifndef RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorKernels_h
2 #define RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorKernels_h
3 
4 // #define GPU_DEBUG
5 
7 #include "GPUCACell.h"
8 
9 // #define DUMP_GPU_TK_TUPLES
10 
11 namespace cAHitNtupletGenerator {
12 
13  // counters
14  struct Counters {
15  unsigned long long nEvents;
16  unsigned long long nHits;
17  unsigned long long nCells;
18  unsigned long long nTuples;
19  unsigned long long nFitTracks;
20  unsigned long long nLooseTracks;
21  unsigned long long nGoodTracks;
22  unsigned long long nUsedHits;
23  unsigned long long nDupHits;
24  unsigned long long nKilledCells;
25  unsigned long long nEmptyCells;
26  unsigned long long nZeroTrackCells;
27  };
28 
31 
34 
38 
39  struct QualityCuts {
40  // chi2 cut = chi2Scale * (chi2Coeff[0] + pT/GeV * (chi2Coeff[1] + pT/GeV * (chi2Coeff[2] + pT/GeV * chi2Coeff[3])))
41  float chi2Coeff[4];
42  float chi2MaxPt; // GeV
43  float chi2Scale;
44 
45  struct Region {
46  float maxTip; // cm
47  float minPt; // GeV
48  float maxZip; // cm
49  };
50 
53  };
54 
55  // params (FIXME: thi si a POD: so no constructor no traling _ and no const as params_ is already const)
56  struct Params {
57  Params(bool onGPU,
58  uint32_t minHitsPerNtuplet,
59  uint32_t maxNumberOfDoublets,
60  uint16_t minHitsForSharingCuts,
61  bool useRiemannFit,
62  bool fitNas4,
63  bool includeJumpingForwardDoublets,
64  bool earlyFishbone,
65  bool lateFishbone,
66  bool idealConditions,
67  bool doStats,
68  bool doClusterCut,
69  bool doZ0Cut,
70  bool doPtCut,
71  bool doSharedHitCut,
72  bool dupPassThrough,
73  bool useSimpleTripletCleaner,
74  float ptmin,
75  float CAThetaCutBarrel,
76  float CAThetaCutForward,
77  float hardCurvCut,
78  float dcaCutInnerTriplet,
79  float dcaCutOuterTriplet,
80 
81  QualityCuts const& cuts)
82  : onGPU_(onGPU),
83  minHitsPerNtuplet_(minHitsPerNtuplet),
84  maxNumberOfDoublets_(maxNumberOfDoublets),
85  minHitsForSharingCut_(minHitsForSharingCuts),
86  useRiemannFit_(useRiemannFit),
87  fitNas4_(fitNas4),
88  includeJumpingForwardDoublets_(includeJumpingForwardDoublets),
89  earlyFishbone_(earlyFishbone),
90  lateFishbone_(lateFishbone),
91  idealConditions_(idealConditions),
92  doStats_(doStats),
93  doClusterCut_(doClusterCut),
94  doZ0Cut_(doZ0Cut),
95  doPtCut_(doPtCut),
96  doSharedHitCut_(doSharedHitCut),
97  dupPassThrough_(dupPassThrough),
98  useSimpleTripletCleaner_(useSimpleTripletCleaner),
99  ptmin_(ptmin),
100  CAThetaCutBarrel_(CAThetaCutBarrel),
101  CAThetaCutForward_(CAThetaCutForward),
102  hardCurvCut_(hardCurvCut),
103  dcaCutInnerTriplet_(dcaCutInnerTriplet),
104  dcaCutOuterTriplet_(dcaCutOuterTriplet),
105  cuts_(cuts) {}
106 
107  const bool onGPU_;
108  const uint32_t minHitsPerNtuplet_;
109  const uint32_t maxNumberOfDoublets_;
110  const uint16_t minHitsForSharingCut_;
111  const bool useRiemannFit_;
112  const bool fitNas4_;
114  const bool earlyFishbone_;
115  const bool lateFishbone_;
116  const bool idealConditions_;
117  const bool doStats_;
118  const bool doClusterCut_;
119  const bool doZ0Cut_;
120  const bool doPtCut_;
121  const bool doSharedHitCut_;
122  const bool dupPassThrough_;
124  const float ptmin_;
125  const float CAThetaCutBarrel_;
126  const float CAThetaCutForward_;
127  const float hardCurvCut_;
128  const float dcaCutInnerTriplet_;
129  const float dcaCutOuterTriplet_;
130 
131  // quality cuts
132  QualityCuts cuts_{// polynomial coefficients for the pT-dependent chi2 cut
133  {0.68177776, 0.74609577, -0.08035491, 0.00315399},
134  // max pT used to determine the chi2 cut
135  10.,
136  // chi2 scale factor: 30 for broken line fit, 45 for Riemann fit
137  30.,
138  // regional cuts for triplets
139  {
140  0.3, // |Tip| < 0.3 cm
141  0.5, // pT > 0.5 GeV
142  12.0 // |Zip| < 12.0 cm
143  },
144  // regional cuts for quadruplets
145  {
146  0.5, // |Tip| < 0.5 cm
147  0.3, // pT > 0.3 GeV
148  12.0 // |Zip| < 12.0 cm
149  }};
150 
151  }; // Params
152 
153 } // namespace cAHitNtupletGenerator
154 
155 template <typename TTraits>
157 public:
158  using Traits = TTraits;
159 
163 
164  template <typename T>
166 
170 
173 
177 
179  : params_(params), paramsMaxDoubletes3Quarters_(3 * params.maxNumberOfDoublets_ / 4) {}
180  ~CAHitNtupletGeneratorKernels() = default;
181 
183 
184  void launchKernels(HitsOnCPU const& hh, TkSoA* tuples_d, cudaStream_t cudaStream);
185 
186  void classifyTuples(HitsOnCPU const& hh, TkSoA* tuples_d, cudaStream_t cudaStream);
187 
188  void buildDoublets(HitsOnCPU const& hh, cudaStream_t stream);
189  void allocateOnGPU(int32_t nHits, cudaStream_t stream);
190  void cleanup(cudaStream_t cudaStream);
191 
192  static void printCounters(Counters const* counters);
194 
195 private:
196  Counters* counters_ = nullptr;
197 
198  // workspace
204 
208  uint32_t* device_nCells_ = nullptr;
209 
213 
215 
217 
219 
221  // params
222  Params const& params_;
226  inline uint32_t nDoubletBlocks(uint32_t blockSize) {
227  // We want (3 * params_.maxNumberOfDoublets_ / 4 + blockSize - 1) / blockSize, but first part is pre-computed.
228  return (paramsMaxDoubletes3Quarters_ + blockSize - 1) / blockSize;
229  }
230 
232  inline uint32_t nQuadrupletBlocks(uint32_t blockSize) {
233  // caConstants::maxNumberOfQuadruplets is a constexpr, so the compiler will pre compute the 3*max/4
234  return (3 * caConstants::maxNumberOfQuadruplets / 4 + blockSize - 1) / blockSize;
235  }
236 };
237 
240 
241 #endif // RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorKernels_h
void launchKernels(HitsOnCPU const &hh, TkSoA *tuples_d, cudaStream_t cudaStream)
uint32_t CellNeighborsVector CellTracksVector TrackingRecHit2DSOAView const *__restrict__ GPUCACell::OuterHitOfCell int bool bool bool doZ0Cut
unique_ptr< HitToTuple > device_hitToTuple_
Params(bool onGPU, uint32_t minHitsPerNtuplet, uint32_t maxNumberOfDoublets, uint16_t minHitsForSharingCuts, bool useRiemannFit, bool fitNas4, bool includeJumpingForwardDoublets, bool earlyFishbone, bool lateFishbone, bool idealConditions, bool doStats, bool doClusterCut, bool doZ0Cut, bool doPtCut, bool doSharedHitCut, bool dupPassThrough, bool useSimpleTripletCleaner, float ptmin, float CAThetaCutBarrel, float CAThetaCutForward, float hardCurvCut, float dcaCutInnerTriplet, float dcaCutOuterTriplet, QualityCuts const &cuts)
uint32_t nQuadrupletBlocks(uint32_t blockSize)
Compute the number of quadruplet blocks for block size.
const uint32_t paramsMaxDoubletes3Quarters_
Intermediate result avoiding repeated computations.
void buildDoublets(HitsOnCPU const &hh, cudaStream_t stream)
unique_ptr< HitToTuple::Counter[]> device_hitToTupleStorage_
constexpr uint32_t maxNumberOfQuadruplets
Definition: CAConstants.h:42
void classifyTuples(HitsOnCPU const &hh, TkSoA *tuples_d, cudaStream_t cudaStream)
typename Traits::template unique_ptr< T > unique_ptr
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
static void printCounters(Counters const *counters)
uint32_t CellNeighborsVector CellTracksVector TrackingRecHit2DSOAView const *__restrict__ GPUCACell::OuterHitOfCell int bool bool doClusterCut
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell const int32_t uint32_t CAHitNtupletGeneratorKernelsGPU::Counters * counters
TkSoA const *__restrict__ CAHitNtupletGeneratorKernelsGPU::QualityCuts cuts
TupleMultiplicity const * tupleMultiplicity() const
void allocateOnGPU(int32_t nHits, cudaStream_t stream)
uint32_t nDoubletBlocks(uint32_t blockSize)
Compute the number of doublet blocks for block size.
TrackSoA::HitContainer HitContainer
unique_ptr< caConstants::CellNeighborsVector > device_theCellNeighbors_
TrackSoAHeterogeneousT< maxNumber()> TrackSoA
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell const int32_t uint32_t maxNumberOfDoublets
unique_ptr< unsigned char[]> cellStorage_
CAHitNtupletGeneratorKernels(Params const &params)
caConstants::CellNeighbors * device_theCellNeighborsContainer_
cms::cuda::AtomicPairCounter * device_hitTuple_apc_
unique_ptr< GPUCACell::OuterHitOfCellContainer[]> device_isOuterHitOfCell_
void cleanup(cudaStream_t cudaStream)
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell const int32_t nHits
GPUCACell::OuterHitOfCell isOuterHitOfCell_
caConstants::CellTracks * device_theCellTracksContainer_
uint32_t const *__restrict__ TkSoA const *__restrict__ Quality bool dupPassThrough
unique_ptr< cms::cuda::AtomicPairCounter::c_type[]> device_storage_
double ptmin
Definition: HydjetWrapper.h:84
cAHitNtupletGenerator::Counters Counters
cms::cuda::OneToManyAssoc< tindex_type,-1, 4 *maxTuples > HitToTuple
Definition: CAConstants.h:79
uint32_t CellNeighborsVector CellTracksVector TrackingRecHit2DSOAView const *__restrict__ GPUCACell::OuterHitOfCell int bool bool bool bool doPtCut
cms::cuda::OneToManyAssoc< tindex_type, maxHitsOnTrack+1, maxTuples > TupleMultiplicity
Definition: CAConstants.h:80
unique_ptr< caConstants::CellTracksVector > device_theCellTracks_
auto const & hh
def template
Definition: svgfig.py:521
unique_ptr< TupleMultiplicity > device_tupleMultiplicity_
cms::cuda::AtomicPairCounter * device_hitToTuple_apc_