CMS 3D CMS Logo

CAHitNtupletGeneratorOnGPU.cc
Go to the documentation of this file.
1 //
2 // Original Author: Felice Pantaleo, CERN
3 //
4 
5 // #define GPU_DEBUG
6 // #define DUMP_GPU_TK_TUPLES
7 
8 #include <array>
9 #include <cassert>
10 #include <functional>
11 #include <vector>
12 
27 
29 
30 namespace {
31 
32  using namespace caHitNtupletGenerator;
33  using namespace gpuPixelDoublets;
34  using namespace pixelTopology;
35  using namespace pixelTrack;
36 
37  template <typename T>
38  T sqr(T x) {
39  return x * x;
40  }
41 
42  //Common Params
43  AlgoParams makeCommonParams(edm::ParameterSet const& cfg) {
44  return AlgoParams({cfg.getParameter<bool>("onGPU"),
45  cfg.getParameter<unsigned int>("minHitsForSharingCut"),
46  cfg.getParameter<bool>("useRiemannFit"),
47  cfg.getParameter<bool>("fitNas4"),
48  cfg.getParameter<bool>("includeJumpingForwardDoublets"),
49  cfg.getParameter<bool>("earlyFishbone"),
50  cfg.getParameter<bool>("lateFishbone"),
51  cfg.getParameter<bool>("fillStatistics"),
52  cfg.getParameter<bool>("doSharedHitCut"),
53  cfg.getParameter<bool>("dupPassThrough"),
54  cfg.getParameter<bool>("useSimpleTripletCleaner")});
55  }
56 
57  //This is needed to have the partial specialization for isPhase1Topology/isPhase2Topology
58  template <typename TrackerTraits, typename Enable = void>
59  struct topologyCuts {};
60 
61  template <typename TrackerTraits>
62  struct topologyCuts<TrackerTraits, isPhase1Topology<TrackerTraits>> {
63  static constexpr CAParamsT<TrackerTraits> makeCACuts(edm::ParameterSet const& cfg) {
64  return CAParamsT<TrackerTraits>{{cfg.getParameter<unsigned int>("minHitsPerNtuplet"),
65  (float)cfg.getParameter<double>("ptmin"),
66  (float)cfg.getParameter<double>("CAThetaCutBarrel"),
67  (float)cfg.getParameter<double>("CAThetaCutForward"),
68  (float)cfg.getParameter<double>("hardCurvCut"),
69  (float)cfg.getParameter<double>("dcaCutInnerTriplet"),
70  (float)cfg.getParameter<double>("dcaCutOuterTriplet")}};
71  };
72 
73  static constexpr pixelTrack::QualityCutsT<TrackerTraits> makeQualityCuts(edm::ParameterSet const& pset) {
74  auto coeff = pset.getParameter<std::array<double, 2>>("chi2Coeff");
75  auto ptMax = pset.getParameter<double>("chi2MaxPt");
76 
77  coeff[1] = (coeff[1] - coeff[0]) / log2(ptMax);
78  return pixelTrack::QualityCutsT<TrackerTraits>{// polynomial coefficients for the pT-dependent chi2 cut
79  {(float)coeff[0], (float)coeff[1], 0.f, 0.f},
80  // max pT used to determine the chi2 cut
81  (float)ptMax,
82  // chi2 scale factor: 8 for broken line fit, ?? for Riemann fit
83  (float)pset.getParameter<double>("chi2Scale"),
84  // regional cuts for triplets
85  {(float)pset.getParameter<double>("tripletMaxTip"),
86  (float)pset.getParameter<double>("tripletMinPt"),
87  (float)pset.getParameter<double>("tripletMaxZip")},
88  // regional cuts for quadruplets
89  {(float)pset.getParameter<double>("quadrupletMaxTip"),
90  (float)pset.getParameter<double>("quadrupletMinPt"),
91  (float)pset.getParameter<double>("quadrupletMaxZip")}};
92  }
93  };
94 
95  template <typename TrackerTraits>
96  struct topologyCuts<TrackerTraits, isPhase2Topology<TrackerTraits>> {
97  static constexpr CAParamsT<TrackerTraits> makeCACuts(edm::ParameterSet const& cfg) {
98  return CAParamsT<TrackerTraits>{{cfg.getParameter<unsigned int>("minHitsPerNtuplet"),
99  (float)cfg.getParameter<double>("ptmin"),
100  (float)cfg.getParameter<double>("CAThetaCutBarrel"),
101  (float)cfg.getParameter<double>("CAThetaCutForward"),
102  (float)cfg.getParameter<double>("hardCurvCut"),
103  (float)cfg.getParameter<double>("dcaCutInnerTriplet"),
104  (float)cfg.getParameter<double>("dcaCutOuterTriplet")},
105  {(bool)cfg.getParameter<bool>("includeFarForwards")}};
106  }
107 
108  static constexpr pixelTrack::QualityCutsT<TrackerTraits> makeQualityCuts(edm::ParameterSet const& pset) {
110  (float)pset.getParameter<double>("maxChi2"),
111  (float)pset.getParameter<double>("minPt"),
112  (float)pset.getParameter<double>("maxTip"),
113  (float)pset.getParameter<double>("maxZip"),
114  };
115  }
116  };
117 
118  //Cell Cuts, as they are the cuts have the same logic for Phase2 and Phase1
119  //keeping them separate would allow further differentiation in the future
120  //moving them to topologyCuts and using the same syntax
121  template <typename TrakterTraits>
122  CellCutsT<TrakterTraits> makeCellCuts(edm::ParameterSet const& cfg) {
123  return CellCutsT<TrakterTraits>{cfg.getParameter<unsigned int>("maxNumberOfDoublets"),
124  cfg.getParameter<bool>("doClusterCut"),
125  cfg.getParameter<bool>("doZ0Cut"),
126  cfg.getParameter<bool>("doPtCut"),
127  cfg.getParameter<bool>("idealConditions")};
128  }
129 
130 } // namespace
131 
132 using namespace std;
133 
134 template <typename TrackerTraits>
137  : m_params(makeCommonParams(cfg),
138  makeCellCuts<TrackerTraits>(cfg),
139  topologyCuts<TrackerTraits>::makeQualityCuts(cfg.getParameterSet("trackQualityCuts")),
140  topologyCuts<TrackerTraits>::makeCACuts(cfg)) {
141 #ifdef DUMP_GPU_TK_TUPLES
142  printf("TK: %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n",
143  "tid",
144  "qual",
145  "nh",
146  "nl",
147  "charge",
148  "pt",
149  "eta",
150  "phi",
151  "tip",
152  "zip",
153  "chi2",
154  "h1",
155  "h2",
156  "h3",
157  "h4",
158  "h5",
159  "hn");
160 #endif
161 }
162 
163 template <typename TrackerTraits>
165  fillDescriptionsCommon(desc);
166  edm::LogWarning("CAHitNtupletGeneratorOnGPU::fillDescriptions")
167  << "Note: this fillDescriptions is a dummy one. Most probably you are missing some parameters. \n"
168  "please implement your TrackerTraits descriptions in CAHitNtupletGeneratorOnGPU. \n";
169 }
170 
171 template <>
173  fillDescriptionsCommon(desc);
174 
175  desc.add<bool>("idealConditions", true);
176  desc.add<bool>("includeJumpingForwardDoublets", false);
177 
179  trackQualityCuts.add<double>("chi2MaxPt", 10.)->setComment("max pT used to determine the pT-dependent chi2 cut");
180  trackQualityCuts.add<std::vector<double>>("chi2Coeff", {0.9, 1.8})->setComment("chi2 at 1GeV and at ptMax above");
181  trackQualityCuts.add<double>("chi2Scale", 8.)
182  ->setComment(
183  "Factor to multiply the pT-dependent chi2 cut (currently: 8 for the broken line fit, ?? for the Riemann "
184  "fit)");
185  trackQualityCuts.add<double>("tripletMinPt", 0.5)->setComment("Min pT for triplets, in GeV");
186  trackQualityCuts.add<double>("tripletMaxTip", 0.3)->setComment("Max |Tip| for triplets, in cm");
187  trackQualityCuts.add<double>("tripletMaxZip", 12.)->setComment("Max |Zip| for triplets, in cm");
188  trackQualityCuts.add<double>("quadrupletMinPt", 0.3)->setComment("Min pT for quadruplets, in GeV");
189  trackQualityCuts.add<double>("quadrupletMaxTip", 0.5)->setComment("Max |Tip| for quadruplets, in cm");
190  trackQualityCuts.add<double>("quadrupletMaxZip", 12.)->setComment("Max |Zip| for quadruplets, in cm");
191  desc.add<edm::ParameterSetDescription>("trackQualityCuts", trackQualityCuts)
192  ->setComment(
193  "Quality cuts based on the results of the track fit:\n - apply a pT-dependent chi2 cut;\n - apply \"region "
194  "cuts\" based on the fit results (pT, Tip, Zip).");
195 }
196 
197 template <>
199  fillDescriptionsCommon(desc);
200 
201  desc.add<bool>("idealConditions", false);
202  desc.add<bool>("includeFarForwards", true);
203  desc.add<bool>("includeJumpingForwardDoublets", true);
204 
206  trackQualityCuts.add<double>("maxChi2", 5.)->setComment("Max normalized chi2");
207  trackQualityCuts.add<double>("minPt", 0.5)->setComment("Min pT in GeV");
208  trackQualityCuts.add<double>("maxTip", 0.3)->setComment("Max |Tip| in cm");
209  trackQualityCuts.add<double>("maxZip", 12.)->setComment("Max |Zip|, in cm");
210  desc.add<edm::ParameterSetDescription>("trackQualityCuts", trackQualityCuts)
211  ->setComment(
212  "Quality cuts based on the results of the track fit:\n - apply cuts based on the fit results (pT, Tip, "
213  "Zip).");
214 }
215 
216 template <typename TrackerTraits>
218  // 87 cm/GeV = 1/(3.8T * 0.3)
219  // take less than radius given by the hardPtCut and reject everything below
220  // auto hardCurvCut = 1.f/(0.35 * 87.f);
221  desc.add<double>("ptmin", 0.9f)->setComment("Cut on minimum pt");
222  desc.add<double>("CAThetaCutBarrel", 0.002f)->setComment("Cut on RZ alignement for Barrel");
223  desc.add<double>("CAThetaCutForward", 0.003f)->setComment("Cut on RZ alignment for Forward");
224  desc.add<double>("hardCurvCut", 1.f / (0.35 * 87.f))->setComment("Cut on minimum curvature");
225  desc.add<double>("dcaCutInnerTriplet", 0.15f)->setComment("Cut on origin radius when the inner hit is on BPix1");
226  desc.add<double>("dcaCutOuterTriplet", 0.25f)->setComment("Cut on origin radius when the outer hit is on BPix1");
227  desc.add<bool>("earlyFishbone", true);
228  desc.add<bool>("lateFishbone", false);
229  desc.add<bool>("fillStatistics", false);
230  desc.add<unsigned int>("minHitsPerNtuplet", 4);
231  desc.add<unsigned int>("maxNumberOfDoublets", TrackerTraits::maxNumberOfDoublets);
232  desc.add<unsigned int>("minHitsForSharingCut", 10)
233  ->setComment("Maximum number of hits in a tuple to clean also if the shared hit is on bpx1");
234 
235  desc.add<bool>("fitNas4", false)->setComment("fit only 4 hits out of N");
236  desc.add<bool>("doClusterCut", true);
237  desc.add<bool>("doZ0Cut", true);
238  desc.add<bool>("doPtCut", true);
239  desc.add<bool>("useRiemannFit", false)->setComment("true for Riemann, false for BrokenLine");
240  desc.add<bool>("doSharedHitCut", true)->setComment("Sharing hit nTuples cleaning");
241  desc.add<bool>("dupPassThrough", false)->setComment("Do not reject duplicate");
242  desc.add<bool>("useSimpleTripletCleaner", true)->setComment("use alternate implementation");
243 }
244 
245 template <typename TrackerTraits>
247  if (m_params.onGPU_) {
248  // allocate pinned host memory only if CUDA is available
250  if (cuda and cuda->enabled()) {
251  cudaCheck(cudaMalloc(&m_counters, sizeof(Counters)));
252  cudaCheck(cudaMemset(m_counters, 0, sizeof(Counters)));
253  }
254  } else {
255  m_counters = new Counters();
256  memset(m_counters, 0, sizeof(Counters));
257  }
258 }
259 
260 template <typename TrackerTraits>
262  if (m_params.onGPU_) {
263  // print the gpu statistics and free pinned host memory only if CUDA is available
265  if (cuda and cuda->enabled()) {
266  if (m_params.doStats_) {
267  // crash on multi-gpu processes
269  }
270  cudaFree(m_counters);
271  }
272  } else {
273  if (m_params.doStats_) {
275  }
276  delete m_counters;
277  }
278 }
279 
280 template <typename TrackerTraits>
282  HitsOnDevice const& hits_d, float bfield, cudaStream_t stream) const {
286 
288 
289  GPUKernels kernels(m_params);
290  kernels.setCounters(m_counters);
291  kernels.allocateOnGPU(hits_d.nHits(), stream);
292 
293  kernels.buildDoublets(hits_d.view(), hits_d.offsetBPIX2(), stream);
294 
295  kernels.launchKernels(hits_d.view(), tracks.view(), stream);
296 
297  HelixFitOnGPU fitter(bfield, m_params.fitNas4_);
298  fitter.allocateOnGPU(kernels.tupleMultiplicity(), tracks.view());
299  if (m_params.useRiemannFit_) {
300  fitter.launchRiemannKernels(hits_d.view(), hits_d.nHits(), TrackerTraits::maxNumberOfQuadruplets, stream);
301  } else {
302  fitter.launchBrokenLineKernels(hits_d.view(), hits_d.nHits(), TrackerTraits::maxNumberOfQuadruplets, stream);
303  }
304  kernels.classifyTuples(hits_d.view(), tracks.view(), stream);
305 #ifdef GPU_DEBUG
306  cudaDeviceSynchronize();
307  cudaCheck(cudaGetLastError());
308  std::cout << "finished building pixel tracks on GPU" << std::endl;
309 #endif
310 
311  return tracks;
312 }
313 
314 template <typename TrackerTraits>
316  float bfield) const {
320 
322 
323  CPUKernels kernels(m_params);
324  kernels.setCounters(m_counters);
325  kernels.allocateOnGPU(hits_h.nHits(), nullptr);
326 
327  kernels.buildDoublets(hits_h.view(), hits_h.offsetBPIX2(), nullptr);
328  kernels.launchKernels(hits_h.view(), tracks.view(), nullptr);
329 
330  if (0 == hits_h.nHits())
331  return tracks;
332 
333  // now fit
334  HelixFitOnGPU fitter(bfield, m_params.fitNas4_);
335  fitter.allocateOnGPU(kernels.tupleMultiplicity(), tracks.view());
336 
337  if (m_params.useRiemannFit_) {
338  fitter.launchRiemannKernelsOnCPU(hits_h.view(), hits_h.nHits(), TrackerTraits::maxNumberOfQuadruplets);
339  } else {
340  fitter.launchBrokenLineKernelsOnCPU(hits_h.view(), hits_h.nHits(), TrackerTraits::maxNumberOfQuadruplets);
341  }
342 
343  kernels.classifyTuples(hits_h.view(), tracks.view(), nullptr);
344 
345 #ifdef GPU_DEBUG
346  std::cout << "finished building pixel tracks on CPU" << std::endl;
347 #endif
348 
349  // check that the fixed-size SoA does not overflow
350  auto maxTracks = tracks.view().metadata().size();
351  auto nTracks = tracks.view().nTracks();
353  if (nTracks == maxTracks - 1) {
354  edm::LogWarning("PixelTracks") << "Unsorted reconstructed pixel tracks truncated to " << maxTracks - 1
355  << " candidates";
356  }
357 
358  return tracks;
359 }
360 
static void fillDescriptionsCommon(edm::ParameterSetDescription &desc)
static void printCounters(Counters const *counters)
TrackSoAHost makeTuples(HitsOnHost const &hits_d, float bfield) const
CAHitNtupletGeneratorOnGPU(const edm::ParameterSet &cfg, edm::ConsumesCollector &&iC)
typename std::enable_if< std::is_base_of< Phase2, T >::value >::type isPhase2Topology
caHitNtupletGenerator::Counters Counters
static void fillDescriptions(edm::ParameterSetDescription &desc)
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
assert(be >=bs)
uint32_t offsetBPIX2() const
void launchRiemannKernelsOnCPU(const HitConstView &hv, uint32_t nhits, uint32_t maxNumberOfTuples)
TrackSoADevice makeTuplesAsync(HitsOnDevice const &hits_d, float bfield, cudaStream_t stream) const
void launchRiemannKernels(const HitConstView &hv, uint32_t nhits, uint32_t maxNumberOfTuples, cudaStream_t cudaStream)
typename std::enable_if< std::is_base_of< Phase1, T >::value >::type isPhase1Topology
void launchBrokenLineKernelsOnCPU(const HitConstView &hv, uint32_t nhits, uint32_t maxNumberOfTuples)
void allocateOnGPU(TupleMultiplicity const *tupleMultiplicity, OutputSoAView &helix_fit_results)
Definition: HelixFitOnGPU.cc:5
ParameterSet const & getParameterSet(ParameterSetID const &id)
maxTracks
Definition: DMR_cfg.py:158
Square< F >::type sqr(const F &f)
Definition: Square.h:14
float x
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69
Log< level::Warning, false > LogWarning
long double T
void launchBrokenLineKernels(const HitConstView &hv, uint32_t nhits, uint32_t maxNumberOfTuples, cudaStream_t cudaStream)
static void printCounters(Counters const *counters)