CMS 3D CMS Logo

BrokenLineFit.dev.cc
Go to the documentation of this file.
1 //#define BROKENLINE_DEBUG
2 //#define BL_DUMP_HITS
3 
4 #include <cstdint>
5 
6 #include <alpaka/alpaka.hpp>
7 
12 
13 #include "HelixFit.h"
14 
15 template <typename TrackerTraits>
17 template <typename TrackerTraits>
19 template <typename TrackerTraits>
21 
22 // #define BL_DUMP_HITS
23 
25  template <int N, typename TrackerTraits>
27  public:
28  template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
29  ALPAKA_FN_ACC void operator()(TAcc const &acc,
30  Tuples<TrackerTraits> const *__restrict__ foundNtuplets,
33  pixelCPEforDevice::ParamsOnDeviceT<TrackerTraits> const *__restrict__ cpeParams,
34  typename TrackerTraits::tindex_type *__restrict__ ptkids,
35  double *__restrict__ phits,
36  float *__restrict__ phits_ge,
37  double *__restrict__ pfast_fit,
38  uint32_t nHitsL,
39  uint32_t nHitsH,
40  int32_t offset) const {
41  constexpr uint32_t hitsInFit = N;
43 
44  ALPAKA_ASSERT_ACC(hitsInFit <= nHitsL);
50 
51  // look in bin for this hit multiplicity
53  ALPAKA_ASSERT_ACC(totTK <= int(tupleMultiplicity->size()));
55 
56 #ifdef BROKENLINE_DEBUG
57  const uint32_t threadIdx(alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u]);
59  printf("%d total Ntuple\n", tupleMultiplicity->size());
60  printf("%d Ntuple of size %d/%d for %d hits to fit\n", totTK, nHitsL, nHitsH, hitsInFit);
61  }
62 #endif
64  for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
65  auto tuple_idx = local_idx + offset;
66  if ((int)tuple_idx >= totTK) {
67  ptkids[local_idx] = invalidTkId;
68  break;
69  }
70  // get it from the ntuple container (one to one to helix)
71  auto tkid = *(tupleMultiplicity->begin(nHitsL) + tuple_idx);
72  ALPAKA_ASSERT_ACC(static_cast<int>(tkid) < foundNtuplets->nOnes());
73 
74  ptkids[local_idx] = tkid;
75 
76  auto nHits = foundNtuplets->size(tkid);
77 
80 
81  riemannFit::Map3xNd<N> hits(phits + local_idx);
83  riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
84 
85 #ifdef BL_DUMP_HITS
86  auto &&done = alpaka::declareSharedVar<int, __COUNTER__>(acc);
87  done = 0;
88  alpaka::syncBlockThreads(acc);
89  bool dump =
90  (foundNtuplets->size(tkid) == 5 && 0 == alpaka::atomicAdd(acc, &done, 1, alpaka::hierarchy::Blocks{}));
91 #endif
92 
93  // Prepare data structure
94  auto const *hitId = foundNtuplets->begin(tkid);
95 
96  // #define YERR_FROM_DC
97 #ifdef YERR_FROM_DC
98  // try to compute more precise error in y
99  auto dx = hh[hitId[hitsInFit - 1]].xGlobal() - hh[hitId[0]].xGlobal();
100  auto dy = hh[hitId[hitsInFit - 1]].yGlobal() - hh[hitId[0]].yGlobal();
101  auto dz = hh[hitId[hitsInFit - 1]].zGlobal() - hh[hitId[0]].zGlobal();
102  float ux, uy, uz;
103 #endif
104 
105  float incr = std::max(1.f, float(nHits) / float(hitsInFit));
106  float n = 0;
107  for (uint32_t i = 0; i < hitsInFit; ++i) {
108  int j = int(n + 0.5f); // round
109  if (hitsInFit - 1 == i)
110  j = nHits - 1; // force last hit to ensure max lever arm.
111  ALPAKA_ASSERT_ACC(j < int(nHits));
112  n += incr;
113  auto hit = hitId[j];
114  float ge[6];
115 
116 #ifdef YERR_FROM_DC
117  auto const &dp = cpeParams->detParams(hh.detectorIndex(hit));
118  auto status = hh[hit].chargeAndStatus().status;
119  int qbin = CPEFastParametrisation::kGenErrorQBins - 1 - status.qBin;
120  ALPAKA_ASSERT_ACC(qbin >= 0 && qbin < 5);
121  bool nok = (status.isBigY | status.isOneY);
122  // compute cotanbeta and use it to recompute error
123  dp.frame.rotation().multiply(dx, dy, dz, ux, uy, uz);
124  auto cb = std::abs(uy / uz);
125  int bin =
126  int(cb * (float(phase1PixelTopology::pixelThickess) / float(phase1PixelTopology::pixelPitchY)) * 8.f) - 4;
127  int low_value = 0;
128  int high_value = CPEFastParametrisation::kNumErrorBins - 1;
129  // return estimated bin value truncated to [0, 15]
130  bin = std::clamp(bin, low_value, high_value);
131  float yerr = dp.sigmay[bin] * 1.e-4f; // toCM
132  yerr *= dp.yfact[qbin]; // inflate
133  yerr *= yerr;
134  yerr += dp.apeYY;
135  yerr = nok ? hh[hit].yerrLocal() : yerr;
136  dp.frame.toGlobal(hh[hit].xerrLocal(), 0, yerr, ge);
137 #else
138  cpeParams->detParams(hh[hit].detectorIndex()).frame.toGlobal(hh[hit].xerrLocal(), 0, hh[hit].yerrLocal(), ge);
139 #endif
140 
141 #ifdef BL_DUMP_HITS
142  bool dump = foundNtuplets->size(tkid) == 5;
143  if (dump) {
144  printf("Track id %d %d Hit %d on %d\nGlobal: hits.col(%d) << %f,%f,%f\n",
145  local_idx,
146  tkid,
147  hit,
148  hh[hit].detectorIndex(),
149  i,
150  hh[hit].xGlobal(),
151  hh[hit].yGlobal(),
152  hh[hit].zGlobal());
153  printf("Error: hits_ge.col(%d) << %e,%e,%e,%e,%e,%e\n", i, ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]);
154  }
155 #endif
156 
157  hits.col(i) << hh[hit].xGlobal(), hh[hit].yGlobal(), hh[hit].zGlobal();
158  hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5];
159  }
161 
162 #ifdef BROKENLINE_DEBUG
163  // any NaN value should cause the track to be rejected at a later stage
168 #endif
169  }
170  }
171  };
172 
173  template <int N, typename TrackerTraits>
174  struct Kernel_BLFit {
175  public:
176  template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
177  ALPAKA_FN_ACC void operator()(TAcc const &acc,
179  double bField,
181  typename TrackerTraits::tindex_type const *__restrict__ ptkids,
182  double *__restrict__ phits,
183  float *__restrict__ phits_ge,
184  double *__restrict__ pfast_fit) const {
190 
191  // same as above...
192  // look in bin for this hit multiplicity
194  for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
195  if (invalidTkId == ptkids[local_idx])
196  break;
197  auto tkid = ptkids[local_idx];
198 
199  ALPAKA_ASSERT_ACC(tkid < TrackerTraits::maxNumberOfTuples);
200 
201  riemannFit::Map3xNd<N> hits(phits + local_idx);
202  riemannFit::Map4d fast_fit(pfast_fit + local_idx);
203  riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
204 
206 
209 
211  brokenline::lineFit(acc, hits_ge, fast_fit, bField, data, line);
212  brokenline::circleFit(acc, hits, hits_ge, fast_fit, bField, data, circle);
213 
215  results_view, circle.par, circle.cov, line.par, line.cov, 1.f / float(bField), tkid);
216  results_view[tkid].pt() = float(bField) / float(std::abs(circle.par(2)));
217  results_view[tkid].eta() = alpaka::math::asinh(acc, line.par(0));
218  results_view[tkid].chi2() = (circle.chi2 + line.chi2) / (2 * N - 5);
219 
220 #ifdef BROKENLINE_DEBUG
221  if (!(circle.chi2 >= 0) || !(line.chi2 >= 0))
222  printf("kernelBLFit failed! %f/%f\n", circle.chi2, line.chi2);
223  printf("kernelBLFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n",
224  N,
225  N,
226  tkid,
227  circle.par(0),
228  circle.par(1),
229  circle.par(2));
230  printf("kernelBLHits line.par(0,1): %d %f,%f\n", tkid, line.par(0), line.par(1));
231  printf("kernelBLHits chi2 cov %f/%f %e,%e,%e,%e,%e\n",
232  circle.chi2,
233  line.chi2,
234  circle.cov(0, 0),
235  circle.cov(1, 1),
236  circle.cov(2, 2),
237  line.cov(0, 0),
238  line.cov(1, 1));
239 #endif
240  }
241  }
242  };
243 
244  template <typename TrackerTraits>
248  uint32_t hitsInFit,
249  uint32_t maxNumberOfTuples,
250  Queue &queue) {
251  ALPAKA_ASSERT_ACC(tuples_);
252 
253  uint32_t blockSize = 64;
254  uint32_t numberOfBlocks = cms::alpakatools::divide_up_by(maxNumberOfConcurrentFits_, blockSize);
255  const WorkDiv1D workDivTriplets = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
256  const WorkDiv1D workDivQuadsPenta = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks / 4, blockSize);
257 
258  // Fit internals
259  auto tkidDevice =
260  cms::alpakatools::make_device_buffer<typename TrackerTraits::tindex_type[]>(queue, maxNumberOfConcurrentFits_);
261  auto hitsDevice = cms::alpakatools::make_device_buffer<double[]>(
262  queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<6>) / sizeof(double));
263  auto hits_geDevice = cms::alpakatools::make_device_buffer<float[]>(
264  queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6xNf<6>) / sizeof(float));
265  auto fast_fit_resultsDevice = cms::alpakatools::make_device_buffer<double[]>(
266  queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double));
267 
268  for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) {
269  // fit triplets
270 
271  alpaka::exec<Acc1D>(queue,
272  workDivTriplets,
274  tuples_,
275  tupleMultiplicity_,
276  hv,
277  cpeParams,
278  tkidDevice.data(),
279  hitsDevice.data(),
280  hits_geDevice.data(),
281  fast_fit_resultsDevice.data(),
282  3,
283  3,
284  offset);
285 
286  alpaka::exec<Acc1D>(queue,
287  workDivTriplets,
289  tupleMultiplicity_,
290  bField_,
291  outputSoa_,
292  tkidDevice.data(),
293  hitsDevice.data(),
294  hits_geDevice.data(),
295  fast_fit_resultsDevice.data());
296 
297  if (fitNas4_) {
298  // fit all as 4
299  riemannFit::rolling_fits<4, TrackerTraits::maxHitsOnTrack, 1>([this,
300  &hv,
301  &cpeParams,
302  &tkidDevice,
303  &hitsDevice,
304  &hits_geDevice,
305  &fast_fit_resultsDevice,
306  &offset,
307  &queue,
308  &workDivQuadsPenta](auto i) {
309  alpaka::exec<Acc1D>(queue,
310  workDivQuadsPenta,
312  tuples_,
313  tupleMultiplicity_,
314  hv,
315  cpeParams,
316  tkidDevice.data(),
317  hitsDevice.data(),
318  hits_geDevice.data(),
319  fast_fit_resultsDevice.data(),
320  4,
321  4,
322  offset);
323 
324  alpaka::exec<Acc1D>(queue,
325  workDivQuadsPenta,
327  tupleMultiplicity_,
328  bField_,
329  outputSoa_,
330  tkidDevice.data(),
331  hitsDevice.data(),
332  hits_geDevice.data(),
333  fast_fit_resultsDevice.data());
334  });
335 
336  } else {
337  riemannFit::rolling_fits<4, TrackerTraits::maxHitsOnTrackForFullFit, 1>([this,
338  &hv,
339  &cpeParams,
340  &tkidDevice,
341  &hitsDevice,
342  &hits_geDevice,
343  &fast_fit_resultsDevice,
344  &offset,
345  &queue,
346  &workDivQuadsPenta](auto i) {
347  alpaka::exec<Acc1D>(queue,
348  workDivQuadsPenta,
350  tuples_,
351  tupleMultiplicity_,
352  hv,
353  cpeParams,
354  tkidDevice.data(),
355  hitsDevice.data(),
356  hits_geDevice.data(),
357  fast_fit_resultsDevice.data(),
358  i,
359  i,
360  offset);
361 
362  alpaka::exec<Acc1D>(queue,
363  workDivQuadsPenta,
365  tupleMultiplicity_,
366  bField_,
367  outputSoa_,
368  tkidDevice.data(),
369  hitsDevice.data(),
370  hits_geDevice.data(),
371  fast_fit_resultsDevice.data());
372  });
373 
374  static_assert(TrackerTraits::maxHitsOnTrackForFullFit < TrackerTraits::maxHitsOnTrack);
375 
376  //Fit all the rest using the maximum from previous call
377  alpaka::exec<Acc1D>(queue,
378  workDivQuadsPenta,
380  tuples_,
381  tupleMultiplicity_,
382  hv,
383  cpeParams,
384  tkidDevice.data(),
385  hitsDevice.data(),
386  hits_geDevice.data(),
387  fast_fit_resultsDevice.data(),
388  TrackerTraits::maxHitsOnTrackForFullFit,
389  TrackerTraits::maxHitsOnTrack - 1,
390  offset);
391 
392  alpaka::exec<Acc1D>(queue,
393  workDivQuadsPenta,
395  tupleMultiplicity_,
396  bField_,
397  outputSoa_,
398  tkidDevice.data(),
399  hitsDevice.data(),
400  hits_geDevice.data(),
401  fast_fit_resultsDevice.data());
402  }
403 
404  } // loop on concurrent fits
405  }
406 
407  template class HelixFit<pixelTopology::Phase1>;
408  template class HelixFit<pixelTopology::Phase2>;
410 
411 } // namespace ALPAKA_ACCELERATOR_NAMESPACE
const dim3 threadIdx
Definition: cudaCompat.h:29
Eigen::Vector4d Vector4d
Definition: FitResult.h:12
ALPAKA_FN_ACC ALPAKA_FN_INLINE void fastFit(const TAcc &acc, const M3xN &hits, V4 &result)
A very fast helix fit.
Definition: BrokenLine.h:266
def isnan(num)
constexpr DetParams const &__restrict__ detParams(int i) const
ALPAKA_FN_ACC auto uniform_elements(TAcc const &acc, TArgs... args)
Definition: workdivision.h:311
constexpr Idx divide_up_by(Idx value, Idx divisor)
Definition: workdivision.h:20
void launchBrokenLineKernels(const HitConstView &hv, ParamsOnDevice const *cpeParams, uint32_t nhits, uint32_t maxNumberOfTuples, Queue &queue)
constexpr int kGenErrorQBins
Vector3d par
parameter: (X0,Y0,R)
Definition: FitResult.h:24
typename reco::TrackSoA< TrackerTraits >::HitContainer Tuples
TupleMultiplicity< TrackerTraits > const *__restrict__ tupleMultiplicity
ALPAKA_FN_ACC void operator()(TAcc const &acc, TupleMultiplicity< TrackerTraits > const *__restrict__ tupleMultiplicity, double bField, OutputSoAView< TrackerTraits > results_view, typename TrackerTraits::tindex_type const *__restrict__ ptkids, double *__restrict__ phits, float *__restrict__ phits_ge, double *__restrict__ pfast_fit) const
ALPAKA_FN_ACC ALPAKA_FN_INLINE void lineFit(const TAcc &acc, const M6xN &hits_ge, const V4 &fast_fit, const double bField, const PreparedBrokenLineData< n > &data, riemannFit::LineFit &line_results)
Performs the Broken Line fit in the straight track case (that is, the fit parameters are only the int...
Definition: BrokenLine.h:472
__host__ __device__ void prepareBrokenLineData(const M3xN &hits, const V4 &fast_fit, const double bField, PreparedBrokenLineData< n > &results)
Computes the data needed for the Broken Line fit procedure that are mainly common for the circle and ...
Definition: BrokenLine.h:151
Eigen::Matrix< float, 6, N > Matrix6xNf
Definition: HelixFit.h:35
WorkDiv< Dim1D > WorkDiv1D
Definition: config.h:32
TupleMultiplicity< TrackerTraits > const *__restrict__ TrackingRecHitSoAConstView< TrackerTraits > TrackerTraits::tindex_type *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ uint32_t nHitsL
TupleMultiplicity< TrackerTraits > const *__restrict__ TrackingRecHitSoAConstView< TrackerTraits > TrackerTraits::tindex_type *__restrict__ double *__restrict__ phits
Eigen::Map< Vector4d, 0, Eigen::InnerStride< stride > > Map4d
Definition: HelixFit.h:39
Eigen::Map< Matrix3xNd< N >, 0, Eigen::Stride< 3 *stride, stride > > Map3xNd
Definition: HelixFit.h:32
ALPAKA_FN_ACC void operator()(TAcc const &acc, Tuples< TrackerTraits > const *__restrict__ foundNtuplets, TupleMultiplicity< TrackerTraits > const *__restrict__ tupleMultiplicity, TrackingRecHitSoAConstView< TrackerTraits > hh, pixelCPEforDevice::ParamsOnDeviceT< TrackerTraits > const *__restrict__ cpeParams, typename TrackerTraits::tindex_type *__restrict__ ptkids, double *__restrict__ phits, float *__restrict__ phits_ge, double *__restrict__ pfast_fit, uint32_t nHitsL, uint32_t nHitsH, int32_t offset) const
TupleMultiplicity< TrackerTraits > const *__restrict__ TrackingRecHitSoAConstView< TrackerTraits > TrackerTraits::tindex_type *__restrict__ ptkids
std::vector< Block > Blocks
Definition: Block.h:99
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
double f[11][100]
Eigen::Matrix< double, 3, N > Matrix3xNd
Definition: HelixFit.h:30
double OutputSoAView< TrackerTraits > results_view
Eigen::Map< Matrix6xNf< N >, 0, Eigen::Stride< 6 *stride, stride > > Map6xNf
Definition: HelixFit.h:37
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > hh
int nt
Definition: AMPTWrapper.h:42
ALPAKA_FN_ACC ALPAKA_FN_INLINE void const M3xN const V4 & fast_fit
Definition: BrokenLine.h:158
#define N
Definition: blowfish.cc:9
int totTK
constexpr auto invalidTkId
typename TrackingRecHitSoA< TrackerTraits >::template TrackingRecHitSoALayout<>::ConstView TrackingRecHitSoAConstView
TupleMultiplicity< TrackerTraits > const *__restrict__ TrackingRecHitSoAConstView< TrackerTraits > TrackerTraits::tindex_type *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ uint32_t uint32_t nHitsH
ALPAKA_FN_ACC constexpr bool once_per_grid(TAcc const &acc)
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80
data needed for the Broken Line fit procedure.
Definition: BrokenLine.h:30
static constexpr __host__ __device__ void copyFromCircle(TrackSoAView &tracks, V3 const &cp, M3 const &ccov, V2 const &lp, M2 const &lcov, float b, int32_t i)
TupleMultiplicity< TrackerTraits > const *__restrict__ uint32_t nHits
ALPAKA_FN_ACC ALPAKA_FN_INLINE void circleFit(const TAcc &acc, const M3xN &hits, const M6xN &hits_ge, const V4 &fast_fit, const double bField, PreparedBrokenLineData< n > &data, karimaki_circle_fit &circle_results)
Performs the Broken Line fit in the curved track case (that is, the fit parameters are the intercepti...
Definition: BrokenLine.h:323
TupleMultiplicity< TrackerTraits > const *__restrict__ TrackingRecHitSoAConstView< TrackerTraits > TrackerTraits::tindex_type *__restrict__ double *__restrict__ float *__restrict__ phits_ge
TupleMultiplicity< TrackerTraits > const *__restrict__ TrackingRecHitSoAConstView< TrackerTraits > TrackerTraits::tindex_type *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ pfast_fit
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
constexpr int kNumErrorBins
reco::TrackSoAView< TrackerTraits > OutputSoAView
constexpr uint32_t maxNumberOfConcurrentFits
Definition: HelixFit.h:21
typename reco::TrackSoA< TrackerTraits >::template Layout<>::View TrackSoAView
Definition: TracksSoA.h:45