CMS 3D CMS Logo

BrokenLineFitOnGPU.h
Go to the documentation of this file.
1 //
2 // Author: Felice Pantaleo, CERN
3 //
4 
5 //#define BROKENLINE_DEBUG
6 //#define BL_DUMP_HITS
7 #include <cstdint>
8 
9 #include <cuda_runtime.h>
10 
16 
17 #include "HelixFitOnGPU.h"
18 
19 template <typename TrackerTraits>
21 template <typename TrackerTraits>
23 template <typename TrackerTraits>
25 
26 // #define BL_DUMP_HITS
27 
28 template <int N, typename TrackerTraits>
29 __global__ void kernel_BLFastFit(Tuples<TrackerTraits> const *__restrict__ foundNtuplets,
32  typename TrackerTraits::tindex_type *__restrict__ ptkids,
33  double *__restrict__ phits,
34  float *__restrict__ phits_ge,
35  double *__restrict__ pfast_fit,
36  uint32_t nHitsL,
37  uint32_t nHitsH,
38  int32_t offset) {
39  constexpr uint32_t hitsInFit = N;
41 
42  assert(hitsInFit <= nHitsL);
43  assert(nHitsL <= nHitsH);
44  assert(phits);
48 
49  // look in bin for this hit multiplicity
52  assert(totTK <= int(tupleMultiplicity->size()));
53  assert(totTK >= 0);
54 
55 #ifdef BROKENLINE_DEBUG
56  if (0 == local_start) {
57  printf("%d total Ntuple\n", tupleMultiplicity->size());
58  printf("%d Ntuple of size %d/%d for %d hits to fit\n", totTK, nHitsL, nHitsH, hitsInFit);
59  }
60 #endif
61 
62  for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt;
63  local_idx += gridDim.x * blockDim.x) {
64  int tuple_idx = local_idx + offset;
65  if (tuple_idx >= totTK) {
66  ptkids[local_idx] = invalidTkId;
67  break;
68  }
69  // get it from the ntuple container (one to one to helix)
70  auto tkid = *(tupleMultiplicity->begin(nHitsL) + tuple_idx);
71  assert(int(tkid) < foundNtuplets->nOnes());
72 
73  ptkids[local_idx] = tkid;
74 
75  auto nHits = foundNtuplets->size(tkid);
76 
77  assert(nHits >= nHitsL);
78  assert(nHits <= nHitsH);
79 
80  riemannFit::Map3xNd<N> hits(phits + local_idx);
81  riemannFit::Map4d fast_fit(pfast_fit + local_idx);
82  riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
83 
84 #ifdef BL_DUMP_HITS
85  __shared__ int done;
86  done = 0;
87  __syncthreads();
88  bool dump = (foundNtuplets->size(tkid) == 5 && 0 == atomicAdd(&done, 1));
89 #endif
90 
91  // Prepare data structure
92  auto const *hitId = foundNtuplets->begin(tkid);
93 
94  // #define YERR_FROM_DC
95 #ifdef YERR_FROM_DC
96  // try to compute more precise error in y
97  auto dx = hh[hitId[hitsInFit - 1]].xGlobal() - hh[hitId[0]].xGlobal();
98  auto dy = hh[hitId[hitsInFit - 1]].yGlobal() - hh[hitId[0]].yGlobal();
99  auto dz = hh[hitId[hitsInFit - 1]].zGlobal() - hh[hitId[0]].zGlobal();
100  float ux, uy, uz;
101 #endif
102 
103  float incr = std::max(1.f, float(nHits) / float(hitsInFit));
104  float n = 0;
105  for (uint32_t i = 0; i < hitsInFit; ++i) {
106  int j = int(n + 0.5f); // round
107  if (hitsInFit - 1 == i)
108  j = nHits - 1; // force last hit to ensure max lever arm.
109  assert(j < int(nHits));
110  n += incr;
111  auto hit = hitId[j];
112  float ge[6];
113 
114 #ifdef YERR_FROM_DC
115  auto const &dp = hh.cpeParams().detParams(hh.detectorIndex(hit));
116  auto status = hh[hit].chargeAndStatus().status;
117  int qbin = CPEFastParametrisation::kGenErrorQBins - 1 - status.qBin;
118  assert(qbin >= 0 && qbin < 5);
119  bool nok = (status.isBigY | status.isOneY);
120  // compute cotanbeta and use it to recompute error
121  dp.frame.rotation().multiply(dx, dy, dz, ux, uy, uz);
122  auto cb = std::abs(uy / uz);
123  int bin =
124  int(cb * (float(phase1PixelTopology::pixelThickess) / float(phase1PixelTopology::pixelPitchY)) * 8.f) - 4;
125  int low_value = 0;
126  int high_value = CPEFastParametrisation::kNumErrorBins - 1;
127  // return estimated bin value truncated to [0, 15]
128  bin = std::clamp(bin, low_value, high_value);
129  float yerr = dp.sigmay[bin] * 1.e-4f; // toCM
130  yerr *= dp.yfact[qbin]; // inflate
131  yerr *= yerr;
132  yerr += dp.apeYY;
133  yerr = nok ? hh[hit].yerrLocal() : yerr;
134  dp.frame.toGlobal(hh[hit].xerrLocal(), 0, yerr, ge);
135 #else
136  hh.cpeParams().detParams(hh[hit].detectorIndex()).frame.toGlobal(hh[hit].xerrLocal(), 0, hh[hit].yerrLocal(), ge);
137 #endif
138 
139 #ifdef BL_DUMP_HITS
140  bool dump = foundNtuplets->size(tkid) == 5;
141  if (dump) {
142  printf("Track id %d %d Hit %d on %d\nGlobal: hits.col(%d) << %f,%f,%f\n",
143  local_idx,
144  tkid,
145  hit,
146  hh[hit].detectorIndex(),
147  i,
148  hh[hit].xGlobal(),
149  hh[hit].yGlobal(),
150  hh[hit].zGlobal());
151  printf("Error: hits_ge.col(%d) << %e,%e,%e,%e,%e,%e\n", i, ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]);
152  }
153 #endif
154 
155  hits.col(i) << hh[hit].xGlobal(), hh[hit].yGlobal(), hh[hit].zGlobal();
156  hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5];
157  }
158  brokenline::fastFit(hits, fast_fit);
159 
160  // no NaN here....
161  assert(fast_fit(0) == fast_fit(0));
162  assert(fast_fit(1) == fast_fit(1));
163  assert(fast_fit(2) == fast_fit(2));
164  assert(fast_fit(3) == fast_fit(3));
165  }
166 }
167 
168 template <int N, typename TrackerTraits>
169 __global__ void kernel_BLFit(TupleMultiplicity<TrackerTraits> const *__restrict__ tupleMultiplicity,
170  double bField,
172  typename TrackerTraits::tindex_type const *__restrict__ ptkids,
173  double *__restrict__ phits,
174  float *__restrict__ phits_ge,
175  double *__restrict__ pfast_fit) {
176  assert(results_view.pt());
177  assert(results_view.eta());
178  assert(results_view.chi2());
179  assert(pfast_fit);
181 
182  // same as above...
183  // look in bin for this hit multiplicity
184  auto local_start = blockIdx.x * blockDim.x + threadIdx.x;
185  for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt;
186  local_idx += gridDim.x * blockDim.x) {
187  if (invalidTkId == ptkids[local_idx])
188  break;
189  auto tkid = ptkids[local_idx];
190 
191  assert(tkid < TrackerTraits::maxNumberOfTuples);
192 
193  riemannFit::Map3xNd<N> hits(phits + local_idx);
194  riemannFit::Map4d fast_fit(pfast_fit + local_idx);
195  riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
196 
198 
201 
203  brokenline::lineFit(hits_ge, fast_fit, bField, data, line);
204  brokenline::circleFit(hits, hits_ge, fast_fit, bField, data, circle);
205 
207  results_view, circle.par, circle.cov, line.par, line.cov, 1.f / float(bField), tkid);
208  results_view[tkid].pt() = float(bField) / float(std::abs(circle.par(2)));
209  results_view[tkid].eta() = asinhf(line.par(0));
210  results_view[tkid].chi2() = (circle.chi2 + line.chi2) / (2 * N - 5);
211 
212 #ifdef BROKENLINE_DEBUG
213  if (!(circle.chi2 >= 0) || !(line.chi2 >= 0))
214  printf("kernelBLFit failed! %f/%f\n", circle.chi2, line.chi2);
215  printf("kernelBLFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n",
216  N,
217  N,
218  tkid,
219  circle.par(0),
220  circle.par(1),
221  circle.par(2));
222  printf("kernelBLHits line.par(0,1): %d %f,%f\n", tkid, line.par(0), line.par(1));
223  printf("kernelBLHits chi2 cov %f/%f %e,%e,%e,%e,%e\n",
224  circle.chi2,
225  line.chi2,
226  circle.cov(0, 0),
227  circle.cov(1, 1),
228  circle.cov(2, 2),
229  line.cov(0, 0),
230  line.cov(1, 1));
231 #endif
232  }
233 }
const dim3 threadIdx
Definition: cudaCompat.h:29
__host__ __device__ void lineFit(const M6xN &hits_ge, const V4 &fast_fit, const double bField, const PreparedBrokenLineData< n > &data, riemannFit::LineFit &line_results)
Performs the Broken Line fit in the straight track case (that is, the fit parameters are only the int...
Definition: BrokenLine.h:463
const dim3 gridDim
Definition: cudaCompat.h:33
#define __global__
Definition: cudaCompat.h:19
const dim3 blockDim
Definition: cudaCompat.h:30
auto local_start
constexpr int kGenErrorQBins
Vector3d par
parameter: (X0,Y0,R)
Definition: FitResult.h:27
TrackSoAView< TrackerTraits > OutputSoAView
TupleMultiplicity< TrackerTraits > const *__restrict__ tupleMultiplicity
typename TrackSoA< TrackerTraits >::HitContainer Tuples
__host__ __device__ void prepareBrokenLineData(const M3xN &hits, const V4 &fast_fit, const double bField, PreparedBrokenLineData< n > &results)
Computes the data needed for the Broken Line fit procedure that are mainly common for the circle and ...
Definition: BrokenLine.h:150
TupleMultiplicity< TrackerTraits > const *__restrict__ TrackingRecHitSoAConstView< TrackerTraits > TrackerTraits::tindex_type *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ uint32_t nHitsL
TupleMultiplicity< TrackerTraits > const *__restrict__ TrackingRecHitSoAConstView< TrackerTraits > TrackerTraits::tindex_type *__restrict__ double *__restrict__ phits
Eigen::Map< Matrix3xNd< N >, 0, Eigen::Stride< 3 *stride, stride > > Map3xNd
Definition: HelixFitOnGPU.h:24
double bField
TupleMultiplicity< TrackerTraits > const *__restrict__ TrackingRecHitSoAConstView< TrackerTraits > TrackerTraits::tindex_type *__restrict__ ptkids
__host__ __device__ void fastFit(const M3xN &hits, V4 &result)
A very fast helix fit.
Definition: BrokenLine.h:258
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
double f[11][100]
double OutputSoAView< TrackerTraits > results_view
const dim3 blockIdx
Definition: cudaCompat.h:32
Eigen::Map< Matrix6xNf< N >, 0, Eigen::Stride< 6 *stride, stride > > Map6xNf
Definition: HelixFitOnGPU.h:29
int nt
Definition: AMPTWrapper.h:42
assert(hitsInFit<=nHitsL)
#define N
Definition: blowfish.cc:9
int totTK
constexpr auto invalidTkId
typename TrackingRecHitSoA< TrackerTraits >::template TrackingRecHitSoALayout<>::ConstView TrackingRecHitSoAConstView
Eigen::Map< Vector4d, 0, Eigen::InnerStride< stride > > Map4d
Definition: HelixFitOnGPU.h:31
TupleMultiplicity< TrackerTraits > const *__restrict__ TrackingRecHitSoAConstView< TrackerTraits > TrackerTraits::tindex_type *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ uint32_t uint32_t nHitsH
void __syncthreads()
Definition: cudaCompat.h:132
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80
TupleMultiplicity< TrackerTraits > const *__restrict__ TrackingRecHitSoAConstView< TrackerTraits > TrackerTraits::tindex_type *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ uint32_t uint32_t int32_t offset
typename TrackSoA< TrackerTraits >::template TrackSoALayout<>::View TrackSoAView
static constexpr __host__ __device__ void copyFromCircle(TrackSoAView &tracks, V3 const &cp, M3 const &ccov, V2 const &lp, M2 const &lcov, float b, int32_t i)
TupleMultiplicity< TrackerTraits > const *__restrict__ TrackingRecHitSoAConstView< TrackerTraits > hh
TupleMultiplicity< TrackerTraits > const *__restrict__ uint32_t nHits
__host__ __device__ void circleFit(const M3xN &hits, const M6xN &hits_ge, const V4 &fast_fit, const double bField, PreparedBrokenLineData< n > &data, karimaki_circle_fit &circle_results)
Performs the Broken Line fit in the curved track case (that is, the fit parameters are the intercepti...
Definition: BrokenLine.h:314
data needed for the Broken Line fit procedure.
Definition: BrokenLine.h:24
TupleMultiplicity< TrackerTraits > const *__restrict__ TrackingRecHitSoAConstView< TrackerTraits > TrackerTraits::tindex_type *__restrict__ double *__restrict__ float *__restrict__ phits_ge
TupleMultiplicity< TrackerTraits > const *__restrict__ TrackingRecHitSoAConstView< TrackerTraits > TrackerTraits::tindex_type *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ pfast_fit
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
constexpr int kNumErrorBins
constexpr uint32_t maxNumberOfConcurrentFits
Definition: HelixFitOnGPU.h:13