CMS 3D CMS Logo

RiemannFitOnGPU.h
Go to the documentation of this file.
1 //
2 // Author: Felice Pantaleo, CERN
3 //
4 
5 #include <cstdint>
6 
7 #include <cuda_runtime.h>
8 
14 
15 #include "HelixFitOnGPU.h"
16 
20 
21 template <int N>
22 __global__ void kernel_FastFit(Tuples const *__restrict__ foundNtuplets,
24  uint32_t nHits,
25  HitsOnGPU const *__restrict__ hhp,
26  double *__restrict__ phits,
27  float *__restrict__ phits_ge,
28  double *__restrict__ pfast_fit,
29  uint32_t offset) {
30  constexpr uint32_t hitsInFit = N;
31 
32  assert(hitsInFit <= nHits);
33 
37 
38  // look in bin for this hit multiplicity
40 
41 #ifdef RIEMANN_DEBUG
42  if (0 == local_start)
43  printf("%d Ntuple of size %d for %d hits to fit\n", tupleMultiplicity->size(nHits), nHits, hitsInFit);
44 #endif
45 
46  for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt;
47  local_idx += gridDim.x * blockDim.x) {
48  auto tuple_idx = local_idx + offset;
49  if (tuple_idx >= tupleMultiplicity->size(nHits))
50  break;
51 
52  // get it from the ntuple container (one to one to helix)
53  auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx);
54  assert(tkid < foundNtuplets->nOnes());
55 
56  assert(foundNtuplets->size(tkid) == nHits);
57 
58  riemannFit::Map3xNd<N> hits(phits + local_idx);
59  riemannFit::Map4d fast_fit(pfast_fit + local_idx);
60  riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
61 
62  // Prepare data structure
63  auto const *hitId = foundNtuplets->begin(tkid);
64  for (unsigned int i = 0; i < hitsInFit; ++i) {
65  auto hit = hitId[i];
66  // printf("Hit global: %f,%f,%f\n", hhp->xg_d[hit],hhp->yg_d[hit],hhp->zg_d[hit]);
67  float ge[6];
68  hhp->cpeParams()
69  .detParams(hhp->detectorIndex(hit))
70  .frame.toGlobal(hhp->xerrLocal(hit), 0, hhp->yerrLocal(hit), ge);
71  // printf("Error: %d: %f,%f,%f,%f,%f,%f\n",hhp->detInd_d[hit],ge[0],ge[1],ge[2],ge[3],ge[4],ge[5]);
72 
73  hits.col(i) << hhp->xGlobal(hit), hhp->yGlobal(hit), hhp->zGlobal(hit);
74  hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5];
75  }
76  riemannFit::fastFit(hits, fast_fit);
77 
78  // no NaN here....
79  assert(fast_fit(0) == fast_fit(0));
80  assert(fast_fit(1) == fast_fit(1));
81  assert(fast_fit(2) == fast_fit(2));
82  assert(fast_fit(3) == fast_fit(3));
83  }
84 }
85 
86 template <int N>
87 __global__ void kernel_CircleFit(caConstants::TupleMultiplicity const *__restrict__ tupleMultiplicity,
88  uint32_t nHits,
89  double bField,
90  double *__restrict__ phits,
91  float *__restrict__ phits_ge,
92  double *__restrict__ pfast_fit_input,
93  riemannFit::CircleFit *circle_fit,
94  uint32_t offset) {
95  assert(circle_fit);
96  assert(N <= nHits);
97 
98  // same as above...
99 
100  // look in bin for this hit multiplicity
101  auto local_start = blockIdx.x * blockDim.x + threadIdx.x;
102  for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt;
103  local_idx += gridDim.x * blockDim.x) {
104  auto tuple_idx = local_idx + offset;
105  if (tuple_idx >= tupleMultiplicity->size(nHits))
106  break;
107 
108  riemannFit::Map3xNd<N> hits(phits + local_idx);
109  riemannFit::Map4d fast_fit(pfast_fit_input + local_idx);
110  riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
111 
112  riemannFit::VectorNd<N> rad = (hits.block(0, 0, 2, N).colwise().norm());
113 
115  riemannFit::loadCovariance2D(hits_ge, hits_cov);
116 
117  circle_fit[local_idx] = riemannFit::circleFit(hits.block(0, 0, 2, N), hits_cov, fast_fit, rad, bField, true);
118 
119 #ifdef RIEMANN_DEBUG
120 // auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx);
121 // printf("kernelCircleFit circle.par(0,1,2): %d %f,%f,%f\n", tkid,
122 // circle_fit[local_idx].par(0), circle_fit[local_idx].par(1), circle_fit[local_idx].par(2));
123 #endif
124  }
125 }
126 
127 template <int N>
128 __global__ void kernel_LineFit(caConstants::TupleMultiplicity const *__restrict__ tupleMultiplicity,
129  uint32_t nHits,
130  double bField,
132  double *__restrict__ phits,
133  float *__restrict__ phits_ge,
134  double *__restrict__ pfast_fit_input,
135  riemannFit::CircleFit *__restrict__ circle_fit,
136  uint32_t offset) {
137  assert(results);
138  assert(circle_fit);
139  assert(N <= nHits);
140 
141  // same as above...
142 
143  // look in bin for this hit multiplicity
144  auto local_start = (blockIdx.x * blockDim.x + threadIdx.x);
145  for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt;
146  local_idx += gridDim.x * blockDim.x) {
147  auto tuple_idx = local_idx + offset;
148  if (tuple_idx >= tupleMultiplicity->size(nHits))
149  break;
150 
151  // get it for the ntuple container (one to one to helix)
152  auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx);
153 
154  riemannFit::Map3xNd<N> hits(phits + local_idx);
155  riemannFit::Map4d fast_fit(pfast_fit_input + local_idx);
156  riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
157 
158  auto const &line_fit = riemannFit::lineFit(hits, hits_ge, circle_fit[local_idx], fast_fit, bField, true);
159 
160  riemannFit::fromCircleToPerigee(circle_fit[local_idx]);
161 
162  results->stateAtBS.copyFromCircle(
163  circle_fit[local_idx].par, circle_fit[local_idx].cov, line_fit.par, line_fit.cov, 1.f / float(bField), tkid);
164  results->pt(tkid) = bField / std::abs(circle_fit[local_idx].par(2));
165  results->eta(tkid) = asinhf(line_fit.par(0));
166  results->chi2(tkid) = (circle_fit[local_idx].chi2 + line_fit.chi2) / (2 * N - 5);
167 
168 #ifdef RIEMANN_DEBUG
169  printf("kernelLineFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n",
170  N,
171  nHits,
172  tkid,
173  circle_fit[local_idx].par(0),
174  circle_fit[local_idx].par(1),
175  circle_fit[local_idx].par(2));
176  printf("kernelLineFit line.par(0,1): %d %f,%f\n", tkid, line_fit.par(0), line_fit.par(1));
177  printf("kernelLineFit chi2 cov %f/%f %e,%e,%e,%e,%e\n",
178  circle_fit[local_idx].chi2,
179  line_fit.chi2,
180  circle_fit[local_idx].cov(0, 0),
181  circle_fit[local_idx].cov(1, 1),
182  circle_fit[local_idx].cov(2, 2),
183  line_fit.cov(0, 0),
184  line_fit.cov(1, 1));
185 #endif
186  }
187 }
offset
const caConstants::TupleMultiplicity *__restrict__ uint32_t const HitsOnGPU *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ uint32_t offset
Definition: RiemannFitOnGPU.h:29
pixelCPEforGPU.h
mps_fire.i
i
Definition: mps_fire.py:428
pfast_fit
const caConstants::TupleMultiplicity *__restrict__ uint32_t const HitsOnGPU *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ pfast_fit
Definition: RiemannFitOnGPU.h:23
hfClusterShapes_cfi.hits
hits
Definition: hfClusterShapes_cfi.py:5
nt
int nt
Definition: AMPTWrapper.h:42
TrackingRecHit2DHeterogeneous.h
bookConverter.results
results
Definition: bookConverter.py:144
riemannFit::Matrix2Nd
Eigen::Matrix< double, 2 *N, 2 *N > Matrix2Nd
Definition: FitUtils.h:21
TrackingRecHit2DSOAView
Definition: TrackingRecHit2DSOAView.h:16
RiemannFit.h
hltPixelTracks_cff.chi2
chi2
Definition: hltPixelTracks_cff.py:25
phits_ge
const caConstants::TupleMultiplicity *__restrict__ uint32_t const HitsOnGPU *__restrict__ double *__restrict__ float *__restrict__ phits_ge
Definition: RiemannFitOnGPU.h:23
cms::cuda::OneToManyAssoc
Definition: OneToManyAssoc.h:143
__global__
#define __global__
Definition: cudaCompat.h:19
riemannFit::CircleFit
Definition: FitResult.h:26
riemannFit::maxNumberOfConcurrentFits
constexpr uint32_t maxNumberOfConcurrentFits
Definition: HelixFitOnGPU.h:12
nHits
const caConstants::TupleMultiplicity *__restrict__ uint32_t nHits
Definition: RiemannFitOnGPU.h:23
N
#define N
Definition: blowfish.cc:9
cms::cudacompat::gridDim
const dim3 gridDim
Definition: cudaCompat.h:33
TrackSoAHeterogeneousT
Definition: TrackSoAHeterogeneousT.h:23
HelixFitOnGPU.h
cms::cudacompat::blockDim
const dim3 blockDim
Definition: cudaCompat.h:30
assert
assert(hitsInFit<=nHits)
riemannFit::lineFit
__host__ __device__ LineFit lineFit(const M3xN &hits, const M6xN &hits_ge, const CircleFit &circle, const V4 &fast_fit, const double bField, const bool error)
Perform an ordinary least square fit in the s-z plane to compute the parameters cotTheta and Zip.
Definition: RiemannFit.h:785
riemannFit::Map6xNf
Eigen::Map< Matrix6xNf< N >, 0, Eigen::Stride< 6 *stride, stride > > Map6xNf
Definition: HelixFitOnGPU.h:28
foundNtuplets
const uint32_t *__restrict__ HitContainer * foundNtuplets
Definition: CAHitNtupletGeneratorKernelsImpl.h:139
cudaCheck.h
cms::cudacompat::threadIdx
const dim3 threadIdx
Definition: cudaCompat.h:29
riemannFit::fastFit
__host__ __device__ void fastFit(const M3xN &hits, V4 &result)
A very fast helix fit: it fits a circle by three points (first, middle and last point) and a line by ...
Definition: RiemannFit.h:375
riemannFit::VectorNd
Eigen::Matrix< double, N, 1 > VectorNd
Definition: FitUtils.h:33
Calorimetry_cff.bField
bField
Definition: Calorimetry_cff.py:284
tupleMultiplicity
const caConstants::TupleMultiplicity *__restrict__ tupleMultiplicity
Definition: RiemannFitOnGPU.h:23
riemannFit::Map3xNd
Eigen::Map< Matrix3xNd< N >, 0, Eigen::Stride< 3 *stride, stride > > Map3xNd
Definition: HelixFitOnGPU.h:23
amptDefault_cfi.frame
frame
Definition: amptDefault_cfi.py:12
local_start
auto local_start
Definition: RiemannFitOnGPU.h:39
riemannFit::circleFit
__host__ __device__ CircleFit circleFit(const M2xN &hits2D, const Matrix2Nd< N > &hits_cov2D, const V4 &fast_fit, const VectorNd< N > &rad, const double bField, const bool error)
Fit a generic number of 2D points with a circle using Riemann-Chernov algorithm. Covariance matrix of...
Definition: RiemannFit.h:455
riemannFit::loadCovariance2D
__host__ __device__ void loadCovariance2D(M6xNf const &ge, M2Nd &hits_cov)
Definition: FitUtils.h:88
pixelTrack::HitContainer
TrackSoA::HitContainer HitContainer
Definition: TrackSoAHeterogeneousT.h:78
phits
const caConstants::TupleMultiplicity *__restrict__ uint32_t const HitsOnGPU *__restrict__ double *__restrict__ phits
Definition: RiemannFitOnGPU.h:23
funct::abs
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
riemannFit::fromCircleToPerigee
__host__ __device__ void fromCircleToPerigee(CircleFit &circle)
Transform circle parameter from (X0,Y0,R) to (phi,Tip,q/R) and consequently covariance matrix.
Definition: FitUtils.h:196
pixelTrack::TrackSoA
TrackSoAHeterogeneousT< maxNumber()> TrackSoA
Definition: TrackSoAHeterogeneousT.h:76
cuda_assert.h
hhp
const caConstants::TupleMultiplicity *__restrict__ uint32_t const HitsOnGPU *__restrict__ hhp
Definition: RiemannFitOnGPU.h:23
hit
Definition: SiStripHitEffFromCalibTree.cc:88
riemannFit::Map4d
Eigen::Map< Vector4d, 0, Eigen::InnerStride< stride > > Map4d
Definition: HelixFitOnGPU.h:30
cms::cudacompat::blockIdx
const dim3 blockIdx
Definition: cudaCompat.h:32