CMS 3D CMS Logo

BrokenLineFitOnGPU.h
Go to the documentation of this file.
1 //
2 // Author: Felice Pantaleo, CERN
3 //
4 
5 // #define BROKENLINE_DEBUG
6 
7 #include <cstdint>
8 
9 #include <cuda_runtime.h>
10 
16 
17 #include "HelixFitOnGPU.h"
18 
22 
23 // #define BL_DUMP_HITS
24 
25 template <int N>
26 __global__ void kernel_BLFastFit(Tuples const *__restrict__ foundNtuplets,
28  HitsOnGPU const *__restrict__ hhp,
29  double *__restrict__ phits,
30  float *__restrict__ phits_ge,
31  double *__restrict__ pfast_fit,
32  uint32_t nHits,
33  uint32_t offset) {
34  constexpr uint32_t hitsInFit = N;
35 
36  assert(hitsInFit <= nHits);
37 
38  assert(hhp);
42 
43  // look in bin for this hit multiplicity
45 
46 #ifdef BROKENLINE_DEBUG
47  if (0 == local_start) {
48  printf("%d total Ntuple\n", foundNtuplets->nbins());
49  printf("%d Ntuple of size %d for %d hits to fit\n", tupleMultiplicity->size(nHits), nHits, hitsInFit);
50  }
51 #endif
52 
53  for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt;
54  local_idx += gridDim.x * blockDim.x) {
55  auto tuple_idx = local_idx + offset;
56  if (tuple_idx >= tupleMultiplicity->size(nHits))
57  break;
58 
59  // get it from the ntuple container (one to one to helix)
60  auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx);
61  assert(tkid < foundNtuplets->nbins());
62 
63  assert(foundNtuplets->size(tkid) == nHits);
64 
65  riemannFit::Map3xNd<N> hits(phits + local_idx);
66  riemannFit::Map4d fast_fit(pfast_fit + local_idx);
67  riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
68 
69 #ifdef BL_DUMP_HITS
70  __shared__ int done;
71  done = 0;
72  __syncthreads();
73  bool dump = (foundNtuplets->size(tkid) == 5 && 0 == atomicAdd(&done, 1));
74 #endif
75 
76  // Prepare data structure
77  auto const *hitId = foundNtuplets->begin(tkid);
78  for (unsigned int i = 0; i < hitsInFit; ++i) {
79  auto hit = hitId[i];
80  float ge[6];
81  hhp->cpeParams()
82  .detParams(hhp->detectorIndex(hit))
83  .frame.toGlobal(hhp->xerrLocal(hit), 0, hhp->yerrLocal(hit), ge);
84 #ifdef BL_DUMP_HITS
85  if (dump) {
86  printf("Hit global: %d: %d hits.col(%d) << %f,%f,%f\n",
87  tkid,
88  hhp->detectorIndex(hit),
89  i,
90  hhp->xGlobal(hit),
91  hhp->yGlobal(hit),
92  hhp->zGlobal(hit));
93  printf("Error: %d: %d hits_ge.col(%d) << %e,%e,%e,%e,%e,%e\n",
94  tkid,
95  hhp->detetectorIndex(hit),
96  i,
97  ge[0],
98  ge[1],
99  ge[2],
100  ge[3],
101  ge[4],
102  ge[5]);
103  }
104 #endif
105  hits.col(i) << hhp->xGlobal(hit), hhp->yGlobal(hit), hhp->zGlobal(hit);
106  hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5];
107  }
108  brokenline::fastFit(hits, fast_fit);
109 
110  // no NaN here....
111  assert(fast_fit(0) == fast_fit(0));
112  assert(fast_fit(1) == fast_fit(1));
113  assert(fast_fit(2) == fast_fit(2));
114  assert(fast_fit(3) == fast_fit(3));
115  }
116 }
117 
118 template <int N>
119 __global__ void kernel_BLFit(caConstants::TupleMultiplicity const *__restrict__ tupleMultiplicity,
120  double bField,
122  double *__restrict__ phits,
123  float *__restrict__ phits_ge,
124  double *__restrict__ pfast_fit,
125  uint32_t nHits,
126  uint32_t offset) {
127  assert(N <= nHits);
128 
129  assert(results);
130  assert(pfast_fit);
131 
132  // same as above...
133 
134  // look in bin for this hit multiplicity
135  auto local_start = blockIdx.x * blockDim.x + threadIdx.x;
136  for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt;
137  local_idx += gridDim.x * blockDim.x) {
138  auto tuple_idx = local_idx + offset;
139  if (tuple_idx >= tupleMultiplicity->size(nHits))
140  break;
141 
142  // get it for the ntuple container (one to one to helix)
143  auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx);
144 
145  riemannFit::Map3xNd<N> hits(phits + local_idx);
146  riemannFit::Map4d fast_fit(pfast_fit + local_idx);
147  riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
148 
150 
153 
155  brokenline::lineFit(hits_ge, fast_fit, bField, data, line);
156  brokenline::circleFit(hits, hits_ge, fast_fit, bField, data, circle);
157 
158  results->stateAtBS.copyFromCircle(circle.par, circle.cov, line.par, line.cov, 1.f / float(bField), tkid);
159  results->pt(tkid) = float(bField) / float(std::abs(circle.par(2)));
160  results->eta(tkid) = asinhf(line.par(0));
161  results->chi2(tkid) = (circle.chi2 + line.chi2) / (2 * N - 5);
162 
163 #ifdef BROKENLINE_DEBUG
164  if (!(circle.chi2 >= 0) || !(line.chi2 >= 0))
165  printf("kernelBLFit failed! %f/%f\n", circle.chi2, line.chi2);
166  printf("kernelBLFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n",
167  N,
168  nHits,
169  tkid,
170  circle.par(0),
171  circle.par(1),
172  circle.par(2));
173  printf("kernelBLHits line.par(0,1): %d %f,%f\n", tkid, line.par(0), line.par(1));
174  printf("kernelBLHits chi2 cov %f/%f %e,%e,%e,%e,%e\n",
175  circle.chi2,
176  line.chi2,
177  circle.cov(0, 0),
178  circle.cov(1, 1),
179  circle.cov(2, 2),
180  line.cov(0, 0),
181  line.cov(1, 1));
182 #endif
183  }
184 }
pixelCPEforGPU.h
riemannFit::CircleFit::par
Vector3d par
parameter: (X0,Y0,R)
Definition: FitResult.h:27
mps_fire.i
i
Definition: mps_fire.py:428
dqmMemoryStats.float
float
Definition: dqmMemoryStats.py:127
phits_ge
const caConstants::TupleMultiplicity *__restrict__ const HitsOnGPU *__restrict__ double *__restrict__ float *__restrict__ phits_ge
Definition: BrokenLineFitOnGPU.h:27
hfClusterShapes_cfi.hits
hits
Definition: hfClusterShapes_cfi.py:5
brokenline::prepareBrokenLineData
__host__ __device__ void prepareBrokenLineData(const M3xN &hits, const V4 &fast_fit, const double bField, PreparedBrokenLineData< n > &results)
Computes the data needed for the Broken Line fit procedure that are mainly common for the circle and ...
Definition: BrokenLine.h:150
nt
int nt
Definition: AMPTWrapper.h:42
TrackingRecHit2DHeterogeneous.h
phits
const caConstants::TupleMultiplicity *__restrict__ const HitsOnGPU *__restrict__ double *__restrict__ phits
Definition: BrokenLineFitOnGPU.h:27
cms::cudacompat::__syncthreads
void __syncthreads()
Definition: cudaCompat.h:77
bookConverter.results
results
Definition: bookConverter.py:144
TrackingRecHit2DSOAView
Definition: TrackingRecHit2DSOAView.h:15
brokenline::circleFit
__host__ __device__ void circleFit(const M3xN &hits, const M6xN &hits_ge, const V4 &fast_fit, const double bField, PreparedBrokenLineData< n > &data, karimaki_circle_fit &circle_results)
Performs the Broken Line fit in the curved track case (that is, the fit parameters are the intercepti...
Definition: BrokenLine.h:296
local_start
auto local_start
Definition: BrokenLineFitOnGPU.h:44
__global__
#define __global__
Definition: cudaCompat.h:19
riemannFit::CircleFit
Definition: FitResult.h:26
riemannFit::maxNumberOfConcurrentFits
constexpr uint32_t maxNumberOfConcurrentFits
Definition: HelixFitOnGPU.h:12
brokenline::lineFit
__host__ __device__ void lineFit(const M6xN &hits_ge, const V4 &fast_fit, const double bField, const PreparedBrokenLineData< n > &data, riemannFit::LineFit &line_results)
Performs the Broken Line fit in the straight track case (that is, the fit parameters are only the int...
Definition: BrokenLine.h:450
nHits
const caConstants::TupleMultiplicity *__restrict__ const HitsOnGPU *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ uint32_t nHits
Definition: BrokenLineFitOnGPU.h:27
fileCollector.done
done
Definition: fileCollector.py:123
tupleMultiplicity
const caConstants::TupleMultiplicity *__restrict__ tupleMultiplicity
Definition: BrokenLineFitOnGPU.h:27
BrokenLine.h
N
#define N
Definition: blowfish.cc:9
LaserClient_cfi.nbins
nbins
Definition: LaserClient_cfi.py:51
cms::cudacompat::atomicAdd
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:51
cms::cudacompat::gridDim
const dim3 gridDim
Definition: cudaCompat.h:33
TrackSoAHeterogeneousT
Definition: TrackSoAHeterogeneousT.h:14
riemannFit::CircleFit::chi2
float chi2
Definition: FitResult.h:35
HelixFitOnGPU.h
pfast_fit
const caConstants::TupleMultiplicity *__restrict__ const HitsOnGPU *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ pfast_fit
Definition: BrokenLineFitOnGPU.h:27
cms::cudacompat::blockDim
const dim3 blockDim
Definition: cudaCompat.h:30
riemannFit::Map6xNf
Eigen::Map< Matrix6xNf< N >, 0, Eigen::Stride< 6 *stride, stride > > Map6xNf
Definition: HelixFitOnGPU.h:28
riemannFit::CircleFit::cov
Matrix3d cov
Definition: FitResult.h:28
FrontierConditions_GlobalTag_cff.dump
dump
Definition: FrontierConditions_GlobalTag_cff.py:12
riemannFit::LineFit
Definition: FitResult.h:38
foundNtuplets
const uint32_t *__restrict__ HitContainer * foundNtuplets
Definition: CAHitNtupletGeneratorKernelsImpl.h:124
cudaCheck.h
cms::cudacompat::threadIdx
const dim3 threadIdx
Definition: cudaCompat.h:29
brokenline::PreparedBrokenLineData
data needed for the Broken Line fit procedure.
Definition: BrokenLine.h:24
cms::cuda::HistoContainer::nbins
static constexpr uint32_t nbins()
Definition: HistoContainer.h:175
Calorimetry_cff.bField
bField
Definition: Calorimetry_cff.py:284
riemannFit::Map3xNd
Eigen::Map< Matrix3xNd< N >, 0, Eigen::Stride< 3 *stride, stride > > Map3xNd
Definition: HelixFitOnGPU.h:23
amptDefault_cfi.frame
frame
Definition: amptDefault_cfi.py:12
data
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:79
pixelTrack::HitContainer
TrackSoA::HitContainer HitContainer
Definition: TrackSoAHeterogeneousT.h:69
funct::abs
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
hhp
const caConstants::TupleMultiplicity *__restrict__ const HitsOnGPU *__restrict__ hhp
Definition: BrokenLineFitOnGPU.h:27
pixelTrack::TrackSoA
TrackSoAHeterogeneousT< maxNumber()> TrackSoA
Definition: TrackSoAHeterogeneousT.h:67
cms::cuda::HistoContainer
Definition: HistoContainer.h:152
brokenline::fastFit
__host__ __device__ void fastFit(const M3xN &hits, V4 &result)
A very fast helix fit.
Definition: BrokenLine.h:249
offset
const caConstants::TupleMultiplicity *__restrict__ const HitsOnGPU *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ uint32_t uint32_t offset
Definition: BrokenLineFitOnGPU.h:33
cuda_assert.h
mps_splice.line
line
Definition: mps_splice.py:76
hit
Definition: SiStripHitEffFromCalibTree.cc:88
riemannFit::Map4d
Eigen::Map< Vector4d, 0, Eigen::InnerStride< stride > > Map4d
Definition: HelixFitOnGPU.h:30
cms::cudacompat::blockIdx
const dim3 blockIdx
Definition: cudaCompat.h:32
assert
assert(hitsInFit<=nHits)