CMS 3D CMS Logo

Typedefs | Functions | Variables
BrokenLineFitOnGPU.h File Reference
#include <cstdint>
#include <cuda_runtime.h>
#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h"
#include "RecoPixelVertexing/PixelTrackFitting/interface/BrokenLine.h"
#include "HelixFitOnGPU.h"

Go to the source code of this file.

Typedefs

using HitsOnGPU = TrackingRecHit2DSOAView
 
using OutputSoA = pixelTrack::TrackSoA
 
using tindex_type = caConstants::tindex_type
 
using Tuples = pixelTrack::HitContainer
 

Functions

template<int N>
 __attribute__ ((always_inline)) void kernel_BLFastFit(Tuples const *__restrict__ foundNtuplets
 
 assert (hitsInFit<=nHitsL)
 
 assert (nHitsL<=nHitsH)
 
 assert (hhp)
 
 assert (phits)
 
 assert (pfast_fit)
 
 assert (foundNtuplets)
 
 assert (tupleMultiplicity)
 
 assert (totTK<=int(tupleMultiplicity->size()))
 
 assert (totTK >=0)
 
 for (int local_idx=local_start, nt=riemannFit::maxNumberOfConcurrentFits;local_idx< nt;local_idx+=gridDim.x *blockDim.x)
 

Variables

caConstants::TupleMultiplicity const *__restrict__ HitsOnGPU const *__restrict__ hhp
 
constexpr auto invalidTkId = std::numeric_limits<tindex_type>::max()
 
auto local_start = blockIdx.x * blockDim.x + threadIdx.x
 
caConstants::TupleMultiplicity const *__restrict__ HitsOnGPU const *__restrict__ tindex_type *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ uint32_t uint32_t nHitsH
 
caConstants::TupleMultiplicity const *__restrict__ HitsOnGPU const *__restrict__ tindex_type *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ uint32_t nHitsL
 
caConstants::TupleMultiplicity const *__restrict__ HitsOnGPU const *__restrict__ tindex_type *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ uint32_t uint32_t int32_t offset
 
caConstants::TupleMultiplicity const *__restrict__ HitsOnGPU const *__restrict__ tindex_type *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ pfast_fit
 
caConstants::TupleMultiplicity const *__restrict__ HitsOnGPU const *__restrict__ tindex_type *__restrict__ double *__restrict__ phits
 
caConstants::TupleMultiplicity const *__restrict__ HitsOnGPU const *__restrict__ tindex_type *__restrict__ double *__restrict__ float *__restrict__ phits_ge
 
caConstants::TupleMultiplicity const *__restrict__ HitsOnGPU const *__restrict__ tindex_type *__restrict__ ptkids
 
int totTK = tupleMultiplicity->end(nHitsH) - tupleMultiplicity->begin(nHitsL)
 
caConstants::TupleMultiplicity const *__restrict__ tupleMultiplicity
 

Typedef Documentation

◆ HitsOnGPU

Definition at line 19 of file BrokenLineFitOnGPU.h.

◆ OutputSoA

Definition at line 21 of file BrokenLineFitOnGPU.h.

◆ tindex_type

Definition at line 22 of file BrokenLineFitOnGPU.h.

◆ Tuples

Definition at line 20 of file BrokenLineFitOnGPU.h.

Function Documentation

◆ __attribute__()

template<int N>
__attribute__ ( (always_inline)  ) const
inline

Definition at line 170 of file BrokenLineFitOnGPU.h.

References funct::abs(), assert(), Calorimetry_cff::bField, cms::cudacompat::blockDim, cms::cudacompat::blockIdx, riemannFit::CircleFit::chi2, brokenline::circleFit(), riemannFit::CircleFit::cov, data, dqmMemoryStats::float, cms::cudacompat::gridDim, hfClusterShapes_cfi::hits, invalidTkId, mps_splice::line, brokenline::lineFit(), local_start, riemannFit::maxNumberOfConcurrentFits, caConstants::maxTuples, N, nHits, nt, riemannFit::CircleFit::par, pfast_fit, phits, phits_ge, brokenline::prepareBrokenLineData(), ptkids, bookConverter::results, and cms::cudacompat::threadIdx.

176  {
177  assert(results);
178  assert(pfast_fit);
179 
180  // same as above...
181 
182  // look in bin for this hit multiplicity
183  auto local_start = blockIdx.x * blockDim.x + threadIdx.x;
184  for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt;
185  local_idx += gridDim.x * blockDim.x) {
186  if (invalidTkId == ptkids[local_idx])
187  break;
188 
189  auto tkid = ptkids[local_idx];
190 
192 
193  riemannFit::Map3xNd<N> hits(phits + local_idx);
194  riemannFit::Map4d fast_fit(pfast_fit + local_idx);
195  riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
196 
198 
201 
203  brokenline::lineFit(hits_ge, fast_fit, bField, data, line);
204  brokenline::circleFit(hits, hits_ge, fast_fit, bField, data, circle);
205 
206  results->stateAtBS.copyFromCircle(circle.par, circle.cov, line.par, line.cov, 1.f / float(bField), tkid);
207  results->pt(tkid) = float(bField) / float(std::abs(circle.par(2)));
208  results->eta(tkid) = asinhf(line.par(0));
209  results->chi2(tkid) = (circle.chi2 + line.chi2) / (2 * N - 5);
210 
211 #ifdef BROKENLINE_DEBUG
212  if (!(circle.chi2 >= 0) || !(line.chi2 >= 0))
213  printf("kernelBLFit failed! %f/%f\n", circle.chi2, line.chi2);
214  printf("kernelBLFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n",
215  N,
216  nHits,
217  tkid,
218  circle.par(0),
219  circle.par(1),
220  circle.par(2));
221  printf("kernelBLHits line.par(0,1): %d %f,%f\n", tkid, line.par(0), line.par(1));
222  printf("kernelBLHits chi2 cov %f/%f %e,%e,%e,%e,%e\n",
223  circle.chi2,
224  line.chi2,
225  circle.cov(0, 0),
226  circle.cov(1, 1),
227  circle.cov(2, 2),
228  line.cov(0, 0),
229  line.cov(1, 1));
230 #endif
231  }
232 }
const dim3 threadIdx
Definition: cudaCompat.h:29
__host__ __device__ void lineFit(const M6xN &hits_ge, const V4 &fast_fit, const double bField, const PreparedBrokenLineData< n > &data, riemannFit::LineFit &line_results)
Performs the Broken Line fit in the straight track case (that is, the fit parameters are only the int...
Definition: BrokenLine.h:463
caConstants::TupleMultiplicity const *__restrict__ HitsOnGPU const *__restrict__ tindex_type *__restrict__ ptkids
const dim3 gridDim
Definition: cudaCompat.h:33
caConstants::TupleMultiplicity const *__restrict__ HitsOnGPU const *__restrict__ tindex_type *__restrict__ double *__restrict__ float *__restrict__ phits_ge
constexpr uint32_t maxTuples
Definition: CAConstants.h:46
const dim3 blockDim
Definition: cudaCompat.h:30
auto local_start
Vector3d par
parameter: (X0,Y0,R)
Definition: FitResult.h:27
__host__ __device__ void prepareBrokenLineData(const M3xN &hits, const V4 &fast_fit, const double bField, PreparedBrokenLineData< n > &results)
Computes the data needed for the Broken Line fit procedure that are mainly common for the circle and ...
Definition: BrokenLine.h:150
Eigen::Map< Matrix3xNd< N >, 0, Eigen::Stride< 3 *stride, stride > > Map3xNd
Definition: HelixFitOnGPU.h:23
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
const dim3 blockIdx
Definition: cudaCompat.h:32
Eigen::Map< Matrix6xNf< N >, 0, Eigen::Stride< 6 *stride, stride > > Map6xNf
Definition: HelixFitOnGPU.h:28
int nt
Definition: AMPTWrapper.h:42
caConstants::TupleMultiplicity const *__restrict__ HitsOnGPU const *__restrict__ tindex_type *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ pfast_fit
assert(hitsInFit<=nHitsL)
#define N
Definition: blowfish.cc:9
constexpr auto invalidTkId
Eigen::Map< Vector4d, 0, Eigen::InnerStride< stride > > Map4d
Definition: HelixFitOnGPU.h:30
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell const int32_t nHits
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:79
__host__ __device__ void circleFit(const M3xN &hits, const M6xN &hits_ge, const V4 &fast_fit, const double bField, PreparedBrokenLineData< n > &data, karimaki_circle_fit &circle_results)
Performs the Broken Line fit in the curved track case (that is, the fit parameters are the intercepti...
Definition: BrokenLine.h:314
data needed for the Broken Line fit procedure.
Definition: BrokenLine.h:24
caConstants::TupleMultiplicity const *__restrict__ HitsOnGPU const *__restrict__ tindex_type *__restrict__ double *__restrict__ phits
constexpr uint32_t maxNumberOfConcurrentFits
Definition: HelixFitOnGPU.h:12

◆ assert() [1/9]

assert ( hitsInFit<=  nHitsL)

Referenced by __attribute__(), and for().

◆ assert() [2/9]

assert ( nHitsL<=  nHitsH)

◆ assert() [3/9]

assert ( hhp  )

◆ assert() [4/9]

assert ( phits  )

◆ assert() [5/9]

assert ( pfast_fit  )

◆ assert() [6/9]

assert ( foundNtuplets  )

◆ assert() [7/9]

assert ( tupleMultiplicity  )

◆ assert() [8/9]

assert ( totTK<=  inttupleMultiplicity->size())

◆ assert() [9/9]

assert ( totTK >=  0)

◆ for()

for ( int  local_idx = local_start)

Definition at line 61 of file BrokenLineFitOnGPU.h.

References cms::cudacompat::__syncthreads(), funct::abs(), assert(), cms::cudacompat::atomicAdd(), newFWLiteAna::bin, fileCollector::done, Calorimetry_cff::dp, GCP_Ntuples_cfg::dump, PVValHelper::dx, PVValHelper::dy, PVValHelper::dz, f, brokenline::fastFit(), foundNtuplets, hhp, hfClusterShapes_cfi::hits, mps_fire::i, createfilelist::int, invalidTkId, dqmiolumiharvest::j, CPEFastParametrisation::kGenErrorQBins, CPEFastParametrisation::kNumErrorBins, SiStripPI::max, dqmiodumpmetadata::n, nHits, nHitsH, nHitsL, offset, pfast_fit, phits, phits_ge, phase1PixelTopology::pixelPitchY, ptkids, mps_update::status, totTK, and tupleMultiplicity.

62  {
63  int tuple_idx = local_idx + offset;
64  if (tuple_idx >= totTK) {
65  ptkids[local_idx] = invalidTkId;
66  break;
67  }
68  // get it from the ntuple container (one to one to helix)
69  auto tkid = *(tupleMultiplicity->begin(nHitsL) + tuple_idx);
70  assert(tkid < foundNtuplets->nOnes());
71 
72  ptkids[local_idx] = tkid;
73 
74  auto nHits = foundNtuplets->size(tkid);
75 
76  assert(nHits >= nHitsL);
77  assert(nHits <= nHitsH);
78 
79  riemannFit::Map3xNd<N> hits(phits + local_idx);
80  riemannFit::Map4d fast_fit(pfast_fit + local_idx);
81  riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
82 
83 #ifdef BL_DUMP_HITS
84  __shared__ int done;
85  done = 0;
86  __syncthreads();
87  bool dump = (foundNtuplets->size(tkid) == 5 && 0 == atomicAdd(&done, 1));
88 #endif
89 
90  // Prepare data structure
91  auto const *hitId = foundNtuplets->begin(tkid);
92 
93  // #define YERR_FROM_DC
94 #ifdef YERR_FROM_DC
95  // try to compute more precise error in y
96  auto dx = hhp->xGlobal(hitId[hitsInFit - 1]) - hhp->xGlobal(hitId[0]);
97  auto dy = hhp->yGlobal(hitId[hitsInFit - 1]) - hhp->yGlobal(hitId[0]);
98  auto dz = hhp->zGlobal(hitId[hitsInFit - 1]) - hhp->zGlobal(hitId[0]);
99  float ux, uy, uz;
100 #endif
101 
102  float incr = std::max(1.f, float(nHits) / float(hitsInFit));
103  float n = 0;
104  for (uint32_t i = 0; i < hitsInFit; ++i) {
105  int j = int(n + 0.5f); // round
106  if (hitsInFit - 1 == i)
107  j = nHits - 1; // force last hit to ensure max lever arm.
108  assert(j < int(nHits));
109  n += incr;
110  auto hit = hitId[j];
111  float ge[6];
112 
113 #ifdef YERR_FROM_DC
114  auto const &dp = hhp->cpeParams().detParams(hhp->detectorIndex(hit));
115  auto status = hhp->status(hit);
116  int qbin = CPEFastParametrisation::kGenErrorQBins - 1 - status.qBin;
117  assert(qbin >= 0 && qbin < 5);
118  bool nok = (status.isBigY | status.isOneY);
119  // compute cotanbeta and use it to recompute error
120  dp.frame.rotation().multiply(dx, dy, dz, ux, uy, uz);
121  auto cb = std::abs(uy / uz);
122  int bin =
123  int(cb * (float(phase1PixelTopology::pixelThickess) / float(phase1PixelTopology::pixelPitchY)) * 8.f) - 4;
124  int low_value = 0;
125  int high_value = CPEFastParametrisation::kNumErrorBins - 1;
126  // return estimated bin value truncated to [0, 15]
127  bin = std::clamp(bin, low_value, high_value);
128  float yerr = dp.sigmay[bin] * 1.e-4f; // toCM
129  yerr *= dp.yfact[qbin]; // inflate
130  yerr *= yerr;
131  yerr += dp.apeYY;
132  yerr = nok ? hhp->yerrLocal(hit) : yerr;
133  dp.frame.toGlobal(hhp->xerrLocal(hit), 0, yerr, ge);
134 #else
135  hhp->cpeParams()
136  .detParams(hhp->detectorIndex(hit))
137  .frame.toGlobal(hhp->xerrLocal(hit), 0, hhp->yerrLocal(hit), ge);
138 #endif
139 
140 #ifdef BL_DUMP_HITS
141  bool dump = foundNtuplets->size(tkid) == 5;
142  if (dump) {
143  printf("Track id %d %d Hit %d on %d\nGlobal: hits.col(%d) << %f,%f,%f\n",
144  local_idx,
145  tkid,
146  hit,
147  hhp->detectorIndex(hit),
148  i,
149  hhp->xGlobal(hit),
150  hhp->yGlobal(hit),
151  hhp->zGlobal(hit));
152  printf("Error: hits_ge.col(%d) << %e,%e,%e,%e,%e,%e\n", i, ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]);
153  }
154 #endif
155 
156  hits.col(i) << hhp->xGlobal(hit), hhp->yGlobal(hit), hhp->zGlobal(hit);
157  hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5];
158  }
159  brokenline::fastFit(hits, fast_fit);
160 
161  // no NaN here....
162  assert(fast_fit(0) == fast_fit(0));
163  assert(fast_fit(1) == fast_fit(1));
164  assert(fast_fit(2) == fast_fit(2));
165  assert(fast_fit(3) == fast_fit(3));
166  }
caConstants::TupleMultiplicity const *__restrict__ tupleMultiplicity
constexpr uint16_t pixelPitchY
caConstants::TupleMultiplicity const *__restrict__ HitsOnGPU const *__restrict__ tindex_type *__restrict__ ptkids
caConstants::TupleMultiplicity const *__restrict__ HitsOnGPU const *__restrict__ tindex_type *__restrict__ double *__restrict__ float *__restrict__ phits_ge
caConstants::TupleMultiplicity const *__restrict__ HitsOnGPU const *__restrict__ tindex_type *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ uint32_t uint32_t int32_t offset
constexpr int kGenErrorQBins
caConstants::TupleMultiplicity const *__restrict__ HitsOnGPU const *__restrict__ tindex_type *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ uint32_t nHitsL
Eigen::Map< Matrix3xNd< N >, 0, Eigen::Stride< 3 *stride, stride > > Map3xNd
Definition: HelixFitOnGPU.h:23
__host__ __device__ void fastFit(const M3xN &hits, V4 &result)
A very fast helix fit.
Definition: BrokenLine.h:258
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
double f[11][100]
Eigen::Map< Matrix6xNf< N >, 0, Eigen::Stride< 6 *stride, stride > > Map6xNf
Definition: HelixFitOnGPU.h:28
caConstants::TupleMultiplicity const *__restrict__ HitsOnGPU const *__restrict__ tindex_type *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ pfast_fit
assert(hitsInFit<=nHitsL)
int totTK
constexpr auto invalidTkId
caConstants::TupleMultiplicity const *__restrict__ HitsOnGPU const *__restrict__ hhp
Eigen::Map< Vector4d, 0, Eigen::InnerStride< stride > > Map4d
Definition: HelixFitOnGPU.h:30
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell const int32_t nHits
void __syncthreads()
Definition: cudaCompat.h:132
caConstants::TupleMultiplicity const *__restrict__ HitsOnGPU const *__restrict__ tindex_type *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ uint32_t uint32_t nHitsH
auto const & foundNtuplets
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
constexpr int kNumErrorBins
caConstants::TupleMultiplicity const *__restrict__ HitsOnGPU const *__restrict__ tindex_type *__restrict__ double *__restrict__ phits

Variable Documentation

◆ hhp

caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__ hhp

Definition at line 29 of file BrokenLineFitOnGPU.h.

Referenced by for().

◆ invalidTkId

constexpr auto invalidTkId = std::numeric_limits<tindex_type>::max()

Definition at line 23 of file BrokenLineFitOnGPU.h.

Referenced by __attribute__(), and for().

◆ local_start

auto local_start = blockIdx.x * blockDim.x + threadIdx.x

Definition at line 49 of file BrokenLineFitOnGPU.h.

Referenced by __attribute__().

◆ nHitsH

caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__ tindex_type* __restrict__ double* __restrict__ float* __restrict__ double* __restrict__ uint32_t uint32_t nHitsH

Definition at line 29 of file BrokenLineFitOnGPU.h.

Referenced by for().

◆ nHitsL

caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__ tindex_type* __restrict__ double* __restrict__ float* __restrict__ double* __restrict__ uint32_t nHitsL

Definition at line 29 of file BrokenLineFitOnGPU.h.

Referenced by for().

◆ offset

caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__ tindex_type* __restrict__ double* __restrict__ float* __restrict__ double* __restrict__ uint32_t uint32_t int32_t offset
Initial value:
{
constexpr uint32_t hitsInFit = N
#define N
Definition: blowfish.cc:9

Definition at line 37 of file BrokenLineFitOnGPU.h.

Referenced by for().

◆ pfast_fit

caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__ tindex_type* __restrict__ double* __restrict__ float* __restrict__ double* __restrict__ pfast_fit

Definition at line 29 of file BrokenLineFitOnGPU.h.

Referenced by __attribute__(), and for().

◆ phits

caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__ tindex_type* __restrict__ double* __restrict__ phits

◆ phits_ge

caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__ tindex_type* __restrict__ double* __restrict__ float* __restrict__ phits_ge

Definition at line 29 of file BrokenLineFitOnGPU.h.

Referenced by __attribute__(), and for().

◆ ptkids

caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__ tindex_type* __restrict__ ptkids

Definition at line 29 of file BrokenLineFitOnGPU.h.

Referenced by __attribute__(), and for().

◆ totTK

int totTK = tupleMultiplicity->end(nHitsH) - tupleMultiplicity->begin(nHitsL)

Definition at line 50 of file BrokenLineFitOnGPU.h.

Referenced by for().

◆ tupleMultiplicity

Quality const *__restrict__ caConstants::TupleMultiplicity * tupleMultiplicity
Initial value:
{
auto first = blockIdx.x * blockDim.x + threadIdx.x
const dim3 threadIdx
Definition: cudaCompat.h:29
const dim3 blockDim
Definition: cudaCompat.h:30
const dim3 blockIdx
Definition: cudaCompat.h:32

Definition at line 29 of file BrokenLineFitOnGPU.h.

Referenced by HelixFitOnGPU::allocateOnGPU(), and for().