df/d8c/BrokenLineFitOnGPU_8h_source.html

 //
 // Author: Felice Pantaleo, CERN
 //

 // #define BROKENLINE_DEBUG

 #include <cstdint>

 #include <cuda_runtime.h>

 #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
 #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h"
 #include "RecoPixelVertexing/PixelTrackFitting/interface/BrokenLine.h"

 #include "HelixFitOnGPU.h"

 // View type through which the kernels below read hit data
 // (global positions, local errors, detector index).
 using HitsOnGPU = TrackingRecHit2DSOAView;
 // Container queried with begin(tkid)/size(tkid) to get the hit ids of a tuple.
 using Tuples = pixelTrack::HitContainer;
 // Track SoA that kernel_BLFit fills (stateAtBS, pt, eta, chi2).
 using OutputSoA = pixelTrack::TrackSoA;
 // Integer type of the tuple ids stored in the ptkids buffer.
 using tindex_type = caConstants::tindex_type;
 // Sentinel (largest representable tindex_type) written into ptkids by
 // kernel_BLFastFit to mark a slot with no tuple; kernel_BLFit stops when it reads it.
 constexpr auto invalidTkId = std::numeric_limits<tindex_type>::max();

 // #define BL_DUMP_HITS

 // First stage of the Broken Line fit: select N hits for each tuple, store their
 // global positions and errors, and run brokenline::fastFit on them.
 //
 // Template parameter:
 //   N  number of hits used in the fit (hitsInFit).
 //
 // Parameters:
 //   foundNtuplets      container giving, for a tuple id, its hit ids (begin/size).
 //   tupleMultiplicity  tuple ids binned by hit multiplicity; the bins [nHitsL, nHitsH]
 //                      are the ones processed here.
 //   hhp                hit view (global coordinates, local errors, detector index).
 //   ptkids             output: tuple id handled by each fit slot, or invalidTkId.
 //   phits              output: x,y,z of the selected hits (accessed through Map3xNd).
 //   phits_ge           output: 6 error terms per selected hit (accessed through Map6xNf).
 //   pfast_fit          output: the 4 fast-fit parameters (accessed through Map4d).
 //   nHitsL, nHitsH     lowest/highest hit multiplicity handled by this launch.
 //   offset             added to the slot index to obtain the index of the tuple inside
 //                      the selected multiplicity range.
 //
 // Each thread walks the fit slots with a grid-stride loop bounded by
 // riemannFit::maxNumberOfConcurrentFits. The first slot that falls beyond the
 // available tuples is tagged with invalidTkId and the thread stops.
 template <int N>
 __global__ void kernel_BLFastFit(Tuples const *__restrict__ foundNtuplets,
                                  caConstants::TupleMultiplicity const *__restrict__ tupleMultiplicity,
                                  HitsOnGPU const *__restrict__ hhp,
                                  tindex_type *__restrict__ ptkids,
                                  double *__restrict__ phits,
                                  float *__restrict__ phits_ge,
                                  double *__restrict__ pfast_fit,
                                  uint32_t nHitsL,
                                  uint32_t nHitsH,
                                  int32_t offset) {
   constexpr uint32_t hitsInFit = N;

   assert(hitsInFit <= nHitsL);
   assert(nHitsL <= nHitsH);
   assert(hhp);
   assert(phits);
   assert(pfast_fit);
   assert(foundNtuplets);
   assert(tupleMultiplicity);

   // look in bin for this hit multiplicity
   auto local_start = blockIdx.x * blockDim.x + threadIdx.x;
   int totTK = tupleMultiplicity->end(nHitsH) - tupleMultiplicity->begin(nHitsL);
   assert(totTK <= int(tupleMultiplicity->size()));
   assert(totTK >= 0);

 #ifdef BROKENLINE_DEBUG
   if (0 == local_start) {
     // cast to int so that the argument matches the %d conversion (same cast as in the assert above)
     printf("%d total Ntuple\n", int(tupleMultiplicity->size()));
     printf("%d Ntuple of size %d/%d for %d hits to fit\n", totTK, nHitsL, nHitsH, hitsInFit);
   }
 #endif

   for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt;
        local_idx += gridDim.x * blockDim.x) {
     int tuple_idx = local_idx + offset;
     if (tuple_idx >= totTK) {
       // no tuple for this slot: tag it so that kernel_BLFit knows where to stop
       ptkids[local_idx] = invalidTkId;
       break;
     }
     // get it from the ntuple container (one to one to helix)
     auto tkid = *(tupleMultiplicity->begin(nHitsL) + tuple_idx);
     assert(tkid < foundNtuplets->nOnes());

     ptkids[local_idx] = tkid;

     auto nHits = foundNtuplets->size(tkid);

     assert(nHits >= nHitsL);
     assert(nHits <= nHitsH);

     // views of the output buffers for this fit slot
     riemannFit::Map3xNd<N> hits(phits + local_idx);
     riemannFit::Map4d fast_fit(pfast_fit + local_idx);
     riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);

     // NOTE: no block-level barrier is used in this loop: threads leave it through
     // the "break" above at different iterations, so a __syncthreads() here would
     // sit in divergent control flow. The BL_DUMP_HITS selection is done per hit below.

     // Prepare data structure
     auto const *hitId = foundNtuplets->begin(tkid);

     // #define YERR_FROM_DC
 #ifdef YERR_FROM_DC
     // try to compute more precise error in y
     auto dx = hhp->xGlobal(hitId[hitsInFit - 1]) - hhp->xGlobal(hitId[0]);
     auto dy = hhp->yGlobal(hitId[hitsInFit - 1]) - hhp->yGlobal(hitId[0]);
     auto dz = hhp->zGlobal(hitId[hitsInFit - 1]) - hhp->zGlobal(hitId[0]);
     float ux, uy, uz;
 #endif

     // When the tuple has more hits than are fitted, pick them at a regular
     // (rounded) step along the tuple; with nHits == hitsInFit the step is 1.
     float incr = std::max(1.f, float(nHits) / float(hitsInFit));
     float n = 0;
     for (uint32_t i = 0; i < hitsInFit; ++i) {
       int j = int(n + 0.5f);  // round
       if (hitsInFit - 1 == i)
         j = nHits - 1;  // force last hit to ensure max lever arm.
       assert(j < int(nHits));
       n += incr;
       auto hit = hitId[j];
       float ge[6];

 #ifdef YERR_FROM_DC
       auto const &dp = hhp->cpeParams().detParams(hhp->detectorIndex(hit));
       auto status = hhp->status(hit);
       int qbin = CPEFastParametrisation::kGenErrorQBins - 1 - status.qBin;
       assert(qbin >= 0 && qbin < 5);
       bool nok = (status.isBigY | status.isOneY);
       // compute cotanbeta and use it to recompute error
       dp.frame.rotation().multiply(dx, dy, dz, ux, uy, uz);
       auto cb = std::abs(uy / uz);
       int bin =
           int(cb * (float(phase1PixelTopology::pixelThickess) / float(phase1PixelTopology::pixelPitchY)) * 8.f) - 4;
       int low_value = 0;
       int high_value = CPEFastParametrisation::kNumErrorBins - 1;
       // return estimated bin value truncated to [0, 15]
       bin = std::clamp(bin, low_value, high_value);
       float yerr = dp.sigmay[bin] * 1.e-4f;  // toCM
       yerr *= dp.yfact[qbin];                // inflate
       yerr *= yerr;
       yerr += dp.apeYY;
       yerr = nok ? hhp->yerrLocal(hit) : yerr;
       dp.frame.toGlobal(hhp->xerrLocal(hit), 0, yerr, ge);
 #else
       // rotate the local hit errors into the global frame
       hhp->cpeParams()
           .detParams(hhp->detectorIndex(hit))
           .frame.toGlobal(hhp->xerrLocal(hit), 0, hhp->yerrLocal(hit), ge);
 #endif

 #ifdef BL_DUMP_HITS
       // dump every hit of the tuples made of exactly 5 hits
       bool dump = foundNtuplets->size(tkid) == 5;
       if (dump) {
         printf("Track id %d %d Hit %d on %d\nGlobal: hits.col(%d) << %f,%f,%f\n",
                local_idx,
                tkid,
                hit,
                hhp->detectorIndex(hit),
                i,
                hhp->xGlobal(hit),
                hhp->yGlobal(hit),
                hhp->zGlobal(hit));
         printf("Error: hits_ge.col(%d) << %e,%e,%e,%e,%e,%e\n", i, ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]);
       }
 #endif

       hits.col(i) << hhp->xGlobal(hit), hhp->yGlobal(hit), hhp->zGlobal(hit);
       hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5];
     }
     brokenline::fastFit(hits, fast_fit);

     // no NaN here....
     assert(fast_fit(0) == fast_fit(0));
     assert(fast_fit(1) == fast_fit(1));
     assert(fast_fit(2) == fast_fit(2));
     assert(fast_fit(3) == fast_fit(3));
   }
 }

 // Second stage of the Broken Line fit: for every fit slot prepared by
 // kernel_BLFastFit, run the line and circle fits and store the result.
 //
 // Template parameter:
 //   N  number of hits used in the fit.
 //
 // Parameters:
 //   tupleMultiplicity  not used in the body of this kernel.
 //   bField             magnetic field value forwarded to the brokenline routines and
 //                      used to compute pt.
 //   results            output track SoA: stateAtBS, pt, eta and chi2 are written at
 //                      index tkid.
 //   ptkids             tuple id of each fit slot; invalidTkId marks the first unused slot.
 //   phits              x,y,z of the hits of each slot (accessed through Map3xNd).
 //   phits_ge           error terms of the hits of each slot (accessed through Map6xNf).
 //   pfast_fit          fast-fit parameters of each slot (accessed through Map4d).
 //
 // Each thread walks the fit slots with a grid-stride loop bounded by
 // riemannFit::maxNumberOfConcurrentFits and stops at the first slot tagged
 // with invalidTkId.
 template <int N>
 __global__ void kernel_BLFit(caConstants::TupleMultiplicity const *__restrict__ tupleMultiplicity,
                              double bField,
                              OutputSoA *results,
                              tindex_type const *__restrict__ ptkids,
                              double *__restrict__ phits,
                              float *__restrict__ phits_ge,
                              double *__restrict__ pfast_fit) {
   assert(results);
   assert(pfast_fit);

   // same as above...

   // look in bin for this hit multiplicity
   auto local_start = blockIdx.x * blockDim.x + threadIdx.x;
   for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt;
        local_idx += gridDim.x * blockDim.x) {
     // slot without a tuple: nothing left to fit for this thread
     if (invalidTkId == ptkids[local_idx])
       break;

     auto tkid = ptkids[local_idx];

     assert(tkid < caConstants::maxTuples);

     // views of the per-slot buffers
     riemannFit::Map3xNd<N> hits(phits + local_idx);
     riemannFit::Map4d fast_fit(pfast_fit + local_idx);
     riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);

     brokenline::PreparedBrokenLineData<N> data;

     brokenline::karimaki_circle_fit circle;
     riemannFit::LineFit line;

     brokenline::prepareBrokenLineData(hits, fast_fit, bField, data);
     brokenline::lineFit(hits_ge, fast_fit, bField, data, line);
     brokenline::circleFit(hits, hits_ge, fast_fit, bField, data, circle);

     results->stateAtBS.copyFromCircle(circle.par, circle.cov, line.par, line.cov, 1.f / float(bField), tkid);
     results->pt(tkid) = float(bField) / float(std::abs(circle.par(2)));
     results->eta(tkid) = asinhf(line.par(0));
     // sum of the two chi2 divided by (2 * N - 5), presumably the number of degrees of freedom
     results->chi2(tkid) = (circle.chi2 + line.chi2) / (2 * N - 5);

 #ifdef BROKENLINE_DEBUG
     if (!(circle.chi2 >= 0) || !(line.chi2 >= 0))
       printf("kernelBLFit failed! %f/%f\n", circle.chi2, line.chi2);
     // this kernel has no nHits variable: the number of hits in the fit is N
     printf("kernelBLFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n",
            N,
            N,
            tkid,
            circle.par(0),
            circle.par(1),
            circle.par(2));
     printf("kernelBLHits line.par(0,1): %d %f,%f\n", tkid, line.par(0), line.par(1));
     printf("kernelBLHits chi2 cov %f/%f  %e,%e,%e,%e,%e\n",
            circle.chi2,
            line.chi2,
            circle.cov(0, 0),
            circle.cov(1, 1),
            circle.cov(2, 2),
            line.cov(0, 0),
            line.cov(1, 1));
 #endif
   }
 }
cms::cudacompat::threadIdx
const dim3 threadIdx
Definition: cudaCompat.h:29

brokenline::lineFit
__host__ __device__ void lineFit(const M6xN &hits_ge, const V4 &fast_fit, const double bField, const PreparedBrokenLineData< n > &data, riemannFit::LineFit &line_results)
Performs the Broken Line fit in the straight track case (that is, the fit parameters are only the int...
Definition: BrokenLine.h:463

amptDefault_cfi.frame
frame
Definition: amptDefault_cfi.py:12

tindex_type
caConstants::tindex_type tindex_type
Definition: BrokenLineFitOnGPU.h:22

BrokenLine.h

caConstants::tindex_type
uint16_t tindex_type
Definition: CAConstants.h:69

mps_fire.i
i
Definition: mps_fire.py:428

tupleMultiplicity
caConstants::TupleMultiplicity const  *__restrict__ tupleMultiplicity
Definition: BrokenLineFitOnGPU.h:29

phase1PixelTopology::pixelPitchY
constexpr uint16_t pixelPitchY
Definition: SimplePixelTopology.h:66

ptkids
caConstants::TupleMultiplicity const  *__restrict__ HitsOnGPU const  *__restrict__ tindex_type *__restrict__ ptkids
Definition: BrokenLineFitOnGPU.h:29

riemannFit::CircleFit
Definition: FitResult.h:26

TrackingRecHit2DHeterogeneous.h

cms::cudacompat::gridDim
const dim3 gridDim
Definition: cudaCompat.h:33

dqmiolumiharvest.j
j
Definition: dqmiolumiharvest.py:66

phits_ge
caConstants::TupleMultiplicity const  *__restrict__ HitsOnGPU const  *__restrict__ tindex_type *__restrict__ double *__restrict__ float *__restrict__ phits_ge
Definition: BrokenLineFitOnGPU.h:29

Calorimetry_cff.dp
dp
Definition: Calorimetry_cff.py:158

caConstants::maxTuples
constexpr uint32_t maxTuples
Definition: CAConstants.h:46

__global__
#define __global__
Definition: cudaCompat.h:19

hfClusterShapes_cfi.hits
hits
Definition: hfClusterShapes_cfi.py:5

cms::cudacompat::blockDim
const dim3 blockDim
Definition: cudaCompat.h:30

offset
caConstants::TupleMultiplicity const  *__restrict__ HitsOnGPU const  *__restrict__ tindex_type *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ uint32_t uint32_t int32_t offset
Definition: BrokenLineFitOnGPU.h:37

local_start
auto local_start
Definition: BrokenLineFitOnGPU.h:49

PVValHelper::dz
Definition: PVValidationHelpers.h:51

bookConverter.results
results
Definition: bookConverter.py:144

CPEFastParametrisation::kGenErrorQBins
constexpr int kGenErrorQBins
Definition: pixelCPEforGPU.h:19

riemannFit::CircleFit::par
Vector3d par
parameter: (X0,Y0,R)
Definition: FitResult.h:27

nHitsL
caConstants::TupleMultiplicity const  *__restrict__ HitsOnGPU const  *__restrict__ tindex_type *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ uint32_t nHitsL
Definition: BrokenLineFitOnGPU.h:29

fileCollector.done
done
Definition: fileCollector.py:123

dqmiodumpmetadata.n
n
Definition: dqmiodumpmetadata.py:28

TrackingRecHit2DSOAView
Definition: TrackingRecHit2DSOAView.h:16

Calorimetry_cff.bField
bField
Definition: Calorimetry_cff.py:284

newFWLiteAna.bin
bin
Definition: newFWLiteAna.py:161

brokenline::prepareBrokenLineData
__host__ __device__ void prepareBrokenLineData(const M3xN &hits, const V4 &fast_fit, const double bField, PreparedBrokenLineData< n > &results)
Computes the data needed for the Broken Line fit procedure that are mainly common for the circle and ...
Definition: BrokenLine.h:150

createfilelist.int
int
Definition: createfilelist.py:10

riemannFit::Map3xNd
Eigen::Map< Matrix3xNd< N >, 0, Eigen::Stride< 3 *stride, stride > > Map3xNd
Definition: HelixFitOnGPU.h:23

PVValHelper::dx
Definition: PVValidationHelpers.h:49

brokenline::fastFit
__host__ __device__ void fastFit(const M3xN &hits, V4 &result)
A very fast helix fit.
Definition: BrokenLine.h:258

mps_splice.line
line
Definition: mps_splice.py:76

funct::abs
Abs< T >::type abs(const T &t)
Definition: Abs.h:22

mps_update.status
status
Definition: mps_update.py:68

pixelTrack::HitContainer
TrackSoA::HitContainer HitContainer
Definition: TrackSoAHeterogeneousT.h:103

f
double f[11][100]
Definition: MuScleFitUtils.cc:78

riemannFit::CircleFit::chi2
float chi2
Definition: FitResult.h:35

submitPVValidationJobs.dump
dump
Definition: submitPVValidationJobs.py:55

pixelTrack::TrackSoA
TrackSoAHeterogeneousT< maxNumber()> TrackSoA
Definition: TrackSoAHeterogeneousT.h:101

SiStripPI::max
Definition: SiStripPayloadInspectorHelper.h:178

cms::cudacompat::blockIdx
const dim3 blockIdx
Definition: cudaCompat.h:32

riemannFit::Map6xNf
Eigen::Map< Matrix6xNf< N >, 0, Eigen::Stride< 6 *stride, stride > > Map6xNf
Definition: HelixFitOnGPU.h:28

nt
int nt
Definition: AMPTWrapper.h:42

pfast_fit
caConstants::TupleMultiplicity const  *__restrict__ HitsOnGPU const  *__restrict__ tindex_type *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ pfast_fit
Definition: BrokenLineFitOnGPU.h:29

assert
assert(hitsInFit<=nHitsL)

N
#define N
Definition: blowfish.cc:9

totTK
int totTK
Definition: BrokenLineFitOnGPU.h:50

invalidTkId
constexpr auto invalidTkId
Definition: BrokenLineFitOnGPU.h:23

cudaCheck.h

hhp
caConstants::TupleMultiplicity const  *__restrict__ HitsOnGPU const  *__restrict__ hhp
Definition: BrokenLineFitOnGPU.h:29

riemannFit::Map4d
Eigen::Map< Vector4d, 0, Eigen::InnerStride< stride > > Map4d
Definition: HelixFitOnGPU.h:30

nHits
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const  *__restrict__ uint32_t const  *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell const int32_t nHits
Definition: CAHitNtupletGeneratorKernelsImpl.h:43

cms::cudacompat::__syncthreads
void __syncthreads()
Definition: cudaCompat.h:108

hit
Definition: SiStripHitEffFromCalibTree.cc:88

TrackSoAHeterogeneousT
Definition: TrackSoAHeterogeneousT.h:24

data
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:79

riemannFit::LineFit
Definition: FitResult.h:38

pixelCPEforGPU.h

nHitsH
caConstants::TupleMultiplicity const  *__restrict__ HitsOnGPU const  *__restrict__ tindex_type *__restrict__ double *__restrict__ float *__restrict__ double *__restrict__ uint32_t uint32_t nHitsH
Definition: BrokenLineFitOnGPU.h:29

dqmMemoryStats.float
float
Definition: dqmMemoryStats.py:127

PVValHelper::dy
Definition: PVValidationHelpers.h:50

cms::cuda::OneToManyAssoc
Definition: OneToManyAssoc.h:143

HelixFitOnGPU.h

riemannFit::CircleFit::cov
Matrix3d cov
Definition: FitResult.h:28

brokenline::circleFit
__host__ __device__ void circleFit(const M3xN &hits, const M6xN &hits_ge, const V4 &fast_fit, const double bField, PreparedBrokenLineData< n > &data, karimaki_circle_fit &circle_results)
Performs the Broken Line fit in the curved track case (that is, the fit parameters are the intercepti...
Definition: BrokenLine.h:314

brokenline::PreparedBrokenLineData
data needed for the Broken Line fit procedure.
Definition: BrokenLine.h:24

foundNtuplets
auto const  & foundNtuplets
Definition: CAHitNtupletGeneratorKernelsImpl.h:595

cms::cudacompat::atomicAdd
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61

CPEFastParametrisation::kNumErrorBins
constexpr int kNumErrorBins
Definition: pixelCPEforGPU.h:21

cuda_assert.h

phits
caConstants::TupleMultiplicity const  *__restrict__ HitsOnGPU const  *__restrict__ tindex_type *__restrict__ double *__restrict__ phits
Definition: BrokenLineFitOnGPU.h:29

riemannFit::maxNumberOfConcurrentFits
constexpr uint32_t maxNumberOfConcurrentFits
Definition: HelixFitOnGPU.h:12