#include <cstdint>
#include <cuda_runtime.h>
#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h"
#include "RecoPixelVertexing/PixelTrackFitting/interface/BrokenLine.h"
#include "HelixFitOnGPU.h"

Typedefs
using	HitsOnGPU = TrackingRecHit2DSOAView

using	OutputSoA = pixelTrack::TrackSoA

using	tindex_type = caConstants::tindex_type

using	Tuples = pixelTrack::HitContainer

Functions
template<int N>
	__attribute__ ((always_inline)) void kernel_BLFastFit(Tuples const *__restrict__ foundNtuplets

	assert (hitsInFit<=nHitsL)

	assert (nHitsL<=nHitsH)

	assert (hhp)

	assert (phits)

	assert (pfast_fit)

	assert (foundNtuplets)

	assert (tupleMultiplicity)

	assert (totTK<=int(tupleMultiplicity->size()))

	assert (totTK >=0)

	for (int local_idx=local_start, nt=riemannFit::maxNumberOfConcurrentFits;local_idx< nt;local_idx+=gridDim.x *blockDim.x)

Variables
caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__	hhp

constexpr auto	invalidTkId = std::numeric_limits<tindex_type>::max()

auto	local_start = blockIdx.x * blockDim.x + threadIdx.x

caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__ tindex_type* __restrict__ double* __restrict__ float* __restrict__ double* __restrict__ uint32_t uint32_t	nHitsH

caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__ tindex_type* __restrict__ double* __restrict__ float* __restrict__ double* __restrict__ uint32_t	nHitsL

caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__ tindex_type* __restrict__ double* __restrict__ float* __restrict__ double* __restrict__ uint32_t uint32_t int32_t	offset

caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__ tindex_type* __restrict__ double* __restrict__ float* __restrict__ double* __restrict__	pfast_fit

caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__ tindex_type* __restrict__ double* __restrict__	phits

caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__ tindex_type* __restrict__ double* __restrict__ float* __restrict__	phits_ge

caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__ tindex_type* __restrict__	ptkids

int	totTK = tupleMultiplicity->end(nHitsH) - tupleMultiplicity->begin(nHitsL)

caConstants::TupleMultiplicity const *__restrict__	tupleMultiplicity

Typedef Documentation

◆ HitsOnGPU

using HitsOnGPU = TrackingRecHit2DSOAView

Definition at line 19 of file BrokenLineFitOnGPU.h.

◆ OutputSoA

using OutputSoA = pixelTrack::TrackSoA

Definition at line 21 of file BrokenLineFitOnGPU.h.

◆ tindex_type

using tindex_type = caConstants::tindex_type

Definition at line 22 of file BrokenLineFitOnGPU.h.

◆ Tuples

using Tuples = pixelTrack::HitContainer

Definition at line 20 of file BrokenLineFitOnGPU.h.

Function Documentation

◆ __attribute__()

template<int N>

__attribute__ ( (always_inline) ) const

inline

Definition at line 170 of file BrokenLineFitOnGPU.h.

References funct::abs(), assert(), Calorimetry_cff::bField, cms::cudacompat::blockDim, cms::cudacompat::blockIdx, riemannFit::CircleFit::chi2, brokenline::circleFit(), riemannFit::CircleFit::cov, data, dqmMemoryStats::float, cms::cudacompat::gridDim, hfClusterShapes_cfi::hits, invalidTkId, mps_splice::line, brokenline::lineFit(), local_start, riemannFit::maxNumberOfConcurrentFits, caConstants::maxTuples, N, nHits, nt, riemannFit::CircleFit::par, pfast_fit, phits, phits_ge, brokenline::prepareBrokenLineData(), ptkids, bookConverter::results, and cms::cudacompat::threadIdx.

                                                              {
   // Preconditions: the output container and the fast-fit buffer must be non-null.
   assert(results);
   assert(pfast_fit);
 
   // same as above...
 
   // look in bin for this hit multiplicity
   // Each thread starts at its global thread index and advances by the total
   // number of launched threads, up to riemannFit::maxNumberOfConcurrentFits.
   auto local_start = blockIdx.x * blockDim.x + threadIdx.x;
   for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt;
        local_idx += gridDim.x * blockDim.x) {
     // A slot holding invalidTkId ends this thread's loop.
     if (invalidTkId == ptkids[local_idx])
       break;
 
     // Track id stored for this slot; it indexes the per-track outputs below.
     auto tkid = ptkids[local_idx];
 
     assert(tkid < caConstants::maxTuples);
 
     // Map objects constructed over the three work buffers, each offset by local_idx.
     riemannFit::Map3xNd<N> hits(phits + local_idx);
     riemannFit::Map4d fast_fit(pfast_fit + local_idx);
     riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
 
     brokenline::PreparedBrokenLineData<N> data;
 
     brokenline::karimaki_circle_fit circle;
     riemannFit::LineFit line;
 
     // Fit sequence: fill `data` first, then run the line fit and the circle fit,
     // both of which take `data` and `fast_fit` as arguments.
     brokenline::prepareBrokenLineData(hits, fast_fit, bField, data);
     brokenline::lineFit(hits_ge, fast_fit, bField, data, line);
     brokenline::circleFit(hits, hits_ge, fast_fit, bField, data, circle);
 
     // Store the fit results for track tkid:
     //  - stateAtBS receives circle/line parameters and covariances, with 1/bField as extra argument
     //  - pt   = bField / |circle.par(2)|
     //  - eta  = asinh(line.par(0))
     //  - chi2 = (circle.chi2 + line.chi2) / (2 * N - 5)
     results->stateAtBS.copyFromCircle(circle.par, circle.cov, line.par, line.cov, 1.f / float(bField), tkid);
     results->pt(tkid) = float(bField) / float(std::abs(circle.par(2)));
     results->eta(tkid) = asinhf(line.par(0));
     results->chi2(tkid) = (circle.chi2 + line.chi2) / (2 * N - 5);
 
 #ifdef BROKENLINE_DEBUG
     // Debug-only output. The negated comparisons are also true when a chi2 is NaN.
     // NOTE(review): nHits is not declared anywhere in this excerpt, and tkid/nHits are
     // printed with %d -- confirm both against the full kernel signature.
     if (!(circle.chi2 >= 0) || !(line.chi2 >= 0))
       printf("kernelBLFit failed! %f/%f\n", circle.chi2, line.chi2);
     printf("kernelBLFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n",
            N,
            nHits,
            tkid,
            circle.par(0),
            circle.par(1),
            circle.par(2));
     printf("kernelBLHits line.par(0,1): %d %f,%f\n", tkid, line.par(0), line.par(1));
     printf("kernelBLHits chi2 cov %f/%f  %e,%e,%e,%e,%e\n",
            circle.chi2,
            line.chi2,
            circle.cov(0, 0),
            circle.cov(1, 1),
            circle.cov(2, 2),
            line.cov(0, 0),
            line.cov(1, 1));
 #endif
   }
 }

◆ assert() [1/9]

assert ( hitsInFit<= nHitsL )

Referenced by __attribute__(), and for().

◆ assert() [2/9]

assert ( nHitsL<= nHitsH )

◆ assert() [3/9]

assert ( hhp )

◆ assert() [4/9]

assert ( phits )

◆ assert() [5/9]

assert ( pfast_fit )

◆ assert() [6/9]

assert ( foundNtuplets )

◆ assert() [7/9]

assert ( tupleMultiplicity )

◆ assert() [8/9]

assert ( totTK<= int(tupleMultiplicity->size()) )

◆ assert() [9/9]

assert ( totTK >= 0 )

◆ for()

for ( int local_idx = local_start )

Definition at line 61 of file BrokenLineFitOnGPU.h.

References cms::cudacompat::__syncthreads(), funct::abs(), assert(), cms::cudacompat::atomicAdd(), newFWLiteAna::bin, fileCollector::done, Calorimetry_cff::dp, GCP_Ntuples_cfg::dump, PVValHelper::dx, PVValHelper::dy, PVValHelper::dz, f, brokenline::fastFit(), foundNtuplets, hhp, hfClusterShapes_cfi::hits, mps_fire::i, createfilelist::int, invalidTkId, dqmiolumiharvest::j, CPEFastParametrisation::kGenErrorQBins, CPEFastParametrisation::kNumErrorBins, SiStripPI::max, dqmiodumpmetadata::n, nHits, nHitsH, nHitsL, offset, pfast_fit, phits, phits_ge, phase1PixelTopology::pixelPitchY, ptkids, mps_update::status, totTK, and tupleMultiplicity.

                                             {
     // Index of this slot's tuple within the selected multiplicity range.
     int tuple_idx = local_idx + offset;
     // Past the last tuple: mark the slot with invalidTkId and leave the loop.
     if (tuple_idx >= totTK) {
       ptkids[local_idx] = invalidTkId;
       break;
     }
     // get it from the ntuple container (one to one to helix)
     auto tkid = *(tupleMultiplicity->begin(nHitsL) + tuple_idx);
     assert(tkid < foundNtuplets->nOnes());
 
     // Record which track this slot holds.
     ptkids[local_idx] = tkid;
 
     auto nHits = foundNtuplets->size(tkid);
 
     // The tuple's hit count must lie within [nHitsL, nHitsH].
     assert(nHits >= nHitsL);
     assert(nHits <= nHitsH);
 
     // Map objects constructed over the three work buffers, each offset by local_idx.
     riemannFit::Map3xNd<N> hits(phits + local_idx);
     riemannFit::Map4d fast_fit(pfast_fit + local_idx);
     riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
 
 #ifdef BL_DUMP_HITS
     // NOTE(review): __syncthreads() sits inside a loop that threads can leave via the
     // `break` above, so not every thread of the block necessarily reaches it -- confirm
     // this debug path is safe for the launch configuration in use.
     // NOTE(review): `dump` is declared again inside the hit loop below, which shadows
     // this variable; the atomicAdd-based condition computed here is then never read.
     __shared__ int done;
     done = 0;
     __syncthreads();
     bool dump = (foundNtuplets->size(tkid) == 5 && 0 == atomicAdd(&done, 1));
 #endif
 
     // Prepare data structure
     auto const *hitId = foundNtuplets->begin(tkid);
 
     // #define YERR_FROM_DC
 #ifdef YERR_FROM_DC
     // try to compute more precise error in y
     // Global-coordinate differences between hitId[hitsInFit - 1] and hitId[0].
     auto dx = hhp->xGlobal(hitId[hitsInFit - 1]) - hhp->xGlobal(hitId[0]);
     auto dy = hhp->yGlobal(hitId[hitsInFit - 1]) - hhp->yGlobal(hitId[0]);
     auto dz = hhp->zGlobal(hitId[hitsInFit - 1]) - hhp->zGlobal(hitId[0]);
     float ux, uy, uz;
 #endif
 
     // Select hitsInFit hits out of nHits: step through the tuple with a fractional
     // increment of at least 1, rounding the running position to the nearest index.
     // (hitsInFit is declared outside this excerpt.)
     float incr = std::max(1.f, float(nHits) / float(hitsInFit));
     float n = 0;
     for (uint32_t i = 0; i < hitsInFit; ++i) {
       int j = int(n + 0.5f);  // round
       if (hitsInFit - 1 == i)
         j = nHits - 1;  // force last hit to ensure max lever arm.
       assert(j < int(nHits));
       n += incr;
       auto hit = hitId[j];
       // Six values filled by frame.toGlobal() below and copied into hits_ge.col(i).
       float ge[6];
 
 #ifdef YERR_FROM_DC
       auto const &dp = hhp->cpeParams().detParams(hhp->detectorIndex(hit));
       auto status = hhp->status(hit);
       int qbin = CPEFastParametrisation::kGenErrorQBins - 1 - status.qBin;
       assert(qbin >= 0 && qbin < 5);
       // When either flag is set, the stored yerrLocal is used instead of the recomputed value.
       bool nok = (status.isBigY | status.isOneY);
       // compute cotanbeta and use it to recompute error
       dp.frame.rotation().multiply(dx, dy, dz, ux, uy, uz);
       auto cb = std::abs(uy / uz);
       int bin =
           int(cb * (float(phase1PixelTopology::pixelThickess) / float(phase1PixelTopology::pixelPitchY)) * 8.f) - 4;
       int low_value = 0;
       int high_value = CPEFastParametrisation::kNumErrorBins - 1;
       // return estimated bin value truncated to [0, 15]
       bin = std::clamp(bin, low_value, high_value);
       float yerr = dp.sigmay[bin] * 1.e-4f;  // toCM
       yerr *= dp.yfact[qbin];                // inflate
       yerr *= yerr;
       yerr += dp.apeYY;
       yerr = nok ? hhp->yerrLocal(hit) : yerr;
       dp.frame.toGlobal(hhp->xerrLocal(hit), 0, yerr, ge);
 #else
       // Default path: pass the stored local x/y errors to the detector frame's toGlobal().
       hhp->cpeParams()
           .detParams(hhp->detectorIndex(hit))
           .frame.toGlobal(hhp->xerrLocal(hit), 0, hhp->yerrLocal(hit), ge);
 #endif
 
 #ifdef BL_DUMP_HITS
       // Debug-only dump of the selected hit and its error values for 5-hit tuples.
       bool dump = foundNtuplets->size(tkid) == 5;
       if (dump) {
         printf("Track id %d %d Hit %d on %d\nGlobal: hits.col(%d) << %f,%f,%f\n",
                local_idx,
                tkid,
                hit,
                hhp->detectorIndex(hit),
                i,
                hhp->xGlobal(hit),
                hhp->yGlobal(hit),
                hhp->zGlobal(hit));
         printf("Error: hits_ge.col(%d) << %e,%e,%e,%e,%e,%e\n", i, ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]);
       }
 #endif
 
       // Column i of the fit inputs: global hit position and the six error values.
       hits.col(i) << hhp->xGlobal(hit), hhp->yGlobal(hit), hhp->zGlobal(hit);
       hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5];
     }
     // Run the fast fit on the selected hits; the result is written to fast_fit.
     brokenline::fastFit(hits, fast_fit);
 
     // no NaN here....
     // (a value compares equal to itself only if it is not NaN)
     assert(fast_fit(0) == fast_fit(0));
     assert(fast_fit(1) == fast_fit(1));
     assert(fast_fit(2) == fast_fit(2));
     assert(fast_fit(3) == fast_fit(3));
   }

Variable Documentation

◆ hhp

caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__ hhp

Definition at line 29 of file BrokenLineFitOnGPU.h.

Referenced by for().

◆ invalidTkId

constexpr auto invalidTkId = std::numeric_limits<tindex_type>::max()

Definition at line 23 of file BrokenLineFitOnGPU.h.

Referenced by __attribute__(), and for().

◆ local_start

auto local_start = blockIdx.x * blockDim.x + threadIdx.x

Definition at line 49 of file BrokenLineFitOnGPU.h.

Referenced by __attribute__().

◆ nHitsH

caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__ tindex_type* __restrict__ double* __restrict__ float* __restrict__ double* __restrict__ uint32_t uint32_t nHitsH

Definition at line 29 of file BrokenLineFitOnGPU.h.

Referenced by for().

◆ nHitsL

caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__ tindex_type* __restrict__ double* __restrict__ float* __restrict__ double* __restrict__ uint32_t nHitsL

Definition at line 29 of file BrokenLineFitOnGPU.h.

Referenced by for().

◆ offset

caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__ tindex_type* __restrict__ double* __restrict__ float* __restrict__ double* __restrict__ uint32_t uint32_t int32_t offset

Initial value:

{

constexpr uint32_t hitsInFit = N

N

#define N

Definition: blowfish.cc:9

Definition at line 37 of file BrokenLineFitOnGPU.h.

Referenced by for().

◆ pfast_fit

caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__ tindex_type* __restrict__ double* __restrict__ float* __restrict__ double* __restrict__ pfast_fit

Definition at line 29 of file BrokenLineFitOnGPU.h.

Referenced by __attribute__(), and for().

◆ phits

caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__ tindex_type* __restrict__ double* __restrict__ phits

Definition at line 29 of file BrokenLineFitOnGPU.h.

Referenced by gpuPixelRecHits::__attribute__(), __attribute__(), EcalTBValidation::analyze(), EcalSimpleTBAnalyzer::analyze(), EcalSimple2007H4TBAnalyzer::analyze(), ElectronCalibration::analyze(), FWTrackingParticleProxyBuilderFullFramework::build(), ZeeCalibration::duringLoop(), ElectronCalibration::findMaxHit(), for(), JetMatchingTools::getGenParticles(), PhotonConversionTrajectorySeedProducerFromQuadrupletsAlgo::inspect(), JetMatchingTools::lostEnergyFraction(), and SeedForPhotonConversionFromQuadruplets::trajectorySeed().

◆ phits_ge

caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__ tindex_type* __restrict__ double* __restrict__ float* __restrict__ phits_ge

Definition at line 29 of file BrokenLineFitOnGPU.h.

Referenced by __attribute__(), and for().

◆ ptkids

caConstants::TupleMultiplicity const* __restrict__ HitsOnGPU const* __restrict__ tindex_type* __restrict__ ptkids

Definition at line 29 of file BrokenLineFitOnGPU.h.

Referenced by __attribute__(), and for().

◆ totTK

int totTK = tupleMultiplicity->end(nHitsH) - tupleMultiplicity->begin(nHitsL)

Definition at line 50 of file BrokenLineFitOnGPU.h.

Referenced by for().

◆ tupleMultiplicity

Quality const *__restrict__ caConstants::TupleMultiplicity * tupleMultiplicity

Initial value:

{

auto first = blockIdx.x * blockDim.x + threadIdx.x

cms::cudacompat::threadIdx

const dim3 threadIdx

Definition: cudaCompat.h:29

first

auto first

Definition: CAHitNtupletGeneratorKernelsImpl.h:127

cms::cudacompat::blockDim

const dim3 blockDim

Definition: cudaCompat.h:30

cms::cudacompat::blockIdx

const dim3 blockIdx

Definition: cudaCompat.h:32

Definition at line 29 of file BrokenLineFitOnGPU.h.

Referenced by HelixFitOnGPU::allocateOnGPU(), and for().

Typedefs

Functions

Variables

Typedef Documentation

◆ HitsOnGPU

◆ OutputSoA

◆ tindex_type

◆ Tuples

Function Documentation

◆ __attribute__()

◆ assert() [1/9]

◆ assert() [2/9]

◆ assert() [3/9]

◆ assert() [4/9]

◆ assert() [5/9]

◆ assert() [6/9]

◆ assert() [7/9]

◆ assert() [8/9]

◆ assert() [9/9]

◆ for()

Variable Documentation

◆ hhp

◆ invalidTkId

◆ local_start

◆ nHitsH

◆ nHitsL

◆ offset

◆ pfast_fit

◆ phits

◆ phits_ge

◆ ptkids

◆ totTK

◆ tupleMultiplicity

◆ __attribute__()