d8/d4f/gpuPixelDoubletsAlgos_8h_source.html

 #ifndef RecoPixelVertexing_PixelTriplets_plugins_gpuPixelDoubletsAlgos_h
 #define RecoPixelVertexing_PixelTriplets_plugins_gpuPixelDoubletsAlgos_h

 #include <algorithm>
 #include <cmath>
 #include <cstdint>
 #include <cstdio>
 #include <limits>

 #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h"
 #include "DataFormats/Math/interface/approx_atan2.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"

 #include "CAConstants.h"
 #include "GPUCACell.h"

 namespace gpuPixelDoublets {

   // Local shorthands for the cell-neighbour and cell-track container types
   // that are declared in namespace caConstants (see CAConstants.h).
   using CellNeighbors = caConstants::CellNeighbors;
   using CellTracks = caConstants::CellTracks;
   using CellNeighborsVector = caConstants::CellNeighborsVector;
   using CellTracksVector = caConstants::CellTracksVector;

   /**
    * Build hit doublets ("cells") for every configured layer pair.
    *
    * Work distribution, as used by the code below:
    *  - the y dimension (blockIdx.y / threadIdx.y) walks over the inner hits of
    *    all layer pairs, advancing by blockDim.y * gridDim.y;
    *  - the x dimension (threadIdx.x, step blockDim.x) walks over the outer-hit
    *    candidates stored in each phi bin that overlaps the phi window of the
    *    inner hit.
    * Thread (0,0) of each block fills a per-block shared table with the cumulative
    * number of inner hits per pair; a __syncthreads() publishes it to the block,
    * so every thread of the block must call this function.
    *
    * For every (inner, outer) candidate that survives the enabled cuts a slot is
    * reserved with atomicAdd on *nCells, the cell is initialised in cells[] and
    * its index is appended to isOuterHitOfCell[outer hit].
    *
    * @param layerPairs        2 * nPairs layer indices; entry 2*k is the inner and
    *                          2*k+1 the outer layer of pair k (outer > inner is asserted)
    * @param nPairs            number of layer pairs to process; asserted to be
    *                          <= caConstants::maxNumberOfLayerPairs. Zero is accepted
    *                          and produces no doublets.
    * @param cells             output array of cells, indexed by the value obtained from nCells
    * @param nCells            global doublet counter, incremented atomically
    * @param cellNeighbors     forwarded (dereferenced) to GPUCACell::init
    * @param cellTracks        forwarded (dereferenced) to GPUCACell::init
    * @param hh                hit view: per-hit detector index, global z/r, iphi,
    *                          cluster size in y, per-layer hit offsets and the phi binner
    * @param isOuterHitOfCell  per-hit container receiving the indices of the cells
    *                          that have that hit as outer hit
    * @param phicuts           per-pair half width of the phi window (same integer
    *                          encoding as hh.iphi)
    * @param minz, maxz        per-pair accepted range for the global z of the inner hit
    * @param maxr              per-pair maximum (outer r - inner r), used by the z0 cut
    * @param ideal_cond        when true, every inner hit is treated as being on an outer ladder
    * @param doClusterCut      enable the cluster y-size cuts
    * @param doZ0Cut           enable the z0 / delta-r cut
    * @param doPtCut           enable the minimum-pt (curvature) cut
    * @param maxNumOfDoublets  capacity of cells; once reached, the reservation is
    *                          rolled back and the scan of the current phi bin stops
    */
   __device__ __forceinline__ void doubletsFromHisto(uint8_t const* __restrict__ layerPairs,
                                                     uint32_t nPairs,
                                                     GPUCACell* cells,
                                                     uint32_t* nCells,
                                                     CellNeighborsVector* cellNeighbors,
                                                     CellTracksVector* cellTracks,
                                                     TrackingRecHit2DSOAView const& __restrict__ hh,
                                                     GPUCACell::OuterHitOfCell isOuterHitOfCell,
                                                     int16_t const* __restrict__ phicuts,
                                                     float const* __restrict__ minz,
                                                     float const* __restrict__ maxz,
                                                     float const* __restrict__ maxr,
                                                     bool ideal_cond,
                                                     bool doClusterCut,
                                                     bool doZ0Cut,
                                                     bool doPtCut,
                                                     uint32_t maxNumOfDoublets) {
     // ysize cuts (z in the barrel)  times 8
     // these are used if doClusterCut is true
     constexpr int minYsizeB1 = 36;
     constexpr int minYsizeB2 = 28;
     constexpr int maxDYsize12 = 28;
     constexpr int maxDYsize = 20;
     constexpr int maxDYPred = 20;
     constexpr float dzdrFact = 8 * 0.0285 / 0.015;  // from dz/dr to "DY"

     bool isOuterLadder = ideal_cond;

     using PhiBinner = TrackingRecHit2DSOAView::PhiBinner;

     auto const& __restrict__ phiBinner = hh.phiBinner();
     uint32_t const* __restrict__ offsets = hh.hitsLayerStart();
     assert(offsets);

     // number of hits in layer li, from the per-layer start offsets
     auto layerSize = [=](uint8_t li) { return offsets[li + 1] - offsets[li]; };

     // nPairsMax to be optimized later (originally was 64).
     // If it should be much bigger, consider using a block-wide parallel prefix scan,
     // e.g. see  https://nvlabs.github.io/cub/classcub_1_1_warp_scan.html
     const int nPairsMax = caConstants::maxNumberOfLayerPairs;
     assert(nPairs <= nPairsMax);
     __shared__ uint32_t innerLayerCumulativeSize[nPairsMax];
     __shared__ uint32_t ntot;
     if (threadIdx.y == 0 && threadIdx.x == 0) {
       // Inclusive prefix sum of the inner-layer sizes, one entry per pair.
       // Written as a running total so that nPairs == 0 touches neither
       // layerPairs nor the shared table and simply yields ntot == 0.
       uint32_t runningTotal = 0;
       for (uint32_t i = 0; i < nPairs; ++i) {
         runningTotal += layerSize(layerPairs[2 * i]);
         innerLayerCumulativeSize[i] = runningTotal;
       }
       ntot = runningTotal;
     }
     // make the table and ntot visible to the whole block
     __syncthreads();

     // x runs faster
     auto idy = blockIdx.y * blockDim.y + threadIdx.y;
     auto first = threadIdx.x;
     auto stride = blockDim.x;

     uint32_t pairLayerId = 0;  // cannot go backward
     for (auto j = idy; j < ntot; j += blockDim.y * gridDim.y) {
       // advance to the first pair whose cumulative size exceeds j;
       // the post-increment overshoots by one, hence the decrement below
       while (j >= innerLayerCumulativeSize[pairLayerId++])
         ;
       --pairLayerId;  // move to lower_bound ??

       assert(pairLayerId < nPairs);
       assert(j < innerLayerCumulativeSize[pairLayerId]);
       assert(0 == pairLayerId || j >= innerLayerCumulativeSize[pairLayerId - 1]);

       uint8_t inner = layerPairs[2 * pairLayerId];
       uint8_t outer = layerPairs[2 * pairLayerId + 1];
       assert(outer > inner);

       // offset of the outer layer's bins inside the phi binner
       auto hoff = PhiBinner::histOff(outer);

       // convert j into the index of the hit: position within the pair's
       // inner layer plus the start offset of that layer
       auto i = (0 == pairLayerId) ? j : j - innerLayerCumulativeSize[pairLayerId - 1];
       i += offsets[inner];

       // printf("Hit in Layer %d %d %d %d\n", i, inner, pairLayerId, j);

       assert(i >= offsets[inner]);
       assert(i < offsets[inner + 1]);

       // found hit corresponding to our cuda thread, now do the job
       auto mi = hh.detectorIndex(i);
       if (mi > gpuClustering::maxNumModules)
         continue;  // invalid

       /* maybe clever, not effective when zoCut is on
       auto bpos = (mi%8)/4;  // if barrel is 1 for z>0
       auto fpos = (outer>3) & (outer<7);
       if ( ((inner<3) & (outer>3)) && bpos!=fpos) continue;
       */

       auto mez = hh.zGlobal(i);

       if (mez < minz[pairLayerId] || mez > maxz[pairLayerId])
         continue;

       int16_t mes = -1;  // make compiler happy
       if (doClusterCut) {
         // if ideal treat inner ladder as outer
         if (inner == 0)
           assert(mi < 96);
         isOuterLadder = ideal_cond ? true : 0 == (mi / 8) % 2;  // only for B1/B2/B3 B4 is opposite, FPIX:noclue...

         // in any case we always test mes>0 ...
         mes = inner > 0 || isOuterLadder ? hh.clusterSizeY(i) : -1;

         if (inner == 0 && outer > 3)  // B1 and F1
           if (mes > 0 && mes < minYsizeB1)
             continue;                 // only long cluster  (5*8)
         if (inner == 1 && outer > 3)  // B2 and F1
           if (mes > 0 && mes < minYsizeB2)
             continue;
       }
       auto mep = hh.iphi(i);
       auto mer = hh.rGlobal(i);

       // all cuts: true if fails
       constexpr float z0cut = 12.f;      // cm
       constexpr float hardPtCut = 0.5f;  // GeV
       // cm (1 GeV track has 1 GeV/c / (e * 3.8T) ~ 87 cm radius in a 3.8T field)
       constexpr float minRadius = hardPtCut * 87.78f;
       constexpr float minRadius2T4 = 4.f * minRadius * minRadius;
       // pt cut: compares dphi^2 * (4 * minRadius^2 - ri * ro) against (ro - ri)^2
       // for the outer hit j (this j is the lambda parameter, not the loop index)
       auto ptcut = [&](int j, int16_t idphi) {
         auto r2t4 = minRadius2T4;
         auto ri = mer;
         auto ro = hh.rGlobal(j);
         auto dphi = short2phi(idphi);
         return dphi * dphi * (r2t4 - ri * ro) > (ro - ri) * (ro - ri);
       };
       // z0 cut: rejects outer hit j when delta-r is negative or above maxr,
       // or when |mez * ro - mer * zo| exceeds z0cut * dr
       auto z0cutoff = [&](int j) {
         auto zo = hh.zGlobal(j);
         auto ro = hh.rGlobal(j);
         auto dr = ro - mer;
         return dr > maxr[pairLayerId] || dr < 0 || std::abs((mez * ro - mer * zo)) > z0cut * dr;
       };

       // cluster y-size compatibility cut between the inner hit and outer hit j
       auto zsizeCut = [&](int j) {
         auto onlyBarrel = outer < 4;
         auto so = hh.clusterSizeY(j);
         auto dy = inner == 0 ? maxDYsize12 : maxDYsize;
         // in the barrel cut on difference in size
         // in the endcap on the prediction on the first layer (actually in the barrel only: happen to be safe for endcap as well)
         // FIXME move pred cut to z0cutoff to optimize loading of and computation ...
         auto zo = hh.zGlobal(j);
         auto ro = hh.rGlobal(j);
         return onlyBarrel ? mes > 0 && so > 0 && std::abs(so - mes) > dy
                           : (inner < 4) && mes > 0 &&
                                 std::abs(mes - int(std::abs((mez - zo) / (mer - ro)) * dzdrFact + 0.5f)) > maxDYPred;
       };

       auto iphicut = phicuts[pairLayerId];

       // first and last phi bin of the window [mep - iphicut, mep + iphicut];
       // the int16_t casts let the window wrap around
       auto kl = PhiBinner::bin(int16_t(mep - iphicut));
       auto kh = PhiBinner::bin(int16_t(mep + iphicut));
       // circular increment over the bins
       auto incr = [](auto& k) { return k = (k + 1) % PhiBinner::nbins(); };

 #ifdef GPU_DEBUG
       int tot = 0;
       int nmin = 0;
       int tooMany = 0;
 #endif

       // khh is one past kh, so the loop below visits kl..kh inclusive
       auto khh = kh;
       incr(khh);
       for (auto kk = kl; kk != khh; incr(kk)) {
 #ifdef GPU_DEBUG
         if (kk != kl && kk != kh)
           nmin += phiBinner.size(kk + hoff);
 #endif
         auto const* __restrict__ p = phiBinner.begin(kk + hoff);
         auto const* __restrict__ e = phiBinner.end(kk + hoff);
         // threads along x share the entries of this bin
         p += first;
         for (; p < e; p += stride) {
           auto oi = __ldg(p);
           assert(oi >= offsets[outer]);
           assert(oi < offsets[outer + 1]);
           auto mo = hh.detectorIndex(oi);
           if (mo > gpuClustering::maxNumModules)
             continue;  //    invalid

           if (doZ0Cut && z0cutoff(oi))
             continue;

           // absolute phi distance in the integer encoding, taken as the
           // smaller of the two wrapped 16-bit differences
           auto mop = hh.iphi(oi);
           uint16_t idphi = std::min(std::abs(int16_t(mop - mep)), std::abs(int16_t(mep - mop)));
           if (idphi > iphicut)
             continue;

           if (doClusterCut && zsizeCut(oi))
             continue;
           if (doPtCut && ptcut(oi, idphi))
             continue;

           // reserve a slot; if the array is already full give the slot back
           // and stop scanning this bin
           auto ind = atomicAdd(nCells, 1);
           if (ind >= maxNumOfDoublets) {
             atomicSub(nCells, 1);
             break;
           }  // move to SimpleVector??
           // int layerPairId, int doubletId, int innerHitId, int outerHitId)
           cells[ind].init(*cellNeighbors, *cellTracks, hh, pairLayerId, i, oi);
           isOuterHitOfCell[oi].push_back(ind);
 #ifdef GPU_DEBUG
           if (isOuterHitOfCell[oi].full())
             ++tooMany;
           ++tot;
 #endif
         }
       }
 #ifdef GPU_DEBUG
       if (tooMany > 0)
         printf("OuterHitOfCell full for %d in layer %d/%d, %d,%d %d\n", i, inner, outer, nmin, tot, tooMany);
 #endif
     }  // loop in block...
   }

 }  // namespace gpuPixelDoublets

 #endif  // RecoPixelVertexing_PixelTriplets_plugins_gpuPixelDoubletsAlgos_h
cms::cudacompat::threadIdx
const dim3 threadIdx
Definition: cudaCompat.h:29

gpuPixelDoublets::cellTracks
uint32_t CellNeighborsVector CellTracksVector * cellTracks
Definition: gpuPixelDoublets.h:99

caConstants::CellTracks
cms::cuda::VecArray< tindex_type, maxCellTracks > CellTracks
Definition: CAConstants.h:72

gpuPixelDoublets::assert
assert(offsets)

SurfaceOrientation::outer
Definition: Surface.h:19

approx_atan2.h

cms::cuda::HistoContainer::histOff
static constexpr auto histOff(uint32_t nh)
Definition: HistoContainer.h:130

gpuPixelDoublets::layerPairs
__constant__ const uint8_t layerPairs[2 *nPairs]
Definition: gpuPixelDoublets.h:18

caConstants::CellNeighborsVector
cms::cuda::SimpleVector< CellNeighbors > CellNeighborsVector
Definition: CAConstants.h:74

__forceinline__
#define __forceinline__
Definition: cudaCompat.h:22

mps_fire.i
i
Definition: mps_fire.py:428

gpuPixelDoublets::maxz
__constant__ float const maxz[nPairs]
Definition: gpuPixelDoublets.h:55

gpuPixelDoublets::minz
__constant__ float const minz[nPairs]
Definition: gpuPixelDoublets.h:53

TrackingRecHit2DHeterogeneous.h

cms::cudacompat::gridDim
const dim3 gridDim
Definition: cudaCompat.h:33

gpuPixelDoublets::cells
__device__ uint32_t GPUCACell * cells
Definition: gpuPixelDoubletsAlgos.h:26

gpuPixelDoublets::doZ0Cut
uint32_t CellNeighborsVector CellTracksVector TrackingRecHit2DSOAView const  *__restrict__ GPUCACell::OuterHitOfCell int bool bool bool doZ0Cut
Definition: gpuPixelDoublets.h:99

dqmiolumiharvest.j
j
Definition: dqmiolumiharvest.py:66

gpuPixelDoublets::CellNeighborsVector
caConstants::CellNeighborsVector CellNeighborsVector
Definition: gpuPixelDoublets.h:65

cms::cudacompat::atomicSub
T1 atomicSub(T1 *a, T2 b)
Definition: cudaCompat.h:73

caConstants::maxNumberOfLayerPairs
constexpr uint32_t maxNumberOfLayerPairs
Definition: CAConstants.h:44

cms::cudacompat::blockDim
const dim3 blockDim
Definition: cudaCompat.h:30

gpuPixelDoublets::offsets
uint32_t const  *__restrict__ offsets
Definition: gpuPixelDoubletsAlgos.h:56

gpuPixelDoublets::maxr
__constant__ float const maxr[nPairs]
Definition: gpuPixelDoublets.h:57

gpuPixelDoublets::doPtCut
uint32_t CellNeighborsVector CellTracksVector TrackingRecHit2DSOAView const  *__restrict__ GPUCACell::OuterHitOfCell int bool bool bool bool doPtCut
Definition: gpuPixelDoublets.h:99

MillePedeFileConverter_cfg.e
e
Definition: MillePedeFileConverter_cfg.py:37

gpuPixelDoublets::CellNeighbors
caConstants::CellNeighbors CellNeighbors
Definition: gpuPixelDoublets.h:63

gpuPixelDoublets::nPairs
constexpr int nPairs
Definition: gpuPixelDoublets.h:12

gpuPixelDoublets
Definition: gpuFishbone.h:17

cms::cuda::HistoContainer
Definition: HistoContainer.h:101

gpuPixelDoublets::CellTracks
caConstants::CellTracks CellTracks
Definition: gpuPixelDoublets.h:64

gpuPixelDoublets::CellTracksVector
caConstants::CellTracksVector CellTracksVector
Definition: gpuPixelDoublets.h:66

TrackingRecHit2DSOAView
Definition: TrackingRecHit2DSOAView.h:16

gpuPixelDoublets::isOuterHitOfCell
uint32_t CellNeighborsVector CellTracksVector TrackingRecHit2DSOAView const  *__restrict__ GPUCACell::OuterHitOfCell isOuterHitOfCell
Definition: gpuPixelDoublets.h:99

gpuPixelDoublets::isOuterLadder
bool isOuterLadder
Definition: gpuPixelDoubletsAlgos.h:51

cms::cuda::SimpleVector::push_back
__device__ int push_back(const T &element)
Definition: SimpleVector.h:60

funct::true
true
Definition: Factorize.h:173

caConstants::OuterHitOfCell
Definition: CAConstants.h:82

gpuPixelDoublets::layerSize
auto layerSize
Definition: gpuPixelDoubletsAlgos.h:59

caConstants::CellTracksVector
cms::cuda::SimpleVector< CellTracks > CellTracksVector
Definition: CAConstants.h:75

gpuPixelDoublets::phiBinner
auto const  &__restrict__ phiBinner
Definition: gpuPixelDoubletsAlgos.h:55

SurfaceOrientation::inner
Definition: Surface.h:19

TrackingRecHit2DSOAView::PhiBinner
cms::cuda::HistoContainer< int16_t, 256, -1, 8 *sizeof(int16_t), hindex_type, pixelTopology::maxLayers > PhiBinner
Definition: TrackingRecHit2DSOAView.h:24

full
Definition: GenABIO.cc:168

gpuPixelDoublets::innerLayerCumulativeSize
__shared__ uint32_t innerLayerCumulativeSize[nPairsMax]
Definition: gpuPixelDoubletsAlgos.h:66

gpuPixelDoublets::doClusterCut
uint32_t CellNeighborsVector CellTracksVector TrackingRecHit2DSOAView const  *__restrict__ GPUCACell::OuterHitOfCell int bool bool doClusterCut
Definition: gpuPixelDoublets.h:99

SiStripPI::min
Definition: SiStripPayloadInspectorHelper.h:178

funct::abs
Abs< T >::type abs(const T &t)
Definition: Abs.h:22

f
double f[11][100]
Definition: MuScleFitUtils.cc:78

gpuClustering::maxNumModules
constexpr uint16_t maxNumModules
Definition: gpuClusteringConstants.h:19

gpuPixelDoublets::maxDYsize12
constexpr int maxDYsize12
Definition: gpuPixelDoubletsAlgos.h:46

nmin
Quality *__restrict__ uint16_t nmin
Definition: CAHitNtupletGeneratorKernelsImpl.h:646

gpuPixelDoublets::ideal_cond
uint32_t CellNeighborsVector CellTracksVector TrackingRecHit2DSOAView const  *__restrict__ GPUCACell::OuterHitOfCell int bool ideal_cond
Definition: gpuPixelDoublets.h:99

cms::cudacompat::blockIdx
const dim3 blockIdx
Definition: cudaCompat.h:32

gpuPixelDoublets::nCells
uint32_t * nCells
Definition: gpuPixelDoublets.h:99

gpuPixelDoublets::phicuts
__constant__ const int16_t phicuts[nPairs]
Definition: gpuPixelDoublets.h:32

gpuPixelDoublets::maxDYPred
constexpr int maxDYPred
Definition: gpuPixelDoubletsAlgos.h:48

gpuPixelDoublets::dzdrFact
constexpr float dzdrFact
Definition: gpuPixelDoubletsAlgos.h:49

cms::cuda::HistoContainer::nbins
static constexpr uint32_t nbins()
Definition: HistoContainer.h:123

cms::cudacompat::__ldg
T __ldg(T const *x)
Definition: cudaCompat.h:137

cms::cuda::HistoContainer::bin
static constexpr UT bin(T t)
Definition: HistoContainer.h:132

short2phi
constexpr float short2phi(short x)
Definition: approx_atan2.h:285

gpuPixelDoublets::pairLayerId
uint32_t pairLayerId
Definition: gpuPixelDoubletsAlgos.h:82

gpuPixelDoublets::nPairsMax
const int nPairsMax
Definition: gpuPixelDoubletsAlgos.h:64

compareTotals.tot
tot
Definition: compareTotals.py:299

gpuPixelDoublets::cellNeighbors
uint32_t CellNeighborsVector * cellNeighbors
Definition: gpuPixelDoublets.h:99

GetRecoTauVFromDQM_MC_cff.kk
kk
Definition: GetRecoTauVFromDQM_MC_cff.py:84

gpuPixelDoublets::maxNumOfDoublets
uint32_t CellNeighborsVector CellTracksVector TrackingRecHit2DSOAView const  *__restrict__ GPUCACell::OuterHitOfCell int bool bool bool bool uint32_t maxNumOfDoublets
Definition: gpuPixelDoublets.h:109

VecArray.h

gpuPixelDoublets::minYsizeB2
constexpr int minYsizeB2
Definition: gpuPixelDoubletsAlgos.h:45

PVValHelper::dy
Definition: PVValidationHelpers.h:50

CAConstants.h

gpuPixelDoublets::stride
auto stride
Definition: gpuPixelDoubletsAlgos.h:80

flavorHistoryFilter_cfi.dr
dr
Definition: flavorHistoryFilter_cfi.py:37

GPUCACell
Definition: GPUCACell.h:20

cms::cuda::SimpleVector
Definition: SimpleVector.h:15

gpuPixelDoublets::maxDYsize
constexpr int maxDYsize
Definition: gpuPixelDoubletsAlgos.h:47

__device__
#define __device__
Definition: SiPixelGainForHLTonGPU.h:15

GPUCACell.h

submitPVValidationJobs.ptcut
ptcut
Definition: submitPVValidationJobs.py:398

gpuPixelDoublets::ntot
__shared__ uint32_t ntot
Definition: gpuPixelDoubletsAlgos.h:67

gpuPixelDoublets::hh
__device__ uint32_t GPUCACell uint32_t CellNeighborsVector CellTracksVector TrackingRecHit2DSOAView const  &__restrict__ hh
Definition: gpuPixelDoubletsAlgos.h:26

cms::cudacompat::atomicAdd
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61

cuda_assert.h

AlCaHLTBitMon_ParallelJobs.p
def p
Definition: AlCaHLTBitMon_ParallelJobs.py:153

dqmdumpme.k
k
Definition: dqmdumpme.py:60

gpuPixelDoublets::idy
auto idy
Definition: gpuPixelDoubletsAlgos.h:78

gpuPixelDoublets::__syncthreads
__syncthreads()
Definition: cudaCompat.h:132

gpuPixelDoublets::doubletsFromHisto
doubletsFromHisto(layerPairs, nActualPairs, cells, nCells, cellNeighbors, cellTracks, hh, isOuterHitOfCell, phicuts, minz, maxz, maxr, ideal_cond, doClusterCut, doZ0Cut, doPtCut, maxNumOfDoublets)

caConstants::CellNeighbors
cms::cuda::VecArray< uint32_t, maxCellNeighbors > CellNeighbors
Definition: CAConstants.h:71

gpuPixelDoublets::first
auto first
Definition: gpuPixelDoubletsAlgos.h:79