CMS 3D CMS Logo

gpuPixelDoubletsAlgos.h
Go to the documentation of this file.
1 #ifndef RecoPixelVertexing_PixelTriplets_plugins_gpuPixelDoubletsAlgos_h
2 #define RecoPixelVertexing_PixelTriplets_plugins_gpuPixelDoubletsAlgos_h
3 
4 #include <algorithm>
5 #include <cmath>
6 #include <cstdint>
7 #include <cstdio>
8 #include <limits>
9 
14 
15 #include "CAConstants.h"
16 #include "GPUCACell.h"
17 
18 namespace gpuPixelDoublets {
19 
24 
25  __device__ __forceinline__ void doubletsFromHisto(uint8_t const* __restrict__ layerPairs,
26  uint32_t nPairs,
28  uint32_t* nCells,
31  TrackingRecHit2DSOAView const& __restrict__ hh,
33  int16_t const* __restrict__ phicuts,
34  float const* __restrict__ minz,
35  float const* __restrict__ maxz,
36  float const* __restrict__ maxr,
37  bool ideal_cond,
38  bool doClusterCut,
39  bool doZ0Cut,
40  bool doPtCut,
41  uint32_t maxNumOfDoublets) {
42  // ysize cuts (z in the barrel) times 8
43  // these are used if doClusterCut is true
44  constexpr int minYsizeB1 = 36;
45  constexpr int minYsizeB2 = 28;
46  constexpr int maxDYsize12 = 28;
47  constexpr int maxDYsize = 20;
48  constexpr int maxDYPred = 20;
49  constexpr float dzdrFact = 8 * 0.0285 / 0.015; // from dz/dr to "DY"
50 
52 
54 
55  auto const& __restrict__ phiBinner = hh.phiBinner();
56  uint32_t const* __restrict__ offsets = hh.hitsLayerStart();
57  assert(offsets);
58 
59  auto layerSize = [=](uint8_t li) { return offsets[li + 1] - offsets[li]; };
60 
61  // nPairsMax to be optimized later (originally was 64).
62  // If it should be much bigger, consider using a block-wide parallel prefix scan,
63  // e.g. see https://nvlabs.github.io/cub/classcub_1_1_warp_scan.html
66  __shared__ uint32_t innerLayerCumulativeSize[nPairsMax];
67  __shared__ uint32_t ntot;
68  if (threadIdx.y == 0 && threadIdx.x == 0) {
70  for (uint32_t i = 1; i < nPairs; ++i) {
72  }
74  }
75  __syncthreads();
76 
77  // x runs faster
78  auto idy = blockIdx.y * blockDim.y + threadIdx.y;
79  auto first = threadIdx.x;
80  auto stride = blockDim.x;
81 
82  uint32_t pairLayerId = 0; // cannot go backward
83  for (auto j = idy; j < ntot; j += blockDim.y * gridDim.y) {
85  ;
86  --pairLayerId; // move to lower_bound ??
87 
91 
92  uint8_t inner = layerPairs[2 * pairLayerId];
93  uint8_t outer = layerPairs[2 * pairLayerId + 1];
94  assert(outer > inner);
95 
96  auto hoff = PhiBinner::histOff(outer);
97 
98  auto i = (0 == pairLayerId) ? j : j - innerLayerCumulativeSize[pairLayerId - 1];
99  i += offsets[inner];
100 
101  // printf("Hit in Layer %d %d %d %d\n", i, inner, pairLayerId, j);
102 
103  assert(i >= offsets[inner]);
104  assert(i < offsets[inner + 1]);
105 
106  // found hit corresponding to our cuda thread, now do the job
107  auto mi = hh.detectorIndex(i);
109  continue; // invalid
110 
111  /* maybe clever, not effective when zoCut is on
112  auto bpos = (mi%8)/4; // if barrel is 1 for z>0
113  auto fpos = (outer>3) & (outer<7);
114  if ( ((inner<3) & (outer>3)) && bpos!=fpos) continue;
115  */
116 
117  auto mez = hh.zGlobal(i);
118 
119  if (mez < minz[pairLayerId] || mez > maxz[pairLayerId])
120  continue;
121 
122  int16_t mes = -1; // make compiler happy
123  if (doClusterCut) {
124  // if ideal treat inner ladder as outer
125  if (inner == 0)
126  assert(mi < 96);
127  isOuterLadder = ideal_cond ? true : 0 == (mi / 8) % 2; // only for B1/B2/B3 B4 is opposite, FPIX:noclue...
128 
129  // in any case we always test mes>0 ...
130  mes = inner > 0 || isOuterLadder ? hh.clusterSizeY(i) : -1;
131 
132  if (inner == 0 && outer > 3) // B1 and F1
133  if (mes > 0 && mes < minYsizeB1)
134  continue; // only long cluster (5*8)
135  if (inner == 1 && outer > 3) // B2 and F1
136  if (mes > 0 && mes < minYsizeB2)
137  continue;
138  }
139  auto mep = hh.iphi(i);
140  auto mer = hh.rGlobal(i);
141 
142  // all cuts: true if fails
143  constexpr float z0cut = 12.f; // cm
144  constexpr float hardPtCut = 0.5f; // GeV
145  // cm (1 GeV track has 1 GeV/c / (e * 3.8T) ~ 87 cm radius in a 3.8T field)
146  constexpr float minRadius = hardPtCut * 87.78f;
147  constexpr float minRadius2T4 = 4.f * minRadius * minRadius;
148  auto ptcut = [&](int j, int16_t idphi) {
149  auto r2t4 = minRadius2T4;
150  auto ri = mer;
151  auto ro = hh.rGlobal(j);
152  auto dphi = short2phi(idphi);
153  return dphi * dphi * (r2t4 - ri * ro) > (ro - ri) * (ro - ri);
154  };
155  auto z0cutoff = [&](int j) {
156  auto zo = hh.zGlobal(j);
157  auto ro = hh.rGlobal(j);
158  auto dr = ro - mer;
159  return dr > maxr[pairLayerId] || dr < 0 || std::abs((mez * ro - mer * zo)) > z0cut * dr;
160  };
161 
162  auto zsizeCut = [&](int j) {
163  auto onlyBarrel = outer < 4;
164  auto so = hh.clusterSizeY(j);
165  auto dy = inner == 0 ? maxDYsize12 : maxDYsize;
166  // in the barrel cut on difference in size
167  // in the endcap on the prediction on the first layer (actually in the barrel only: happen to be safe for endcap as well)
168  // FIXME move pred cut to z0cutoff to optmize loading of and computaiton ...
169  auto zo = hh.zGlobal(j);
170  auto ro = hh.rGlobal(j);
171  return onlyBarrel ? mes > 0 && so > 0 && std::abs(so - mes) > dy
172  : (inner < 4) && mes > 0 &&
173  std::abs(mes - int(std::abs((mez - zo) / (mer - ro)) * dzdrFact + 0.5f)) > maxDYPred;
174  };
175 
176  auto iphicut = phicuts[pairLayerId];
177 
178  auto kl = PhiBinner::bin(int16_t(mep - iphicut));
179  auto kh = PhiBinner::bin(int16_t(mep + iphicut));
180  auto incr = [](auto& k) { return k = (k + 1) % PhiBinner::nbins(); };
181 
182 #ifdef GPU_DEBUG
183  int tot = 0;
184  int nmin = 0;
185  int tooMany = 0;
186 #endif
187 
188  auto khh = kh;
189  incr(khh);
190  for (auto kk = kl; kk != khh; incr(kk)) {
191 #ifdef GPU_DEBUG
192  if (kk != kl && kk != kh)
193  nmin += phiBinner.size(kk + hoff);
194 #endif
195  auto const* __restrict__ p = phiBinner.begin(kk + hoff);
196  auto const* __restrict__ e = phiBinner.end(kk + hoff);
197  p += first;
198  for (; p < e; p += stride) {
199  auto oi = __ldg(p);
200  assert(oi >= offsets[outer]);
201  assert(oi < offsets[outer + 1]);
202  auto mo = hh.detectorIndex(oi);
204  continue; // invalid
205 
206  if (doZ0Cut && z0cutoff(oi))
207  continue;
208 
209  auto mop = hh.iphi(oi);
210  uint16_t idphi = std::min(std::abs(int16_t(mop - mep)), std::abs(int16_t(mep - mop)));
211  if (idphi > iphicut)
212  continue;
213 
214  if (doClusterCut && zsizeCut(oi))
215  continue;
216  if (doPtCut && ptcut(oi, idphi))
217  continue;
218 
219  auto ind = atomicAdd(nCells, 1);
220  if (ind >= maxNumOfDoublets) {
221  atomicSub(nCells, 1);
222  break;
223  } // move to SimpleVector??
224  // int layerPairId, int doubletId, int innerHitId, int outerHitId)
225  cells[ind].init(*cellNeighbors, *cellTracks, hh, pairLayerId, ind, i, oi);
226  isOuterHitOfCell[oi].push_back(ind);
227 #ifdef GPU_DEBUG
228  if (isOuterHitOfCell[oi].full())
229  ++tooMany;
230  ++tot;
231 #endif
232  }
233  }
234 #ifdef GPU_DEBUG
235  if (tooMany > 0)
236  printf("OuterHitOfCell full for %d in layer %d/%d, %d,%d %d\n", i, inner, outer, nmin, tot, tooMany);
237 #endif
238  } // loop in block...
239  }
240 
241 } // namespace gpuPixelDoublets
242 
243 #endif // RecoPixelVertexing_PixelTriplets_plugins_gpuPixelDoubletsAlgos_h
cms::cudacompat::atomicSub
T1 atomicSub(T1 *a, T2 b)
Definition: cudaCompat.h:73
mps_fire.i
i
Definition: mps_fire.py:428
gpuPixelDoublets::doPtCut
uint32_t CellNeighborsVector CellTracksVector TrackingRecHit2DSOAView const *__restrict__ GPUCACell::OuterHitOfCell int bool bool bool bool doPtCut
Definition: gpuPixelDoublets.h:99
gpuPixelDoublets::cellNeighbors
uint32_t CellNeighborsVector * cellNeighbors
Definition: gpuPixelDoublets.h:99
f
double f[11][100]
Definition: MuScleFitUtils.cc:78
gpuPixelDoublets::ideal_cond
uint32_t CellNeighborsVector CellTracksVector TrackingRecHit2DSOAView const *__restrict__ GPUCACell::OuterHitOfCell int bool ideal_cond
Definition: gpuPixelDoublets.h:99
min
T min(T a, T b)
Definition: MathUtil.h:58
gpuPixelDoublets::idy
auto idy
Definition: gpuPixelDoubletsAlgos.h:78
caConstants::CellTracks
cms::cuda::VecArray< tindex_type, maxCellTracks > CellTracks
Definition: CAConstants.h:70
gpuPixelDoublets::minYsizeB2
constexpr int minYsizeB2
Definition: gpuPixelDoubletsAlgos.h:45
caConstants::CellNeighbors
cms::cuda::VecArray< uint32_t, maxCellNeighbors > CellNeighbors
Definition: CAConstants.h:69
TrackingRecHit2DHeterogeneous.h
gpuPixelDoublets::ntot
__shared__ uint32_t ntot
Definition: gpuPixelDoubletsAlgos.h:67
TrackingRecHit2DSOAView
Definition: TrackingRecHit2DSOAView.h:16
full
Definition: GenABIO.cc:168
gpuPixelDoublets::maxDYsize
constexpr int maxDYsize
Definition: gpuPixelDoubletsAlgos.h:47
gpuPixelDoublets::cells
__device__ uint32_t GPUCACell * cells
Definition: gpuPixelDoubletsAlgos.h:26
cms::cuda::SimpleVector
Definition: SimpleVector.h:15
gpuPixelDoublets::maxz
__constant__ const float maxz[nPairs]
Definition: gpuPixelDoublets.h:55
gpuPixelDoublets::maxNumOfDoublets
uint32_t CellNeighborsVector CellTracksVector TrackingRecHit2DSOAView const *__restrict__ GPUCACell::OuterHitOfCell int bool bool bool bool uint32_t maxNumOfDoublets
Definition: gpuPixelDoublets.h:109
gpuPixelDoublets::hh
__device__ uint32_t GPUCACell uint32_t CellNeighborsVector CellTracksVector TrackingRecHit2DSOAView const &__restrict__ hh
Definition: gpuPixelDoubletsAlgos.h:26
gpuPixelDoublets::pairLayerId
uint32_t pairLayerId
Definition: gpuPixelDoubletsAlgos.h:82
SurfaceOrientation::inner
Definition: Surface.h:19
gpuPixelDoublets::layerSize
auto layerSize
Definition: gpuPixelDoubletsAlgos.h:59
gpuPixelDoublets::nCells
uint32_t * nCells
Definition: gpuPixelDoublets.h:99
cms::cudacompat::atomicAdd
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
gpuPixelDoublets::doZ0Cut
uint32_t CellNeighborsVector CellTracksVector TrackingRecHit2DSOAView const *__restrict__ GPUCACell::OuterHitOfCell int bool bool bool doZ0Cut
Definition: gpuPixelDoublets.h:99
GetRecoTauVFromDQM_MC_cff.kk
kk
Definition: GetRecoTauVFromDQM_MC_cff.py:84
dqmdumpme.k
k
Definition: dqmdumpme.py:60
VecArray.h
gpuClustering::maxNumModules
constexpr uint16_t maxNumModules
Definition: gpuClusteringConstants.h:18
gpuPixelDoublets::CellNeighborsVector
caConstants::CellNeighborsVector CellNeighborsVector
Definition: gpuPixelDoublets.h:65
submitPVValidationJobs.ptcut
list ptcut
Definition: submitPVValidationJobs.py:682
cms::cudacompat::gridDim
const dim3 gridDim
Definition: cudaCompat.h:33
funct::true
true
Definition: Factorize.h:173
gpuPixelDoublets::nPairsMax
const int nPairsMax
Definition: gpuPixelDoubletsAlgos.h:64
gpuPixelDoublets::first
auto first
Definition: gpuPixelDoubletsAlgos.h:79
gpuPixelDoublets::cellTracks
uint32_t CellNeighborsVector CellTracksVector * cellTracks
Definition: gpuPixelDoublets.h:99
AlCaHLTBitMon_ParallelJobs.p
def p
Definition: AlCaHLTBitMon_ParallelJobs.py:153
approx_atan2.h
cms::cudacompat::blockDim
const dim3 blockDim
Definition: cudaCompat.h:30
gpuPixelDoublets
Definition: gpuFishbone.h:17
gpuPixelDoublets::layerPairs
const __constant__ uint8_t layerPairs[2 *nPairs]
Definition: gpuPixelDoublets.h:18
caConstants::CellNeighborsVector
cms::cuda::SimpleVector< CellNeighbors > CellNeighborsVector
Definition: CAConstants.h:72
gpuPixelDoublets::doubletsFromHisto
doubletsFromHisto(layerPairs, nActualPairs, cells, nCells, cellNeighbors, cellTracks, hh, isOuterHitOfCell, phicuts, minz, maxz, maxr, ideal_cond, doClusterCut, doZ0Cut, doPtCut, maxNumOfDoublets)
gpuPixelDoublets::__syncthreads
__syncthreads()
Definition: cudaCompat.h:108
cms::cuda::VecArray
Definition: VecArray.h:14
PVValHelper::dy
Definition: PVValidationHelpers.h:50
__device__
#define __device__
Definition: SiPixelGainForHLTonGPU.h:15
gpuPixelDoublets::maxr
__constant__ const float maxr[nPairs]
Definition: gpuPixelDoublets.h:57
gpuPixelDoublets::phiBinner
auto const &__restrict__ phiBinner
Definition: gpuPixelDoubletsAlgos.h:55
cms::cudacompat::threadIdx
const dim3 threadIdx
Definition: cudaCompat.h:29
__forceinline__
#define __forceinline__
Definition: cudaCompat.h:22
gpuPixelDoublets::isOuterLadder
bool isOuterLadder
Definition: gpuPixelDoubletsAlgos.h:51
nmin
const HitContainer *__restrict__ const TkSoA *__restrict__ Quality *__restrict__ uint16_t nmin
Definition: CAHitNtupletGeneratorKernelsImpl.h:634
TrackingRecHit2DSOAView::PhiBinner
cms::cuda::HistoContainer< int16_t, 128, -1, 8 *sizeof(int16_t), hindex_type, 10 > PhiBinner
Definition: TrackingRecHit2DSOAView.h:23
gpuPixelDoublets::phicuts
const __constant__ int16_t phicuts[nPairs]
Definition: gpuPixelDoublets.h:32
gpuPixelDoublets::offsets
uint32_t const *__restrict__ offsets
Definition: gpuPixelDoubletsAlgos.h:56
gpuPixelDoublets::nPairs
constexpr int nPairs
Definition: gpuPixelDoublets.h:12
cms::cuda::HistoContainer::nbins
static constexpr uint32_t nbins()
Definition: HistoContainer.h:123
gpuPixelDoublets::assert
assert(offsets)
gpuPixelDoublets::maxDYPred
constexpr int maxDYPred
Definition: gpuPixelDoubletsAlgos.h:48
gpuPixelDoublets::CellTracks
caConstants::CellTracks CellTracks
Definition: gpuPixelDoublets.h:64
cms::cuda::SimpleVector::push_back
__device__ int push_back(const T &element)
Definition: SimpleVector.h:60
flavorHistoryFilter_cfi.dr
dr
Definition: flavorHistoryFilter_cfi.py:37
GPUCACell.h
cms::cuda::HistoContainer::bin
static constexpr UT bin(T t)
Definition: HistoContainer.h:132
gpuPixelDoublets::maxDYsize12
constexpr int maxDYsize12
Definition: gpuPixelDoubletsAlgos.h:46
gpuPixelDoublets::isOuterHitOfCell
uint32_t CellNeighborsVector CellTracksVector TrackingRecHit2DSOAView const *__restrict__ GPUCACell::OuterHitOfCell * isOuterHitOfCell
Definition: gpuPixelDoublets.h:99
gpuPixelDoublets::CellNeighbors
caConstants::CellNeighbors CellNeighbors
Definition: gpuPixelDoublets.h:63
GPUCACell
Definition: GPUCACell.h:20
caConstants::maxNumberOfLayerPairs
constexpr uint32_t maxNumberOfLayerPairs
Definition: CAConstants.h:43
funct::abs
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
cms::cuda::HistoContainer
Definition: HistoContainer.h:101
gpuPixelDoublets::dzdrFact
constexpr float dzdrFact
Definition: gpuPixelDoubletsAlgos.h:49
gpuPixelDoublets::doClusterCut
uint32_t CellNeighborsVector CellTracksVector TrackingRecHit2DSOAView const *__restrict__ GPUCACell::OuterHitOfCell int bool bool doClusterCut
Definition: gpuPixelDoublets.h:99
cuda_assert.h
SurfaceOrientation::outer
Definition: Surface.h:19
dqmiolumiharvest.j
j
Definition: dqmiolumiharvest.py:66
cms::cudacompat::__ldg
T __ldg(T const *x)
Definition: cudaCompat.h:113
CAConstants.h
short2phi
constexpr float short2phi(short x)
Definition: approx_atan2.h:285
gpuPixelDoublets::stride
auto stride
Definition: gpuPixelDoubletsAlgos.h:80
gpuPixelDoublets::CellTracksVector
caConstants::CellTracksVector CellTracksVector
Definition: gpuPixelDoublets.h:66
gpuPixelDoublets::minz
__constant__ const float minz[nPairs]
Definition: gpuPixelDoublets.h:53
cms::cuda::HistoContainer::histOff
static constexpr auto histOff(uint32_t nh)
Definition: HistoContainer.h:130
gpuPixelDoublets::innerLayerCumulativeSize
__shared__ uint32_t innerLayerCumulativeSize[nPairsMax]
Definition: gpuPixelDoubletsAlgos.h:66
MillePedeFileConverter_cfg.e
e
Definition: MillePedeFileConverter_cfg.py:37
caConstants::CellTracksVector
cms::cuda::SimpleVector< CellTracks > CellTracksVector
Definition: CAConstants.h:73
cms::cudacompat::blockIdx
const dim3 blockIdx
Definition: cudaCompat.h:32