CMS 3D CMS Logo

gpuPixelDoubletsAlgos.h
Go to the documentation of this file.
1 #ifndef RecoPixelVertexing_PixelTriplets_plugins_gpuPixelDoubletsAlgos_h
2 #define RecoPixelVertexing_PixelTriplets_plugins_gpuPixelDoubletsAlgos_h
3 
4 #include <algorithm>
5 #include <cmath>
6 #include <cstdint>
7 #include <cstdio>
8 #include <limits>
9 
14 
16 #include "CAStructures.h"
17 #include "GPUCACell.h"
18 
19 //#define GPU_DEBUG
20 //#define NTUPLE_DEBUG
21 
22 namespace gpuPixelDoublets {
23 
24  template <typename TrackerTraits>
26  template <typename TrackerTraits>
28  template <typename TrackerTraits>
30  template <typename TrackerTraits>
32  template <typename TrackerTraits>
34  template <typename TrackerTraits>
36 
37  template <typename TrackerTraits>
38  struct CellCutsT {
40  using T = TrackerTraits;
41 
42  const uint32_t maxNumberOfDoublets_;
43  const bool doClusterCut_;
44  const bool doZ0Cut_;
45  const bool doPtCut_;
46  const bool idealConditions_; //this is actually not used by phase2
47 
48  __device__ __forceinline__ bool zSizeCut(H hh, int i, int o) const {
49  const uint32_t mi = hh[i].detectorIndex();
50 
51  bool innerB1 = mi < T::last_bpix1_detIndex;
52  bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2;
53  auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1;
54 
55  if (mes < 0)
56  return false;
57 
58  const uint32_t mo = hh[o].detectorIndex();
59  auto so = hh[o].clusterSizeY();
60 
61  auto dz = hh[i].zGlobal() - hh[o].zGlobal();
62  auto dr = hh[i].rGlobal() - hh[o].rGlobal();
63 
64  auto innerBarrel = mi < T::last_barrel_detIndex;
65  auto onlyBarrel = mo < T::last_barrel_detIndex;
66 
67  if (not innerBarrel and not onlyBarrel)
68  return false;
69  auto dy = innerB1 ? T::maxDYsize12 : T::maxDYsize;
70 
71  return onlyBarrel ? so > 0 && std::abs(so - mes) > dy
72  : innerBarrel && std::abs(mes - int(std::abs(dz / dr) * T::dzdrFact + 0.5f)) > T::maxDYPred;
73  }
74 
75  __device__ __forceinline__ bool clusterCut(H hh, int i) const {
76  const uint32_t mi = hh[i].detectorIndex();
77  bool innerB1orB2 = mi < T::last_bpix2_detIndex;
78 
79  if (!innerB1orB2)
80  return false;
81 
82  bool innerB1 = mi < T::last_bpix1_detIndex;
83  bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2;
84  auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1;
85 
86  if (innerB1) // B1
87  if (mes > 0 && mes < T::minYsizeB1)
88  return true; // only long cluster (5*8)
89  bool innerB2 = (mi >= T::last_bpix1_detIndex) && (mi < T::last_bpix2_detIndex); //FIXME number
90  if (innerB2) // B2 and F1
91  if (mes > 0 && mes < T::minYsizeB2)
92  return true;
93 
94  return false;
95  }
96  };
97 
98  template <typename TrackerTraits>
99  __device__ __forceinline__ void doubletsFromHisto(uint32_t nPairs,
101  uint32_t* nCells,
102  CellNeighborsVector<TrackerTraits>* cellNeighbors,
103  CellTracksVector<TrackerTraits>* cellTracks,
104  HitsConstView<TrackerTraits> hh,
105  OuterHitOfCell<TrackerTraits> isOuterHitOfCell,
106  CellCutsT<TrackerTraits> const& cuts) {
107  // ysize cuts (z in the barrel) times 8
108  // these are used if doClusterCut is true
109 
110  const bool doClusterCut = cuts.doClusterCut_;
111  const bool doZ0Cut = cuts.doZ0Cut_;
112  const bool doPtCut = cuts.doPtCut_;
113  const uint32_t maxNumOfDoublets = cuts.maxNumberOfDoublets_;
114 
116 
117  auto const& __restrict__ phiBinner = hh.phiBinner();
118  uint32_t const* __restrict__ offsets = hh.hitsLayerStart().data();
119  assert(offsets);
120 
121  auto layerSize = [=](uint8_t li) { return offsets[li + 1] - offsets[li]; };
122 
123  // nPairsMax to be optimized later (originally was 64).
124  // If it should be much bigger, consider using a block-wide parallel prefix scan,
125  // e.g. see https://nvlabs.github.io/cub/classcub_1_1_warp_scan.html
126 
128  __shared__ uint32_t ntot;
129  if (threadIdx.y == 0 && threadIdx.x == 0) {
131  for (uint32_t i = 1; i < nPairs; ++i) {
133  }
135  }
136  __syncthreads();
137 
138  // x runs faster
139  auto idy = blockIdx.y * blockDim.y + threadIdx.y;
140  auto first = threadIdx.x;
141  auto stride = blockDim.x;
142 
143  uint32_t pairLayerId = 0; // cannot go backward
144 
145  for (auto j = idy; j < ntot; j += blockDim.y * gridDim.y) {
147  ;
148  --pairLayerId; // move to lower_bound ??
149 
153 
155  uint8_t outer = TrackerTraits::layerPairs[2 * pairLayerId + 1];
156  assert(outer > inner);
157 
158  auto hoff = PhiBinner::histOff(outer);
159  auto i = (0 == pairLayerId) ? j : j - innerLayerCumulativeSize[pairLayerId - 1];
160  i += offsets[inner];
161 
162  assert(i >= offsets[inner]);
163  assert(i < offsets[inner + 1]);
164 
165  // found hit corresponding to our cuda thread, now do the job
166 
167  if (hh[i].detectorIndex() > gpuClustering::maxNumModules)
168  continue; // invalid
169 
170  /* maybe clever, not effective when zoCut is on
171  auto bpos = (mi%8)/4; // if barrel is 1 for z>0
172  auto fpos = (outer>3) & (outer<7);
173  if ( ((inner<3) & (outer>3)) && bpos!=fpos) continue;
174  */
175 
176  auto mez = hh[i].zGlobal();
177 
179  continue;
180 
181  if (doClusterCut && outer > pixelTopology::last_barrel_layer && cuts.clusterCut(hh, i))
182  continue;
183 
184  auto mep = hh[i].iphi();
185  auto mer = hh[i].rGlobal();
186 
187  // all cuts: true if fails
188  constexpr float z0cut = TrackerTraits::z0Cut; // cm
189  constexpr float hardPtCut = TrackerTraits::doubletHardPt; // GeV
190  // cm (1 GeV track has 1 GeV/c / (e * 3.8T) ~ 87 cm radius in a 3.8T field)
191  constexpr float minRadius = hardPtCut * 87.78f;
192  constexpr float minRadius2T4 = 4.f * minRadius * minRadius;
193  auto ptcut = [&](int j, int16_t idphi) {
194  auto r2t4 = minRadius2T4;
195  auto ri = mer;
196  auto ro = hh[j].rGlobal();
197  auto dphi = short2phi(idphi);
198  return dphi * dphi * (r2t4 - ri * ro) > (ro - ri) * (ro - ri);
199  };
200  auto z0cutoff = [&](int j) {
201  auto zo = hh[j].zGlobal();
202  auto ro = hh[j].rGlobal();
203  auto dr = ro - mer;
204  return dr > TrackerTraits::maxr[pairLayerId] || dr < 0 || std::abs((mez * ro - mer * zo)) > z0cut * dr;
205  };
206 
207  auto iphicut = TrackerTraits::phicuts[pairLayerId];
208 
209  auto kl = PhiBinner::bin(int16_t(mep - iphicut));
210  auto kh = PhiBinner::bin(int16_t(mep + iphicut));
211  auto incr = [](auto& k) { return k = (k + 1) % PhiBinner::nbins(); };
212 
213 #ifdef GPU_DEBUG
214  int tot = 0;
215  int nmin = 0;
216  int tooMany = 0;
217 #endif
218 
219  auto khh = kh;
220  incr(khh);
221  for (auto kk = kl; kk != khh; incr(kk)) {
222 #ifdef GPU_DEBUG
223  if (kk != kl && kk != kh)
224  nmin += phiBinner.size(kk + hoff);
225 #endif
226  auto const* __restrict__ p = phiBinner.begin(kk + hoff);
227  auto const* __restrict__ e = phiBinner.end(kk + hoff);
228  p += first;
229  for (; p < e; p += stride) {
230  auto oi = __ldg(p);
231  assert(oi >= offsets[outer]);
232  assert(oi < offsets[outer + 1]);
233  auto mo = hh[oi].detectorIndex();
234 
236  continue; // invalid
237 
238  if (doZ0Cut && z0cutoff(oi))
239  continue;
240  auto mop = hh[oi].iphi();
241  uint16_t idphi = std::min(std::abs(int16_t(mop - mep)), std::abs(int16_t(mep - mop)));
242  if (idphi > iphicut)
243  continue;
244 
245  if (doClusterCut && cuts.zSizeCut(hh, i, oi))
246  continue;
247  if (doPtCut && ptcut(oi, idphi))
248  continue;
249 
250  auto ind = atomicAdd(nCells, 1);
251  if (ind >= maxNumOfDoublets) {
252  atomicSub(nCells, 1);
253  break;
254  } // move to SimpleVector??
255  // int layerPairId, int doubletId, int innerHitId, int outerHitId)
256  cells[ind].init(*cellNeighbors, *cellTracks, hh, pairLayerId, i, oi);
257  isOuterHitOfCell[oi].push_back(ind);
258 #ifdef GPU_DEBUG
259  if (isOuterHitOfCell[oi].full())
260  ++tooMany;
261  ++tot;
262 #endif
263  }
264  }
265 // #endif
266 #ifdef GPU_DEBUG
267  if (tooMany > 0)
268  printf("OuterHitOfCell full for %d in layer %d/%d, %d,%d %d, %d %.3f %.3f\n",
269  i,
270  inner,
271  outer,
272  nmin,
273  tot,
274  tooMany,
275  iphicut,
278 #endif
279  } // loop in block...
280  }
281 
282 } // namespace gpuPixelDoublets
283 
284 #endif // RecoPixelVertexing_PixelTriplets_plugins_gpuPixelDoubletsAlgos_h
const dim3 threadIdx
Definition: cudaCompat.h:29
#define __forceinline__
Definition: cudaCompat.h:22
TrackingRecHitSoAConstView< TrackerTraits > HitsConstView
Definition: GPUCACell.h:34
int CellNeighborsVector< TrackerTraits > * cellNeighbors
const dim3 gridDim
Definition: cudaCompat.h:33
caStructures::CellNeighborsVectorT< TrackerTraits > CellNeighborsVector
Definition: gpuFishbone.h:24
T1 atomicSub(T1 *a, T2 b)
Definition: cudaCompat.h:73
constexpr int16_t phicuts[nPairs]
caStructures::CellNeighborsT< TrackerTraits > CellNeighbors
Definition: gpuFishbone.h:20
const dim3 blockDim
Definition: cudaCompat.h:30
uint32_t const *__restrict__ offsets
const uint32_t maxNumOfDoublets
__shared__ uint32_t innerLayerCumulativeSize[TrackerTraits::nPairs]
constexpr uint16_t last_barrel_layer
typename TrackingRecHitSoA< TrackerTraits >::PhiBinner PhiBinner
uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > hh
auto const &__restrict__ phiBinner
Definition: GenABIO.cc:168
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
double f[11][100]
GPUCACellT< TrackerTraits > * cells
Definition: gpuFishbone.h:34
constexpr uint16_t maxNumModules
caStructures::CellTracksT< TrackerTraits > CellTracks
Definition: gpuFishbone.h:22
int CellNeighborsVector< TrackerTraits > CellNeighbors< TrackerTraits > CellTracksVector< TrackerTraits > * cellTracks
const dim3 blockIdx
Definition: cudaCompat.h:32
GPUCACellT< TrackerTraits > uint32_t const *__restrict__ nCells
Definition: gpuFishbone.h:34
auto const isOuterHitOfCell
Definition: gpuFishbone.h:41
T __ldg(T const *x)
Definition: cudaCompat.h:137
constexpr float short2phi(short x)
Definition: approx_atan2.h:285
constexpr float minz[nPairs]
HitsConstView< TrackerTraits > H
if(innerB1) if(mes > 0 &&mes< T bool innerB2
uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > OuterHitOfCell< TrackerTraits > int CellCutsT< TrackerTraits > cuts
constexpr float maxz[nPairs]
caStructures::CellTracksVectorT< TrackerTraits > CellTracksVector
Definition: gpuFishbone.h:26
caStructures::OuterHitOfCellT< TrackerTraits > OuterHitOfCell
Definition: gpuFishbone.h:28
#define __device__
typename GPUCACellT< TrackerTraits >::HitsConstView HitsConstView
Definition: gpuFishbone.h:30
__shared__ uint32_t ntot
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
constexpr float maxr[nPairs]