CMS 3D CMS Logo

gpuPixelDoubletsAlgos.h
Go to the documentation of this file.
1 #ifndef RecoTracker_PixelSeeding_plugins_gpuPixelDoubletsAlgos_h
2 #define RecoTracker_PixelSeeding_plugins_gpuPixelDoubletsAlgos_h
3 
4 #include <algorithm>
5 #include <cmath>
6 #include <cstdint>
7 #include <cstdio>
8 #include <limits>
9 
15 
16 #include "CAStructures.h"
17 #include "GPUCACell.h"
18 
19 //#define GPU_DEBUG
20 //#define NTUPLE_DEBUG
21 
22 namespace gpuPixelDoublets {
23 
24  template <typename TrackerTraits>
26  template <typename TrackerTraits>
28  template <typename TrackerTraits>
30  template <typename TrackerTraits>
32  template <typename TrackerTraits>
34  template <typename TrackerTraits>
36 
37  template <typename TrackerTraits>
38  struct CellCutsT {
40  using T = TrackerTraits;
41 
42  CellCutsT() = default;
43 
44  CellCutsT(const bool doClusterCut,
45  const bool doZ0Cut,
46  const bool doPtCut,
47  const bool idealConditions,
48  const float z0Cut,
49  const float ptCut,
50  const std::vector<int>& phiCutsV)
55  z0Cut_(z0Cut),
56  ptCut_(ptCut) {
57  assert(phiCutsV.size() == TrackerTraits::nPairs);
58  std::copy(phiCutsV.begin(), phiCutsV.end(), &phiCuts[0]);
59  }
60 
62  bool doZ0Cut_;
63  bool doPtCut_;
64  bool idealConditions_; //this is actually not used by phase2
65 
66  float z0Cut_;
67  float ptCut_;
68 
70 
71  __device__ __forceinline__ bool zSizeCut(H hh, int i, int o) const {
72  const uint32_t mi = hh[i].detectorIndex();
73 
74  bool innerB1 = mi < T::last_bpix1_detIndex;
75  bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2;
76  auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1;
77 
78  if (mes < 0)
79  return false;
80 
81  const uint32_t mo = hh[o].detectorIndex();
82  auto so = hh[o].clusterSizeY();
83 
84  auto dz = hh[i].zGlobal() - hh[o].zGlobal();
85  auto dr = hh[i].rGlobal() - hh[o].rGlobal();
86 
87  auto innerBarrel = mi < T::last_barrel_detIndex;
88  auto onlyBarrel = mo < T::last_barrel_detIndex;
89 
90  if (not innerBarrel and not onlyBarrel)
91  return false;
92  auto dy = innerB1 ? T::maxDYsize12 : T::maxDYsize;
93 
94  return onlyBarrel ? so > 0 && std::abs(so - mes) > dy
95  : innerBarrel && std::abs(mes - int(std::abs(dz / dr) * T::dzdrFact + 0.5f)) > T::maxDYPred;
96  }
97 
98  __device__ __forceinline__ bool clusterCut(H hh, int i) const {
99  const uint32_t mi = hh[i].detectorIndex();
100  bool innerB1orB2 = mi < T::last_bpix2_detIndex;
101 
102  if (!innerB1orB2)
103  return false;
104 
105  bool innerB1 = mi < T::last_bpix1_detIndex;
106  bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2;
107  auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1;
108 
109  if (innerB1) // B1
110  if (mes > 0 && mes < T::minYsizeB1)
111  return true; // only long cluster (5*8)
112  bool innerB2 = (mi >= T::last_bpix1_detIndex) && (mi < T::last_bpix2_detIndex); //FIXME number
113  if (innerB2) // B2 and F1
114  if (mes > 0 && mes < T::minYsizeB2)
115  return true;
116 
117  return false;
118  }
119  };
120 
121  template <typename TrackerTraits>
122  __device__ __forceinline__ void doubletsFromHisto(uint32_t nPairs,
123  uint32_t maxNumOfDoublets,
125  uint32_t* nCells,
126  CellNeighborsVector<TrackerTraits>* cellNeighbors,
127  CellTracksVector<TrackerTraits>* cellTracks,
128  HitsConstView<TrackerTraits> hh,
129  OuterHitOfCell<TrackerTraits> isOuterHitOfCell,
130  CellCutsT<TrackerTraits> const& cuts) {
131  // ysize cuts (z in the barrel) times 8
132  // these are used if doClusterCut is true
133 
134  const bool doClusterCut = cuts.doClusterCut_;
135  const bool doZ0Cut = cuts.doZ0Cut_;
136  const bool doPtCut = cuts.doPtCut_;
137 
138  const float z0cut = cuts.z0Cut_; // cm
139  const float hardPtCut = cuts.ptCut_; // GeV
140  // cm (1 GeV track has 1 GeV/c / (e * 3.8T) ~ 87 cm radius in a 3.8T field)
141  const float minRadius = hardPtCut * 87.78f;
142  const float minRadius2T4 = 4.f * minRadius * minRadius;
143 
145 
146  auto const& __restrict__ phiBinner = hh.phiBinner();
147  uint32_t const* __restrict__ offsets = hh.hitsLayerStart().data();
148  assert(offsets);
149 
150  auto layerSize = [=](uint8_t li) { return offsets[li + 1] - offsets[li]; };
151 
152  // nPairsMax to be optimized later (originally was 64).
153  // If it should be much bigger, consider using a block-wide parallel prefix scan,
154  // e.g. see https://nvlabs.github.io/cub/classcub_1_1_warp_scan.html
155 
157  __shared__ uint32_t ntot;
158  if (threadIdx.y == 0 && threadIdx.x == 0) {
160  for (uint32_t i = 1; i < nPairs; ++i) {
162  }
164  }
165  __syncthreads();
166 
167  // x runs faster
168  auto idy = blockIdx.y * blockDim.y + threadIdx.y;
169  auto first = threadIdx.x;
170  auto stride = blockDim.x;
171 
172  uint32_t pairLayerId = 0; // cannot go backward
173 
174  for (auto j = idy; j < ntot; j += blockDim.y * gridDim.y) {
176  ;
177  --pairLayerId; // move to lower_bound ??
178 
182 
184  uint8_t outer = TrackerTraits::layerPairs[2 * pairLayerId + 1];
185  assert(outer > inner);
186 
187  auto hoff = PhiBinner::histOff(outer);
188  auto i = (0 == pairLayerId) ? j : j - innerLayerCumulativeSize[pairLayerId - 1];
189  i += offsets[inner];
190 
191  assert(i >= offsets[inner]);
192  assert(i < offsets[inner + 1]);
193 
194  // found hit corresponding to our cuda thread, now do the job
195 
196  if (hh[i].detectorIndex() > gpuClustering::maxNumModules)
197  continue; // invalid
198 
199  /* maybe clever, not effective when zoCut is on
200  auto bpos = (mi%8)/4; // if barrel is 1 for z>0
201  auto fpos = (outer>3) & (outer<7);
202  if ( ((inner<3) & (outer>3)) && bpos!=fpos) continue;
203  */
204 
205  auto mez = hh[i].zGlobal();
206 
208  continue;
209 
210  if (doClusterCut && outer > pixelTopology::last_barrel_layer && cuts.clusterCut(hh, i))
211  continue;
212 
213  auto mep = hh[i].iphi();
214  auto mer = hh[i].rGlobal();
215 
216  auto ptcut = [&](int j, int16_t idphi) {
217  auto r2t4 = minRadius2T4;
218  auto ri = mer;
219  auto ro = hh[j].rGlobal();
220  auto dphi = short2phi(idphi);
221  return dphi * dphi * (r2t4 - ri * ro) > (ro - ri) * (ro - ri);
222  };
223  auto z0cutoff = [&](int j) {
224  auto zo = hh[j].zGlobal();
225  auto ro = hh[j].rGlobal();
226  auto dr = ro - mer;
227  return dr > TrackerTraits::maxr[pairLayerId] || dr < 0 || std::abs((mez * ro - mer * zo)) > z0cut * dr;
228  };
229 
230  auto iphicut = cuts.phiCuts[pairLayerId];
231 
232  auto kl = PhiBinner::bin(int16_t(mep - iphicut));
233  auto kh = PhiBinner::bin(int16_t(mep + iphicut));
234  auto incr = [](auto& k) { return k = (k + 1) % PhiBinner::nbins(); };
235 
236 #ifdef GPU_DEBUG
237  int tot = 0;
238  int nmin = 0;
239  int tooMany = 0;
240 #endif
241 
242  auto khh = kh;
243  incr(khh);
244  for (auto kk = kl; kk != khh; incr(kk)) {
245 #ifdef GPU_DEBUG
246  if (kk != kl && kk != kh)
247  nmin += phiBinner.size(kk + hoff);
248 #endif
249  auto const* __restrict__ p = phiBinner.begin(kk + hoff);
250  auto const* __restrict__ e = phiBinner.end(kk + hoff);
251  p += first;
252  for (; p < e; p += stride) {
253  auto oi = __ldg(p);
254  assert(oi >= offsets[outer]);
255  assert(oi < offsets[outer + 1]);
256  auto mo = hh[oi].detectorIndex();
257 
259  continue; // invalid
260 
261  if (doZ0Cut && z0cutoff(oi))
262  continue;
263  auto mop = hh[oi].iphi();
264  uint16_t idphi = std::min(std::abs(int16_t(mop - mep)), std::abs(int16_t(mep - mop)));
265  if (idphi > iphicut)
266  continue;
267 
268  if (doClusterCut && cuts.zSizeCut(hh, i, oi))
269  continue;
270  if (doPtCut && ptcut(oi, idphi))
271  continue;
272 
273  auto ind = atomicAdd(nCells, 1);
274  if (ind >= maxNumOfDoublets) {
275  atomicSub(nCells, 1);
276  break;
277  } // move to SimpleVector??
278  // int layerPairId, int doubletId, int innerHitId, int outerHitId)
279  cells[ind].init(*cellNeighbors, *cellTracks, hh, pairLayerId, i, oi);
280  isOuterHitOfCell[oi].push_back(ind);
281 #ifdef GPU_DEBUG
282  if (isOuterHitOfCell[oi].full())
283  ++tooMany;
284  ++tot;
285 #endif
286  }
287  }
288 // #endif
289 #ifdef GPU_DEBUG
290  if (tooMany > 0 || tot > 0)
291  printf("OuterHitOfCell for %d in layer %d/%d, %d,%d %d, %d %.3f %.3f %s\n",
292  i,
293  inner,
294  outer,
295  nmin,
296  tot,
297  tooMany,
298  iphicut,
301  tooMany > 0 ? "FULL!!" : "not full.");
302 #endif
303  } // loop in block...
304  }
305 
306 } // namespace gpuPixelDoublets
307 
308 #endif // RecoTracker_PixelSeeding_plugins_gpuPixelDoubletsAlgos_h
const dim3 threadIdx
Definition: cudaCompat.h:29
uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > OuterHitOfCell< TrackerTraits > int const int maxNumOfDoublets
#define __forceinline__
Definition: cudaCompat.h:22
TrackingRecHitSoAConstView< TrackerTraits > HitsConstView
Definition: GPUCACell.h:34
const dim3 gridDim
Definition: cudaCompat.h:33
caStructures::CellNeighborsVectorT< TrackerTraits > CellNeighborsVector
Definition: gpuFishbone.h:24
T1 atomicSub(T1 *a, T2 b)
Definition: cudaCompat.h:73
caStructures::CellNeighborsT< TrackerTraits > CellNeighbors
Definition: gpuFishbone.h:20
const dim3 blockDim
Definition: cudaCompat.h:30
uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > OuterHitOfCell< TrackerTraits > int const int CellCutsT< TrackerTraits > *const cuts
int CellNeighborsVector< TrackerTraits > CellNeighbors< TrackerTraits > CellTracksVector< TrackerTraits > * cellTracks
uint32_t const *__restrict__ offsets
GPUCACellT< TrackerTraits > * cells
Definition: gpuFishbone.h:34
CellCutsT(const bool doClusterCut, const bool doZ0Cut, const bool doPtCut, const bool idealConditions, const float z0Cut, const float ptCut, const std::vector< int > &phiCutsV)
__shared__ uint32_t innerLayerCumulativeSize[TrackerTraits::nPairs]
constexpr uint16_t last_barrel_layer
typename TrackingRecHitSoA< TrackerTraits >::PhiBinner PhiBinner
auto const &__restrict__ phiBinner
auto const isOuterHitOfCell
Definition: gpuFishbone.h:41
Definition: GenABIO.cc:168
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
double f[11][100]
GPUCACellT< TrackerTraits > uint32_t const *__restrict__ nCells
Definition: gpuFishbone.h:34
constexpr uint16_t maxNumModules
caStructures::CellTracksT< TrackerTraits > CellTracks
Definition: gpuFishbone.h:22
const dim3 blockIdx
Definition: cudaCompat.h:32
int CellNeighborsVector< TrackerTraits > * cellNeighbors
uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > hh
T __ldg(T const *x)
Definition: cudaCompat.h:137
constexpr float short2phi(short x)
Definition: approx_atan2.h:285
constexpr float minz[nPairs]
HitsConstView< TrackerTraits > H
if(innerB1) if(mes > 0 &&mes< T bool innerB2
constexpr float maxz[nPairs]
caStructures::CellTracksVectorT< TrackerTraits > CellTracksVector
Definition: gpuFishbone.h:26
caStructures::OuterHitOfCellT< TrackerTraits > OuterHitOfCell
Definition: gpuFishbone.h:28
#define __device__
typename GPUCACellT< TrackerTraits >::HitsConstView HitsConstView
Definition: gpuFishbone.h:30
__shared__ uint32_t ntot
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
constexpr float maxr[nPairs]