CMS 3D CMS Logo

CAPixelDoubletsAlgos.h
Go to the documentation of this file.
1 #ifndef RecoTracker_PixelSeeding_plugins_alpaka_CAPixelDoubletsAlgos_h
2 #define RecoTracker_PixelSeeding_plugins_alpaka_CAPixelDoubletsAlgos_h
3 
4 #include <algorithm>
5 #include <cmath>
6 #include <cstdint>
7 #include <cstdio>
8 #include <limits>
9 
10 #include <alpaka/alpaka.hpp>
11 
20 
21 #include "CACell.h"
22 #include "CAStructures.h"
23 
24 //#define GPU_DEBUG
25 //#define NTUPLE_DEBUG
26 
28  using namespace cms::alpakatools;
29 
30  template <typename TrackerTraits>
32  template <typename TrackerTraits>
34  template <typename TrackerTraits>
36  template <typename TrackerTraits>
38  template <typename TrackerTraits>
40  template <typename TrackerTraits>
42 
43  template <typename TrackerTraits>
44  struct CellCutsT {
46  using T = TrackerTraits;
47 
48  CellCutsT() = default;
49 
50  CellCutsT(const bool doClusterCut,
51  const bool doZ0Cut,
52  const bool doPtCut,
53  const bool idealConditions,
54  const float z0Cut,
55  const float ptCut,
56  const std::vector<int>& phiCutsV)
57  : doClusterCut_(doClusterCut),
58  doZ0Cut_(doZ0Cut),
59  doPtCut_(doPtCut),
60  idealConditions_(idealConditions),
61  z0Cut_(z0Cut),
62  ptCut_(ptCut) {
63  assert(phiCutsV.size() == TrackerTraits::nPairs);
64  std::copy(phiCutsV.begin(), phiCutsV.end(), &phiCuts[0]);
65  }
66 
68  bool doZ0Cut_;
69  bool doPtCut_;
70  bool idealConditions_; //this is actually not used by phase2
71 
72  float z0Cut_; //FIXME: check if could be const now
73  float ptCut_;
74 
76 
77  template <typename TAcc>
78  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool __attribute__((always_inline))
79  zSizeCut(const TAcc& acc, H hh, int i, int o) const {
80  const uint32_t mi = hh[i].detectorIndex();
81 
82  bool innerB1 = mi < T::last_bpix1_detIndex;
83  bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2;
84  auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1;
85 
86  if (mes < 0)
87  return false;
88 
89  const uint32_t mo = hh[o].detectorIndex();
90  auto so = hh[o].clusterSizeY();
91 
92  auto dz = hh[i].zGlobal() - hh[o].zGlobal();
93  auto dr = hh[i].rGlobal() - hh[o].rGlobal();
94 
95  auto innerBarrel = mi < T::last_barrel_detIndex;
96  auto onlyBarrel = mo < T::last_barrel_detIndex;
97 
98  if (not innerBarrel and not onlyBarrel)
99  return false;
100  auto dy = innerB1 ? T::maxDYsize12 : T::maxDYsize;
101 
102  return onlyBarrel ? so > 0 && std::abs(so - mes) > dy
103  : innerBarrel && std::abs(mes - int(std::abs(dz / dr) * T::dzdrFact + 0.5f)) > T::maxDYPred;
104  }
105 
106  template <typename TAcc>
107  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool __attribute__((always_inline))
108  clusterCut(const TAcc& acc, H hh, uint32_t i) const {
109  const uint32_t mi = hh[i].detectorIndex();
110  bool innerB1orB2 = mi < T::last_bpix2_detIndex;
111 
112  if (!innerB1orB2)
113  return false;
114 
115  bool innerB1 = mi < T::last_bpix1_detIndex;
116  bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2;
117  auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1;
118 
119  if (innerB1) // B1
120  if (mes > 0 && mes < T::minYsizeB1)
121  return true; // only long cluster (5*8)
122  bool innerB2 = (mi >= T::last_bpix1_detIndex) && (mi < T::last_bpix2_detIndex); //FIXME number
123  if (innerB2) // B2 and F1
124  if (mes > 0 && mes < T::minYsizeB2)
125  return true;
126 
127  return false;
128  }
129  };
130 
131  template <typename TrackerTraits, typename TAcc>
132  ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline))
133  doubletsFromHisto(const TAcc& acc,
134  uint32_t nPairs,
135  const uint32_t maxNumOfDoublets,
137  uint32_t* nCells,
142  CellCutsT<TrackerTraits> const& cuts) { // ysize cuts (z in the barrel) times 8
143  // these are used if doClusterCut is true
144 
145  const bool doClusterCut = cuts.doClusterCut_;
146  const bool doZ0Cut = cuts.doZ0Cut_;
147  const bool doPtCut = cuts.doPtCut_;
148 
149  const float z0cut = cuts.z0Cut_; // cm
150  const float hardPtCut = cuts.ptCut_; // GeV
151  // cm (1 GeV track has 1 GeV/c / (e * 3.8T) ~ 87 cm radius in a 3.8T field)
152  const float minRadius = hardPtCut * 87.78f;
153  const float minRadius2T4 = 4.f * minRadius * minRadius;
154 
156 
157  auto const& __restrict__ phiBinner = hh.phiBinner();
158  uint32_t const* __restrict__ offsets = hh.hitsLayerStart().data();
160 
161  auto layerSize = [=](uint8_t li) { return offsets[li + 1] - offsets[li]; };
162 
163  // nPairsMax to be optimized later (originally was 64).
164  // If it should much be bigger, consider using a block-wide parallel prefix scan,
165  // e.g. see https://nvlabs.github.io/cub/classcub_1_1_warp_scan.html
166  auto& innerLayerCumulativeSize = alpaka::declareSharedVar<uint32_t[TrackerTraits::nPairs], __COUNTER__>(acc);
167  auto& ntot = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
168 
169  constexpr uint32_t dimIndexY = 0u;
170  constexpr uint32_t dimIndexX = 1u;
171  const uint32_t threadIdxLocalY(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[dimIndexY]);
172  const uint32_t threadIdxLocalX(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[dimIndexX]);
173 
174  if (threadIdxLocalY == 0 && threadIdxLocalX == 0) {
176  for (uint32_t i = 1; i < nPairs; ++i) {
178  }
180  }
181  alpaka::syncBlockThreads(acc);
182 
183  // declared outside the loop, as it cannot go backward
184  uint32_t pairLayerId = 0;
185 
186  // outermost parallel loop, using all grid elements along the slower dimension (Y or 0 in a 2D grid)
187  for (uint32_t j : cms::alpakatools::uniform_elements_y(acc, ntot)) {
188  // move to lower_bound ?
190  ;
191  --pairLayerId;
192 
196 
198  uint8_t outer = TrackerTraits::layerPairs[2 * pairLayerId + 1];
200 
201  auto hoff = PhiBinner::histOff(outer);
202  auto i = (0 == pairLayerId) ? j : j - innerLayerCumulativeSize[pairLayerId - 1];
203  i += offsets[inner];
204 
207 
208  // found hit corresponding to our worker thread, now do the job
209  if (hh[i].detectorIndex() > pixelClustering::maxNumModules)
210  continue; // invalid
211 
212  /* maybe clever, not effective when zoCut is on
213  auto bpos = (mi%8)/4; // if barrel is 1 for z>0
214  auto fpos = (outer>3) & (outer<7);
215  if ( ((inner<3) & (outer>3)) && bpos!=fpos) continue;
216  */
217 
218  auto mez = hh[i].zGlobal();
219 
221  continue;
222 
223  if (doClusterCut && outer > pixelTopology::last_barrel_layer && cuts.clusterCut(acc, hh, i))
224  continue;
225 
226  auto mep = hh[i].iphi();
227  auto mer = hh[i].rGlobal();
228 
229  // all cuts: true if fails
230  auto ptcut = [&](int j, int16_t idphi) {
231  auto r2t4 = minRadius2T4;
232  auto ri = mer;
233  auto ro = hh[j].rGlobal();
234  auto dphi = short2phi(idphi);
235  return dphi * dphi * (r2t4 - ri * ro) > (ro - ri) * (ro - ri);
236  };
237  auto z0cutoff = [&](int j) {
238  auto zo = hh[j].zGlobal();
239  auto ro = hh[j].rGlobal();
240  auto dr = ro - mer;
241  return dr > TrackerTraits::maxr[pairLayerId] || dr < 0 || std::abs((mez * ro - mer * zo)) > z0cut * dr;
242  };
243 
244  auto iphicut = cuts.phiCuts[pairLayerId];
245 
246  auto kl = PhiBinner::bin(int16_t(mep - iphicut));
247  auto kh = PhiBinner::bin(int16_t(mep + iphicut));
248  auto incr = [](auto& k) { return k = (k + 1) % PhiBinner::nbins(); };
249 
250 #ifdef GPU_DEBUG
251  int tot = 0;
252  int nmin = 0;
253  int tooMany = 0;
254 #endif
255 
256  auto khh = kh;
257  incr(khh);
258  for (auto kk = kl; kk != khh; incr(kk)) {
259 #ifdef GPU_DEBUG
260  if (kk != kl && kk != kh)
261  nmin += phiBinner.size(kk + hoff);
262 #endif
263  auto const* __restrict__ p = phiBinner.begin(kk + hoff);
264  auto const* __restrict__ e = phiBinner.end(kk + hoff);
265  auto const maxpIndex = e - p;
266 
267  // innermost parallel loop, using the block elements along the faster dimension (X or 1 in a 2D grid)
268  for (uint32_t pIndex : cms::alpakatools::independent_group_elements_x(acc, maxpIndex)) {
269  // FIXME implement alpaka::ldg and use it here? or is it const* __restrict__ enough?
270  auto oi = p[pIndex];
272  ALPAKA_ASSERT_ACC(oi < offsets[outer + 1]);
273  auto mo = hh[oi].detectorIndex();
274 
275  // invalid
277  continue;
278 
279  if (doZ0Cut && z0cutoff(oi))
280  continue;
281 
282  auto mop = hh[oi].iphi();
283  uint16_t idphi = std::min(std::abs(int16_t(mop - mep)), std::abs(int16_t(mep - mop)));
284 
285  if (idphi > iphicut)
286  continue;
287 
288  if (doClusterCut && cuts.zSizeCut(acc, hh, i, oi))
289  continue;
290 
291  if (doPtCut && ptcut(oi, idphi))
292  continue;
293 
294  auto ind = alpaka::atomicAdd(acc, nCells, (uint32_t)1, alpaka::hierarchy::Blocks{});
295  if (ind >= maxNumOfDoublets) {
296  alpaka::atomicSub(acc, nCells, (uint32_t)1, alpaka::hierarchy::Blocks{});
297  break;
298  } // move to SimpleVector??
299  cells[ind].init(*cellNeighbors, *cellTracks, hh, pairLayerId, i, oi);
300  isOuterHitOfCell[oi].push_back(acc, ind);
301 #ifdef GPU_DEBUG
302  if (isOuterHitOfCell[oi].full())
303  ++tooMany;
304  ++tot;
305 #endif
306  }
307  }
308 // #endif
309 #ifdef GPU_DEBUG
310  if (tooMany > 0 or tot > 0)
311  printf("OuterHitOfCell for %d in layer %d/%d, %d,%d %d, %d %.3f %.3f %s\n",
312  i,
313  inner,
314  outer,
315  nmin,
316  tot,
317  tooMany,
318  iphicut,
321  tooMany > 0 ? "FULL!!" : "not full.");
322 #endif
323  } // loop in block...
324  }
325 
326 } // namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets
327 
328 #endif // RecoTracker_PixelSeeding_plugins_alpaka_CAPixelDoubletsAlgos_h
typename TrackingRecHitSoA< TrackerTraits >::PhiBinner PhiBinner
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > * cellNeighbors
T1 atomicSub(T1 *a, T2 b)
Definition: cudaCompat.h:73
caStructures::CellNeighborsT< TrackerTraits > CellNeighbors
Definition: CAFishbone.h:24
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > * cells
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > OuterHitOfCell< TrackerTraits > CellCutsT< TrackerTraits > const & cuts
CellCutsT(const bool doClusterCut, const bool doZ0Cut, const bool doPtCut, const bool idealConditions, const float z0Cut, const float ptCut, const std::vector< int > &phiCutsV)
assert(be >=bs)
const uint32_t threadIdxLocalY(alpaka::getIdx< alpaka::Block, alpaka::Threads >(acc)[dimIndexY])
constexpr uint16_t last_barrel_layer
ALPAKA_FN_ACC auto uniform_elements_y(TAcc const &acc, TArgs... args)
Definition: workdivision.h:344
const uint32_t threadIdxLocalX(alpaka::getIdx< alpaka::Block, alpaka::Threads >(acc)[dimIndexX])
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
Definition: GenABIO.cc:168
std::vector< Block > Blocks
Definition: Block.h:99
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
double f[11][100]
ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline)) doubletsFromHisto(const TAcc &acc
ALPAKA_FN_ACC auto independent_group_elements_x(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > hh
caStructures::CellTracksVectorT< TrackerTraits > CellTracksVector
Definition: CAFishbone.h:30
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t nPairs
typename CACellT< TrackerTraits >::HitsConstView HitsConstView
Definition: CAFishbone.h:34
caStructures::CellNeighborsVectorT< TrackerTraits > CellNeighborsVector
Definition: CAFishbone.h:28
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t maxNumOfDoublets
constexpr float short2phi(short x)
Definition: approx_atan2.h:285
constexpr float minz[nPairs]
caStructures::OuterHitOfCellT< TrackerTraits > OuterHitOfCell
Definition: CAFishbone.h:32
caStructures::CellTracksT< TrackerTraits > CellTracks
Definition: CAFishbone.h:26
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t * nCells
constexpr uint16_t maxNumModules
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > * cellTracks
constexpr float maxz[nPairs]
TrackingRecHitSoAConstView< TrackerTraits > HitsConstView
Definition: CACell.h:36
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > OuterHitOfCell< TrackerTraits > isOuterHitOfCell
constexpr float maxr[nPairs]