CMS 3D CMS Logo

CAPixelDoubletsAlgos.h
Go to the documentation of this file.
1 #ifndef RecoTracker_PixelSeeding_plugins_alpaka_CAPixelDoubletsAlgos_h
2 #define RecoTracker_PixelSeeding_plugins_alpaka_CAPixelDoubletsAlgos_h
3 
4 #include <algorithm>
5 #include <cmath>
6 #include <cstdint>
7 #include <cstdio>
8 #include <limits>
9 
10 #include <alpaka/alpaka.hpp>
11 
19 
20 #include "CACell.h"
21 #include "CAStructures.h"
22 
23 //#define GPU_DEBUG
24 //#define NTUPLE_DEBUG
25 
27  using namespace cms::alpakatools;
28 
29  template <typename TrackerTraits>
31  template <typename TrackerTraits>
33  template <typename TrackerTraits>
35  template <typename TrackerTraits>
37  template <typename TrackerTraits>
39  template <typename TrackerTraits>
41 
42  template <typename TrackerTraits>
43  struct CellCutsT {
45  using T = TrackerTraits;
46 
47  CellCutsT() = default;
48 
49  CellCutsT(const bool doClusterCut,
50  const bool doZ0Cut,
51  const bool doPtCut,
52  const bool idealConditions,
53  const float z0Cut,
54  const float ptCut,
55  const std::vector<int>& phiCutsV)
56  : doClusterCut_(doClusterCut),
57  doZ0Cut_(doZ0Cut),
58  doPtCut_(doPtCut),
59  idealConditions_(idealConditions),
60  z0Cut_(z0Cut),
61  ptCut_(ptCut) {
62  assert(phiCutsV.size() == TrackerTraits::nPairs);
63  std::copy(phiCutsV.begin(), phiCutsV.end(), &phiCuts[0]);
64  }
65 
67  bool doZ0Cut_;
68  bool doPtCut_;
69  bool idealConditions_; //this is actually not used by phase2
70 
71  float z0Cut_; //FIXME: check if could be const now
72  float ptCut_;
73 
75 
76  template <typename TAcc>
77  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool __attribute__((always_inline))
78  zSizeCut(const TAcc& acc, H hh, int i, int o) const {
79  const uint32_t mi = hh[i].detectorIndex();
80 
81  bool innerB1 = mi < T::last_bpix1_detIndex;
82  bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2;
83  auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1;
84 
85  if (mes < 0)
86  return false;
87 
88  const uint32_t mo = hh[o].detectorIndex();
89  auto so = hh[o].clusterSizeY();
90 
91  auto dz = hh[i].zGlobal() - hh[o].zGlobal();
92  auto dr = hh[i].rGlobal() - hh[o].rGlobal();
93 
94  auto innerBarrel = mi < T::last_barrel_detIndex;
95  auto onlyBarrel = mo < T::last_barrel_detIndex;
96 
97  if (not innerBarrel and not onlyBarrel)
98  return false;
99  auto dy = innerB1 ? T::maxDYsize12 : T::maxDYsize;
100 
101  return onlyBarrel ? so > 0 && std::abs(so - mes) > dy
102  : innerBarrel && std::abs(mes - int(std::abs(dz / dr) * T::dzdrFact + 0.5f)) > T::maxDYPred;
103  }
104 
105  template <typename TAcc>
106  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool __attribute__((always_inline))
107  clusterCut(const TAcc& acc, H hh, uint32_t i) const {
108  const uint32_t mi = hh[i].detectorIndex();
109  bool innerB1orB2 = mi < T::last_bpix2_detIndex;
110 
111  if (!innerB1orB2)
112  return false;
113 
114  bool innerB1 = mi < T::last_bpix1_detIndex;
115  bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2;
116  auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1;
117 
118  if (innerB1) // B1
119  if (mes > 0 && mes < T::minYsizeB1)
120  return true; // only long cluster (5*8)
121  bool innerB2 = (mi >= T::last_bpix1_detIndex) && (mi < T::last_bpix2_detIndex); //FIXME number
122  if (innerB2) // B2 and F1
123  if (mes > 0 && mes < T::minYsizeB2)
124  return true;
125 
126  return false;
127  }
128  };
129 
130  template <typename TrackerTraits, typename TAcc>
131  ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline))
132  doubletsFromHisto(const TAcc& acc,
133  uint32_t nPairs,
134  const uint32_t maxNumOfDoublets,
136  uint32_t* nCells,
141  CellCutsT<TrackerTraits> const& cuts) { // ysize cuts (z in the barrel) times 8
142  // these are used if doClusterCut is true
143 
144  const bool doClusterCut = cuts.doClusterCut_;
145  const bool doZ0Cut = cuts.doZ0Cut_;
146  const bool doPtCut = cuts.doPtCut_;
147 
148  const float z0cut = cuts.z0Cut_; // cm
149  const float hardPtCut = cuts.ptCut_; // GeV
150  // cm (1 GeV track has 1 GeV/c / (e * 3.8T) ~ 87 cm radius in a 3.8T field)
151  const float minRadius = hardPtCut * 87.78f;
152  const float minRadius2T4 = 4.f * minRadius * minRadius;
153 
155 
156  auto const& __restrict__ phiBinner = hh.phiBinner();
157  uint32_t const* __restrict__ offsets = hh.hitsLayerStart().data();
159 
160  auto layerSize = [=](uint8_t li) { return offsets[li + 1] - offsets[li]; };
161 
162  // nPairsMax to be optimized later (originally was 64).
163  // If it should much be bigger, consider using a block-wide parallel prefix scan,
164  // e.g. see https://nvlabs.github.io/cub/classcub_1_1_warp_scan.html
165  auto& innerLayerCumulativeSize = alpaka::declareSharedVar<uint32_t[TrackerTraits::nPairs], __COUNTER__>(acc);
166  auto& ntot = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
167 
168  constexpr uint32_t dimIndexY = 0u;
169  constexpr uint32_t dimIndexX = 1u;
170  const uint32_t threadIdxLocalY(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[dimIndexY]);
171  const uint32_t threadIdxLocalX(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[dimIndexX]);
172 
173  if (threadIdxLocalY == 0 && threadIdxLocalX == 0) {
175  for (uint32_t i = 1; i < nPairs; ++i) {
177  }
179  }
180  alpaka::syncBlockThreads(acc);
181 
182  // declared outside the loop, as it cannot go backward
183  uint32_t pairLayerId = 0;
184 
185  // outermost parallel loop, using all grid elements along the slower dimension (Y or 0 in a 2D grid)
186  for (uint32_t j : cms::alpakatools::uniform_elements_y(acc, ntot)) {
187  // move to lower_bound ?
189  ;
190  --pairLayerId;
191 
195 
197  uint8_t outer = TrackerTraits::layerPairs[2 * pairLayerId + 1];
199 
200  auto hoff = PhiBinner::histOff(outer);
201  auto i = (0 == pairLayerId) ? j : j - innerLayerCumulativeSize[pairLayerId - 1];
202  i += offsets[inner];
203 
206 
207  // found hit corresponding to our worker thread, now do the job
208  if (hh[i].detectorIndex() > pixelClustering::maxNumModules)
209  continue; // invalid
210 
211  /* maybe clever, not effective when zoCut is on
212  auto bpos = (mi%8)/4; // if barrel is 1 for z>0
213  auto fpos = (outer>3) & (outer<7);
214  if ( ((inner<3) & (outer>3)) && bpos!=fpos) continue;
215  */
216 
217  auto mez = hh[i].zGlobal();
218 
220  continue;
221 
222  if (doClusterCut && outer > pixelTopology::last_barrel_layer && cuts.clusterCut(acc, hh, i))
223  continue;
224 
225  auto mep = hh[i].iphi();
226  auto mer = hh[i].rGlobal();
227 
228  // all cuts: true if fails
229  auto ptcut = [&](int j, int16_t idphi) {
230  auto r2t4 = minRadius2T4;
231  auto ri = mer;
232  auto ro = hh[j].rGlobal();
233  auto dphi = short2phi(idphi);
234  return dphi * dphi * (r2t4 - ri * ro) > (ro - ri) * (ro - ri);
235  };
236  auto z0cutoff = [&](int j) {
237  auto zo = hh[j].zGlobal();
238  auto ro = hh[j].rGlobal();
239  auto dr = ro - mer;
240  return dr > TrackerTraits::maxr[pairLayerId] || dr < 0 || std::abs((mez * ro - mer * zo)) > z0cut * dr;
241  };
242 
243  auto iphicut = cuts.phiCuts[pairLayerId];
244 
245  auto kl = PhiBinner::bin(int16_t(mep - iphicut));
246  auto kh = PhiBinner::bin(int16_t(mep + iphicut));
247  auto incr = [](auto& k) { return k = (k + 1) % PhiBinner::nbins(); };
248 
249 #ifdef GPU_DEBUG
250  int tot = 0;
251  int nmin = 0;
252  int tooMany = 0;
253 #endif
254 
255  auto khh = kh;
256  incr(khh);
257  for (auto kk = kl; kk != khh; incr(kk)) {
258 #ifdef GPU_DEBUG
259  if (kk != kl && kk != kh)
260  nmin += phiBinner.size(kk + hoff);
261 #endif
262  auto const* __restrict__ p = phiBinner.begin(kk + hoff);
263  auto const* __restrict__ e = phiBinner.end(kk + hoff);
264  auto const maxpIndex = e - p;
265 
266  // innermost parallel loop, using the block elements along the faster dimension (X or 1 in a 2D grid)
267  for (uint32_t pIndex : cms::alpakatools::independent_group_elements_x(acc, maxpIndex)) {
268  // FIXME implement alpaka::ldg and use it here? or is it const* __restrict__ enough?
269  auto oi = p[pIndex];
271  ALPAKA_ASSERT_ACC(oi < offsets[outer + 1]);
272  auto mo = hh[oi].detectorIndex();
273 
274  // invalid
276  continue;
277 
278  if (doZ0Cut && z0cutoff(oi))
279  continue;
280 
281  auto mop = hh[oi].iphi();
282  uint16_t idphi = std::min(std::abs(int16_t(mop - mep)), std::abs(int16_t(mep - mop)));
283 
284  if (idphi > iphicut)
285  continue;
286 
287  if (doClusterCut && cuts.zSizeCut(acc, hh, i, oi))
288  continue;
289 
290  if (doPtCut && ptcut(oi, idphi))
291  continue;
292 
293  auto ind = alpaka::atomicAdd(acc, nCells, (uint32_t)1, alpaka::hierarchy::Blocks{});
294  if (ind >= maxNumOfDoublets) {
295  alpaka::atomicSub(acc, nCells, (uint32_t)1, alpaka::hierarchy::Blocks{});
296  break;
297  } // move to SimpleVector??
298  cells[ind].init(*cellNeighbors, *cellTracks, hh, pairLayerId, i, oi);
299  isOuterHitOfCell[oi].push_back(acc, ind);
300 #ifdef GPU_DEBUG
301  if (isOuterHitOfCell[oi].full())
302  ++tooMany;
303  ++tot;
304 #endif
305  }
306  }
307 // #endif
308 #ifdef GPU_DEBUG
309  if (tooMany > 0 or tot > 0)
310  printf("OuterHitOfCell for %d in layer %d/%d, %d,%d %d, %d %.3f %.3f %s\n",
311  i,
312  inner,
313  outer,
314  nmin,
315  tot,
316  tooMany,
317  iphicut,
320  tooMany > 0 ? "FULL!!" : "not full.");
321 #endif
322  } // loop in block...
323  }
324 
325 } // namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets
326 
327 #endif // RecoTracker_PixelSeeding_plugins_alpaka_CAPixelDoubletsAlgos_h
typename TrackingRecHitSoA< TrackerTraits >::PhiBinner PhiBinner
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > * cellNeighbors
T1 atomicSub(T1 *a, T2 b)
Definition: cudaCompat.h:73
caStructures::CellNeighborsT< TrackerTraits > CellNeighbors
Definition: CAFishbone.h:23
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > * cells
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > OuterHitOfCell< TrackerTraits > CellCutsT< TrackerTraits > const & cuts
CellCutsT(const bool doClusterCut, const bool doZ0Cut, const bool doPtCut, const bool idealConditions, const float z0Cut, const float ptCut, const std::vector< int > &phiCutsV)
assert(be >=bs)
const uint32_t threadIdxLocalY(alpaka::getIdx< alpaka::Block, alpaka::Threads >(acc)[dimIndexY])
constexpr uint16_t last_barrel_layer
ALPAKA_FN_ACC auto uniform_elements_y(TAcc const &acc, TArgs... args)
Definition: workdivision.h:344
const uint32_t threadIdxLocalX(alpaka::getIdx< alpaka::Block, alpaka::Threads >(acc)[dimIndexX])
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
Definition: GenABIO.cc:168
std::vector< Block > Blocks
Definition: Block.h:99
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
double f[11][100]
ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline)) doubletsFromHisto(const TAcc &acc
ALPAKA_FN_ACC auto independent_group_elements_x(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > hh
caStructures::CellTracksVectorT< TrackerTraits > CellTracksVector
Definition: CAFishbone.h:29
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t nPairs
typename CACellT< TrackerTraits >::HitsConstView HitsConstView
Definition: CAFishbone.h:33
caStructures::CellNeighborsVectorT< TrackerTraits > CellNeighborsVector
Definition: CAFishbone.h:27
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t maxNumOfDoublets
constexpr float short2phi(short x)
Definition: approx_atan2.h:285
constexpr float minz[nPairs]
caStructures::OuterHitOfCellT< TrackerTraits > OuterHitOfCell
Definition: CAFishbone.h:31
caStructures::CellTracksT< TrackerTraits > CellTracks
Definition: CAFishbone.h:25
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t * nCells
constexpr uint16_t maxNumModules
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > * cellTracks
constexpr float maxz[nPairs]
TrackingRecHitSoAConstView< TrackerTraits > HitsConstView
Definition: CACell.h:36
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > OuterHitOfCell< TrackerTraits > isOuterHitOfCell
constexpr float maxr[nPairs]