CMS 3D CMS Logo

CAPixelDoubletsAlgos.h
Go to the documentation of this file.
1 #ifndef RecoTracker_PixelSeeding_plugins_alpaka_CAPixelDoubletsAlgos_h
2 #define RecoTracker_PixelSeeding_plugins_alpaka_CAPixelDoubletsAlgos_h
3 
4 #include <algorithm>
5 #include <cmath>
6 #include <cstdint>
7 #include <cstdio>
8 #include <limits>
9 
10 #include <alpaka/alpaka.hpp>
11 
19 
20 #include "CACell.h"
21 #include "CAStructures.h"
22 
23 //#define GPU_DEBUG
24 //#define NTUPLE_DEBUG
25 
27  using namespace cms::alpakatools;
28 
29  template <typename TrackerTraits>
31  template <typename TrackerTraits>
33  template <typename TrackerTraits>
35  template <typename TrackerTraits>
37  template <typename TrackerTraits>
39  template <typename TrackerTraits>
41 
42  template <typename TrackerTraits>
43  struct CellCutsT {
45  using T = TrackerTraits;
46 
47  CellCutsT() = default;
48 
49  CellCutsT(const bool doClusterCut,
50  const bool doZ0Cut,
51  const bool doPtCut,
52  const bool idealConditions,
53  const float z0Cut,
54  const float ptCut,
55  const std::vector<int>& phiCutsV)
56  : doClusterCut_(doClusterCut),
57  doZ0Cut_(doZ0Cut),
58  doPtCut_(doPtCut),
59  idealConditions_(idealConditions),
60  z0Cut_(z0Cut),
61  ptCut_(ptCut) {
62  assert(phiCutsV.size() == TrackerTraits::nPairs);
63  std::copy(phiCutsV.begin(), phiCutsV.end(), &phiCuts[0]);
64  }
65 
67  bool doZ0Cut_;
68  bool doPtCut_;
69  bool idealConditions_; //this is actually not used by phase2
70 
71  float z0Cut_; //FIXME: check if could be const now
72  float ptCut_;
73 
75 
76  template <typename TAcc>
77  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool __attribute__((always_inline)) zSizeCut(const TAcc& acc,
78  H hh,
79  int i,
80  int o) const {
81  const uint32_t mi = hh[i].detectorIndex();
82 
83  bool innerB1 = mi < T::last_bpix1_detIndex;
84  bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2;
85  auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1;
86 
87  if (mes < 0)
88  return false;
89 
90  const uint32_t mo = hh[o].detectorIndex();
91  auto so = hh[o].clusterSizeY();
92 
93  auto dz = hh[i].zGlobal() - hh[o].zGlobal();
94  auto dr = hh[i].rGlobal() - hh[o].rGlobal();
95 
96  auto innerBarrel = mi < T::last_barrel_detIndex;
97  auto onlyBarrel = mo < T::last_barrel_detIndex;
98 
99  if (not innerBarrel and not onlyBarrel)
100  return false;
101  auto dy = innerB1 ? T::maxDYsize12 : T::maxDYsize;
102 
103  return onlyBarrel ? so > 0 && std::abs(so - mes) > dy
104  : innerBarrel && std::abs(mes - int(std::abs(dz / dr) * T::dzdrFact + 0.5f)) > T::maxDYPred;
105  }
106 
107  template <typename TAcc>
108  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool __attribute__((always_inline)) clusterCut(const TAcc& acc,
109  H hh,
110  uint32_t i) const {
111  const uint32_t mi = hh[i].detectorIndex();
112  bool innerB1orB2 = mi < T::last_bpix2_detIndex;
113 
114  if (!innerB1orB2)
115  return false;
116 
117  bool innerB1 = mi < T::last_bpix1_detIndex;
118  bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2;
119  auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1;
120 
121  if (innerB1) // B1
122  if (mes > 0 && mes < T::minYsizeB1)
123  return true; // only long cluster (5*8)
124  bool innerB2 = (mi >= T::last_bpix1_detIndex) && (mi < T::last_bpix2_detIndex); //FIXME number
125  if (innerB2) // B2 and F1
126  if (mes > 0 && mes < T::minYsizeB2)
127  return true;
128 
129  return false;
130  }
131  };
132 
133  template <typename TrackerTraits, typename TAcc>
134  ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline)) doubletsFromHisto(
135  const TAcc& acc,
136  uint32_t nPairs,
137  const uint32_t maxNumOfDoublets,
139  uint32_t* nCells,
144  CellCutsT<TrackerTraits> const& cuts) { // ysize cuts (z in the barrel) times 8
145  // these are used if doClusterCut is true
146 
147  const bool doClusterCut = cuts.doClusterCut_;
148  const bool doZ0Cut = cuts.doZ0Cut_;
149  const bool doPtCut = cuts.doPtCut_;
150 
151  const float z0cut = cuts.z0Cut_; // cm
152  const float hardPtCut = cuts.ptCut_; // GeV
153  // cm (1 GeV track has 1 GeV/c / (e * 3.8T) ~ 87 cm radius in a 3.8T field)
154  const float minRadius = hardPtCut * 87.78f;
155  const float minRadius2T4 = 4.f * minRadius * minRadius;
156 
158 
159  auto const& __restrict__ phiBinner = hh.phiBinner();
160  uint32_t const* __restrict__ offsets = hh.hitsLayerStart().data();
162 
163  auto layerSize = [=](uint8_t li) { return offsets[li + 1] - offsets[li]; };
164 
165  // nPairsMax to be optimized later (originally was 64).
166  // If it should much be bigger, consider using a block-wide parallel prefix scan,
167  // e.g. see https://nvlabs.github.io/cub/classcub_1_1_warp_scan.html
168  auto& innerLayerCumulativeSize = alpaka::declareSharedVar<uint32_t[TrackerTraits::nPairs], __COUNTER__>(acc);
169  auto& ntot = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
170 
171  constexpr uint32_t dimIndexY = 0u;
172  constexpr uint32_t dimIndexX = 1u;
173  const uint32_t threadIdxLocalY(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[dimIndexY]);
174  const uint32_t threadIdxLocalX(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[dimIndexX]);
175 
176  if (threadIdxLocalY == 0 && threadIdxLocalX == 0) {
178  for (uint32_t i = 1; i < nPairs; ++i) {
180  }
182  }
183  alpaka::syncBlockThreads(acc);
184 
185  // declared outside the loop, as it cannot go backward
186  uint32_t pairLayerId = 0;
187 
188  // outermost parallel loop, using all grid elements along the slower dimension (Y or 0 in a 2D grid)
189  for (uint32_t j : cms::alpakatools::uniform_elements_y(acc, ntot)) {
190  // move to lower_bound ?
192  ;
193  --pairLayerId;
194 
198 
200  uint8_t outer = TrackerTraits::layerPairs[2 * pairLayerId + 1];
202 
203  auto hoff = PhiBinner::histOff(outer);
204  auto i = (0 == pairLayerId) ? j : j - innerLayerCumulativeSize[pairLayerId - 1];
205  i += offsets[inner];
206 
209 
210  // found hit corresponding to our worker thread, now do the job
211  if (hh[i].detectorIndex() > pixelClustering::maxNumModules)
212  continue; // invalid
213 
214  /* maybe clever, not effective when zoCut is on
215  auto bpos = (mi%8)/4; // if barrel is 1 for z>0
216  auto fpos = (outer>3) & (outer<7);
217  if ( ((inner<3) & (outer>3)) && bpos!=fpos) continue;
218  */
219 
220  auto mez = hh[i].zGlobal();
221 
223  continue;
224 
225  if (doClusterCut && outer > pixelTopology::last_barrel_layer && cuts.clusterCut(acc, hh, i))
226  continue;
227 
228  auto mep = hh[i].iphi();
229  auto mer = hh[i].rGlobal();
230 
231  // all cuts: true if fails
232  auto ptcut = [&](int j, int16_t idphi) {
233  auto r2t4 = minRadius2T4;
234  auto ri = mer;
235  auto ro = hh[j].rGlobal();
236  auto dphi = short2phi(idphi);
237  return dphi * dphi * (r2t4 - ri * ro) > (ro - ri) * (ro - ri);
238  };
239  auto z0cutoff = [&](int j) {
240  auto zo = hh[j].zGlobal();
241  auto ro = hh[j].rGlobal();
242  auto dr = ro - mer;
243  return dr > TrackerTraits::maxr[pairLayerId] || dr < 0 || std::abs((mez * ro - mer * zo)) > z0cut * dr;
244  };
245 
246  auto iphicut = cuts.phiCuts[pairLayerId];
247 
248  auto kl = PhiBinner::bin(int16_t(mep - iphicut));
249  auto kh = PhiBinner::bin(int16_t(mep + iphicut));
250  auto incr = [](auto& k) { return k = (k + 1) % PhiBinner::nbins(); };
251 
252 #ifdef GPU_DEBUG
253  int tot = 0;
254  int nmin = 0;
255  int tooMany = 0;
256 #endif
257 
258  auto khh = kh;
259  incr(khh);
260  for (auto kk = kl; kk != khh; incr(kk)) {
261 #ifdef GPU_DEBUG
262  if (kk != kl && kk != kh)
263  nmin += phiBinner.size(kk + hoff);
264 #endif
265  auto const* __restrict__ p = phiBinner.begin(kk + hoff);
266  auto const* __restrict__ e = phiBinner.end(kk + hoff);
267  auto const maxpIndex = e - p;
268 
269  // innermost parallel loop, using the block elements along the faster dimension (X or 1 in a 2D grid)
270  for (uint32_t pIndex : cms::alpakatools::independent_group_elements_x(acc, maxpIndex)) {
271  // FIXME implement alpaka::ldg and use it here? or is it const* __restrict__ enough?
272  auto oi = p[pIndex];
274  ALPAKA_ASSERT_ACC(oi < offsets[outer + 1]);
275  auto mo = hh[oi].detectorIndex();
276 
277  // invalid
279  continue;
280 
281  if (doZ0Cut && z0cutoff(oi))
282  continue;
283 
284  auto mop = hh[oi].iphi();
285  uint16_t idphi = std::min(std::abs(int16_t(mop - mep)), std::abs(int16_t(mep - mop)));
286 
287  if (idphi > iphicut)
288  continue;
289 
290  if (doClusterCut && cuts.zSizeCut(acc, hh, i, oi))
291  continue;
292 
293  if (doPtCut && ptcut(oi, idphi))
294  continue;
295 
296  auto ind = alpaka::atomicAdd(acc, nCells, (uint32_t)1, alpaka::hierarchy::Blocks{});
297  if (ind >= maxNumOfDoublets) {
298  alpaka::atomicSub(acc, nCells, (uint32_t)1, alpaka::hierarchy::Blocks{});
299  break;
300  } // move to SimpleVector??
301  cells[ind].init(*cellNeighbors, *cellTracks, hh, pairLayerId, i, oi);
302  isOuterHitOfCell[oi].push_back(acc, ind);
303 #ifdef GPU_DEBUG
304  if (isOuterHitOfCell[oi].full())
305  ++tooMany;
306  ++tot;
307 #endif
308  }
309  }
310 // #endif
311 #ifdef GPU_DEBUG
312  if (tooMany > 0 or tot > 0)
313  printf("OuterHitOfCell for %d in layer %d/%d, %d,%d %d, %d %.3f %.3f %s\n",
314  i,
315  inner,
316  outer,
317  nmin,
318  tot,
319  tooMany,
320  iphicut,
323  tooMany > 0 ? "FULL!!" : "not full.");
324 #endif
325  } // loop in block...
326  }
327 
328 } // namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets
329 
330 #endif // RecoTracker_PixelSeeding_plugins_alpaka_CAPixelDoubletsAlgos_h
typename TrackingRecHitSoA< TrackerTraits >::PhiBinner PhiBinner
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > * cellNeighbors
T1 atomicSub(T1 *a, T2 b)
Definition: cudaCompat.h:73
caStructures::CellNeighborsT< TrackerTraits > CellNeighbors
Definition: CAFishbone.h:23
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > * cells
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > OuterHitOfCell< TrackerTraits > CellCutsT< TrackerTraits > const & cuts
CellCutsT(const bool doClusterCut, const bool doZ0Cut, const bool doPtCut, const bool idealConditions, const float z0Cut, const float ptCut, const std::vector< int > &phiCutsV)
assert(be >=bs)
const uint32_t threadIdxLocalY(alpaka::getIdx< alpaka::Block, alpaka::Threads >(acc)[dimIndexY])
constexpr uint16_t last_barrel_layer
ALPAKA_FN_ACC auto uniform_elements_y(TAcc const &acc, TArgs... args)
Definition: workdivision.h:344
const uint32_t threadIdxLocalX(alpaka::getIdx< alpaka::Block, alpaka::Threads >(acc)[dimIndexX])
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
Definition: GenABIO.cc:168
std::vector< Block > Blocks
Definition: Block.h:99
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
double f[11][100]
ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline)) doubletsFromHisto(const TAcc &acc
ALPAKA_FN_ACC auto independent_group_elements_x(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > hh
caStructures::CellTracksVectorT< TrackerTraits > CellTracksVector
Definition: CAFishbone.h:29
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t nPairs
typename CACellT< TrackerTraits >::HitsConstView HitsConstView
Definition: CAFishbone.h:33
caStructures::CellNeighborsVectorT< TrackerTraits > CellNeighborsVector
Definition: CAFishbone.h:27
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t maxNumOfDoublets
constexpr float short2phi(short x)
Definition: approx_atan2.h:285
constexpr float minz[nPairs]
caStructures::OuterHitOfCellT< TrackerTraits > OuterHitOfCell
Definition: CAFishbone.h:31
caStructures::CellTracksT< TrackerTraits > CellTracks
Definition: CAFishbone.h:25
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t * nCells
constexpr uint16_t maxNumModules
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > * cellTracks
constexpr float maxz[nPairs]
TrackingRecHitSoAConstView< TrackerTraits > HitsConstView
Definition: CACell.h:36
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > OuterHitOfCell< TrackerTraits > isOuterHitOfCell
constexpr float maxr[nPairs]