CMS 3D CMS Logo

CAPixelDoubletsAlgos.h
Go to the documentation of this file.
1 #ifndef RecoTracker_PixelSeeding_plugins_alpaka_CAPixelDoubletsAlgos_h
2 #define RecoTracker_PixelSeeding_plugins_alpaka_CAPixelDoubletsAlgos_h
3 
4 #include <algorithm>
5 #include <cmath>
6 #include <cstdint>
7 #include <cstdio>
8 #include <limits>
9 
10 #include <alpaka/alpaka.hpp>
11 
18 
19 #include "CACell.h"
20 #include "CAStructures.h"
21 
22 //#define GPU_DEBUG
23 //#define NTUPLE_DEBUG
24 
26  using namespace cms::alpakatools;
27 
28  template <typename TrackerTraits>
30  template <typename TrackerTraits>
32  template <typename TrackerTraits>
34  template <typename TrackerTraits>
36  template <typename TrackerTraits>
38  template <typename TrackerTraits>
40 
41  template <typename TrackerTraits>
42  struct CellCutsT {
44  using T = TrackerTraits;
45 
46  CellCutsT() = default;
47 
48  CellCutsT(const bool doClusterCut,
49  const bool doZ0Cut,
50  const bool doPtCut,
51  const bool idealConditions,
52  const float z0Cut,
53  const float ptCut,
54  const std::vector<int>& phiCutsV)
55  : doClusterCut_(doClusterCut),
56  doZ0Cut_(doZ0Cut),
57  doPtCut_(doPtCut),
58  idealConditions_(idealConditions),
59  z0Cut_(z0Cut),
60  ptCut_(ptCut) {
61  assert(phiCutsV.size() == TrackerTraits::nPairs);
62  std::copy(phiCutsV.begin(), phiCutsV.end(), &phiCuts[0]);
63  }
64 
66  bool doZ0Cut_;
67  bool doPtCut_;
68  bool idealConditions_; //this is actually not used by phase2
69 
70  float z0Cut_; //FIXME: check if could be const now
71  float ptCut_;
72 
74 
75  template <typename TAcc>
76  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool __attribute__((always_inline))
77  zSizeCut(const TAcc& acc, H hh, int i, int o) const {
78  const uint32_t mi = hh[i].detectorIndex();
79 
80  bool innerB1 = mi < T::last_bpix1_detIndex;
81  bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2;
82  auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1;
83 
84  if (mes < 0)
85  return false;
86 
87  const uint32_t mo = hh[o].detectorIndex();
88  auto so = hh[o].clusterSizeY();
89 
90  auto dz = hh[i].zGlobal() - hh[o].zGlobal();
91  auto dr = hh[i].rGlobal() - hh[o].rGlobal();
92 
93  auto innerBarrel = mi < T::last_barrel_detIndex;
94  auto onlyBarrel = mo < T::last_barrel_detIndex;
95 
96  if (not innerBarrel and not onlyBarrel)
97  return false;
98  auto dy = innerB1 ? T::maxDYsize12 : T::maxDYsize;
99 
100  return onlyBarrel ? so > 0 && std::abs(so - mes) > dy
101  : innerBarrel && std::abs(mes - int(std::abs(dz / dr) * T::dzdrFact + 0.5f)) > T::maxDYPred;
102  }
103 
104  template <typename TAcc>
105  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool __attribute__((always_inline))
106  clusterCut(const TAcc& acc, H hh, uint32_t i) const {
107  const uint32_t mi = hh[i].detectorIndex();
108  bool innerB1orB2 = mi < T::last_bpix2_detIndex;
109 
110  if (!innerB1orB2)
111  return false;
112 
113  bool innerB1 = mi < T::last_bpix1_detIndex;
114  bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2;
115  auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1;
116 
117  if (innerB1) // B1
118  if (mes > 0 && mes < T::minYsizeB1)
119  return true; // only long cluster (5*8)
120  bool innerB2 = (mi >= T::last_bpix1_detIndex) && (mi < T::last_bpix2_detIndex); //FIXME number
121  if (innerB2) // B2 and F1
122  if (mes > 0 && mes < T::minYsizeB2)
123  return true;
124 
125  return false;
126  }
127  };
128 
129  template <typename TrackerTraits, typename TAcc>
130  ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline))
131  doubletsFromHisto(const TAcc& acc,
132  uint32_t nPairs,
133  const uint32_t maxNumOfDoublets,
135  uint32_t* nCells,
140  CellCutsT<TrackerTraits> const& cuts) { // ysize cuts (z in the barrel) times 8
141  // these are used if doClusterCut is true
142 
143  const bool doClusterCut = cuts.doClusterCut_;
144  const bool doZ0Cut = cuts.doZ0Cut_;
145  const bool doPtCut = cuts.doPtCut_;
146 
147  const float z0cut = cuts.z0Cut_; // cm
148  const float hardPtCut = cuts.ptCut_; // GeV
149  // cm (1 GeV track has 1 GeV/c / (e * 3.8T) ~ 87 cm radius in a 3.8T field)
150  const float minRadius = hardPtCut * 87.78f;
151  const float minRadius2T4 = 4.f * minRadius * minRadius;
152 
154 
155  auto const& __restrict__ phiBinner = hh.phiBinner();
156  uint32_t const* __restrict__ offsets = hh.hitsLayerStart().data();
158 
159  auto layerSize = [=](uint8_t li) { return offsets[li + 1] - offsets[li]; };
160 
161  // nPairsMax to be optimized later (originally was 64).
162  // If it should much be bigger, consider using a block-wide parallel prefix scan,
163  // e.g. see https://nvlabs.github.io/cub/classcub_1_1_warp_scan.html
164  auto& innerLayerCumulativeSize = alpaka::declareSharedVar<uint32_t[TrackerTraits::nPairs], __COUNTER__>(acc);
165  auto& ntot = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
166 
167  constexpr uint32_t dimIndexY = 0u;
168  constexpr uint32_t dimIndexX = 1u;
169  const uint32_t threadIdxLocalY(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[dimIndexY]);
170  const uint32_t threadIdxLocalX(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[dimIndexX]);
171 
172  if (threadIdxLocalY == 0 && threadIdxLocalX == 0) {
174  for (uint32_t i = 1; i < nPairs; ++i) {
176  }
178  }
179  alpaka::syncBlockThreads(acc);
180 
181  // declared outside the loop, as it cannot go backward
182  uint32_t pairLayerId = 0;
183 
184  // outermost parallel loop, using all grid elements along the slower dimension (Y or 0 in a 2D grid)
185  for (uint32_t j : cms::alpakatools::uniform_elements_y(acc, ntot)) {
186  // move to lower_bound ?
188  ;
189  --pairLayerId;
190 
194 
196  uint8_t outer = TrackerTraits::layerPairs[2 * pairLayerId + 1];
198 
199  auto hoff = PhiBinner::histOff(outer);
200  auto i = (0 == pairLayerId) ? j : j - innerLayerCumulativeSize[pairLayerId - 1];
201  i += offsets[inner];
202 
205 
206  // found hit corresponding to our worker thread, now do the job
207  if (hh[i].detectorIndex() > pixelClustering::maxNumModules)
208  continue; // invalid
209 
210  /* maybe clever, not effective when zoCut is on
211  auto bpos = (mi%8)/4; // if barrel is 1 for z>0
212  auto fpos = (outer>3) & (outer<7);
213  if ( ((inner<3) & (outer>3)) && bpos!=fpos) continue;
214  */
215 
216  auto mez = hh[i].zGlobal();
217 
219  continue;
220 
221  if (doClusterCut && outer > pixelTopology::last_barrel_layer && cuts.clusterCut(acc, hh, i))
222  continue;
223 
224  auto mep = hh[i].iphi();
225  auto mer = hh[i].rGlobal();
226 
227  // all cuts: true if fails
228  auto ptcut = [&](int j, int16_t idphi) {
229  auto r2t4 = minRadius2T4;
230  auto ri = mer;
231  auto ro = hh[j].rGlobal();
232  auto dphi = short2phi(idphi);
233  return dphi * dphi * (r2t4 - ri * ro) > (ro - ri) * (ro - ri);
234  };
235  auto z0cutoff = [&](int j) {
236  auto zo = hh[j].zGlobal();
237  auto ro = hh[j].rGlobal();
238  auto dr = ro - mer;
239  return dr > TrackerTraits::maxr[pairLayerId] || dr < 0 || std::abs((mez * ro - mer * zo)) > z0cut * dr;
240  };
241 
242  auto iphicut = cuts.phiCuts[pairLayerId];
243 
244  auto kl = PhiBinner::bin(int16_t(mep - iphicut));
245  auto kh = PhiBinner::bin(int16_t(mep + iphicut));
246  auto incr = [](auto& k) { return k = (k + 1) % PhiBinner::nbins(); };
247 
248 #ifdef GPU_DEBUG
249  int tot = 0;
250  int nmin = 0;
251  int tooMany = 0;
252 #endif
253 
254  auto khh = kh;
255  incr(khh);
256  for (auto kk = kl; kk != khh; incr(kk)) {
257 #ifdef GPU_DEBUG
258  if (kk != kl && kk != kh)
259  nmin += phiBinner.size(kk + hoff);
260 #endif
261  auto const* __restrict__ p = phiBinner.begin(kk + hoff);
262  auto const* __restrict__ e = phiBinner.end(kk + hoff);
263  auto const maxpIndex = e - p;
264 
265  // innermost parallel loop, using the block elements along the faster dimension (X or 1 in a 2D grid)
266  for (uint32_t pIndex : cms::alpakatools::independent_group_elements_x(acc, maxpIndex)) {
267  // FIXME implement alpaka::ldg and use it here? or is it const* __restrict__ enough?
268  auto oi = p[pIndex];
270  ALPAKA_ASSERT_ACC(oi < offsets[outer + 1]);
271  auto mo = hh[oi].detectorIndex();
272 
273  // invalid
275  continue;
276 
277  if (doZ0Cut && z0cutoff(oi))
278  continue;
279 
280  auto mop = hh[oi].iphi();
281  uint16_t idphi = std::min(std::abs(int16_t(mop - mep)), std::abs(int16_t(mep - mop)));
282 
283  if (idphi > iphicut)
284  continue;
285 
286  if (doClusterCut && cuts.zSizeCut(acc, hh, i, oi))
287  continue;
288 
289  if (doPtCut && ptcut(oi, idphi))
290  continue;
291 
292  auto ind = alpaka::atomicAdd(acc, nCells, (uint32_t)1, alpaka::hierarchy::Blocks{});
293  if (ind >= maxNumOfDoublets) {
294  alpaka::atomicSub(acc, nCells, (uint32_t)1, alpaka::hierarchy::Blocks{});
295  break;
296  } // move to SimpleVector??
297  cells[ind].init(*cellNeighbors, *cellTracks, hh, pairLayerId, i, oi);
298  isOuterHitOfCell[oi].push_back(acc, ind);
299 #ifdef GPU_DEBUG
300  if (isOuterHitOfCell[oi].full())
301  ++tooMany;
302  ++tot;
303 #endif
304  }
305  }
306 // #endif
307 #ifdef GPU_DEBUG
308  if (tooMany > 0 or tot > 0)
309  printf("OuterHitOfCell for %d in layer %d/%d, %d,%d %d, %d %.3f %.3f %s\n",
310  i,
311  inner,
312  outer,
313  nmin,
314  tot,
315  tooMany,
316  iphicut,
319  tooMany > 0 ? "FULL!!" : "not full.");
320 #endif
321  } // loop in block...
322  }
323 
324 } // namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets
325 
326 #endif // RecoTracker_PixelSeeding_plugins_alpaka_CAPixelDoubletsAlgos_h
typename TrackingRecHitSoA< TrackerTraits >::PhiBinner PhiBinner
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > * cellNeighbors
T1 atomicSub(T1 *a, T2 b)
Definition: cudaCompat.h:73
caStructures::CellNeighborsT< TrackerTraits > CellNeighbors
Definition: CAFishbone.h:23
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > * cells
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > OuterHitOfCell< TrackerTraits > CellCutsT< TrackerTraits > const & cuts
CellCutsT(const bool doClusterCut, const bool doZ0Cut, const bool doPtCut, const bool idealConditions, const float z0Cut, const float ptCut, const std::vector< int > &phiCutsV)
assert(be >=bs)
const uint32_t threadIdxLocalY(alpaka::getIdx< alpaka::Block, alpaka::Threads >(acc)[dimIndexY])
constexpr uint16_t last_barrel_layer
ALPAKA_FN_ACC auto uniform_elements_y(TAcc const &acc, TArgs... args)
Definition: workdivision.h:344
const uint32_t threadIdxLocalX(alpaka::getIdx< alpaka::Block, alpaka::Threads >(acc)[dimIndexX])
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
Definition: GenABIO.cc:168
std::vector< Block > Blocks
Definition: Block.h:99
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
double f[11][100]
ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline)) doubletsFromHisto(const TAcc &acc
ALPAKA_FN_ACC auto independent_group_elements_x(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > hh
caStructures::CellTracksVectorT< TrackerTraits > CellTracksVector
Definition: CAFishbone.h:29
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t nPairs
typename CACellT< TrackerTraits >::HitsConstView HitsConstView
Definition: CAFishbone.h:33
caStructures::CellNeighborsVectorT< TrackerTraits > CellNeighborsVector
Definition: CAFishbone.h:27
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t maxNumOfDoublets
constexpr float short2phi(short x)
Definition: approx_atan2.h:285
constexpr float minz[nPairs]
caStructures::OuterHitOfCellT< TrackerTraits > OuterHitOfCell
Definition: CAFishbone.h:31
caStructures::CellTracksT< TrackerTraits > CellTracks
Definition: CAFishbone.h:25
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t * nCells
constexpr uint16_t maxNumModules
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > * cellTracks
constexpr float maxz[nPairs]
TrackingRecHitSoAConstView< TrackerTraits > HitsConstView
Definition: CACell.h:36
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > OuterHitOfCell< TrackerTraits > isOuterHitOfCell
constexpr float maxr[nPairs]