CMS 3D CMS Logo

CAPixelDoubletsAlgos.h
Go to the documentation of this file.
1 #ifndef RecoPixelVertexing_PixelTriplets_alpaka_CAPixelDoubletsAlgos_h
2 #define RecoPixelVertexing_PixelTriplets_alpaka_CAPixelDoubletsAlgos_h
3 
4 #include <algorithm>
5 #include <cmath>
6 #include <cstdint>
7 #include <cstdio>
8 #include <limits>
9 
10 #include <alpaka/alpaka.hpp>
11 
18 
19 #include "CACell.h"
20 #include "CAStructures.h"
21 
22 //#define GPU_DEBUG
23 //#define NTUPLE_DEBUG
24 
26  namespace caPixelDoublets {
27  using namespace cms::alpakatools;
28 
29  template <typename TrackerTraits>
31  template <typename TrackerTraits>
33  template <typename TrackerTraits>
35  template <typename TrackerTraits>
37  template <typename TrackerTraits>
39  template <typename TrackerTraits>
41 
42  template <typename TrackerTraits>
43  struct CellCutsT {
45  using T = TrackerTraits;
46 
47  CellCutsT() = default;
48 
49  CellCutsT(const bool doClusterCut,
50  const bool doZ0Cut,
51  const bool doPtCut,
52  const bool idealConditions,
53  const float z0Cut,
54  const float ptCut,
55  const std::vector<int>& phiCutsV)
56  : doClusterCut_(doClusterCut),
57  doZ0Cut_(doZ0Cut),
58  doPtCut_(doPtCut),
59  idealConditions_(idealConditions),
60  z0Cut_(z0Cut),
61  ptCut_(ptCut) {
62  assert(phiCutsV.size() == TrackerTraits::nPairs);
63  std::copy(phiCutsV.begin(), phiCutsV.end(), &phiCuts[0]);
64  }
65 
67  bool doZ0Cut_;
68  bool doPtCut_;
69  bool idealConditions_; //this is actually not used by phase2
70 
71  float z0Cut_; //FIXME: check if could be const now
72  float ptCut_;
73 
75 
76  template <typename TAcc>
77  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool __attribute__((always_inline))
78  zSizeCut(const TAcc& acc, H hh, int i, int o) const {
79  const uint32_t mi = hh[i].detectorIndex();
80 
81  bool innerB1 = mi < T::last_bpix1_detIndex;
82  bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2;
83  auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1;
84 
85  if (mes < 0)
86  return false;
87 
88  const uint32_t mo = hh[o].detectorIndex();
89  auto so = hh[o].clusterSizeY();
90 
91  auto dz = hh[i].zGlobal() - hh[o].zGlobal();
92  auto dr = hh[i].rGlobal() - hh[o].rGlobal();
93 
94  auto innerBarrel = mi < T::last_barrel_detIndex;
95  auto onlyBarrel = mo < T::last_barrel_detIndex;
96 
97  if (not innerBarrel and not onlyBarrel)
98  return false;
99  auto dy = innerB1 ? T::maxDYsize12 : T::maxDYsize;
100 
101  return onlyBarrel ? so > 0 && std::abs(so - mes) > dy
102  : innerBarrel && std::abs(mes - int(std::abs(dz / dr) * T::dzdrFact + 0.5f)) > T::maxDYPred;
103  }
104 
105  template <typename TAcc>
106  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool __attribute__((always_inline))
107  clusterCut(const TAcc& acc, H hh, uint32_t i) const {
108  const uint32_t mi = hh[i].detectorIndex();
109  bool innerB1orB2 = mi < T::last_bpix2_detIndex;
110 
111  if (!innerB1orB2)
112  return false;
113 
114  bool innerB1 = mi < T::last_bpix1_detIndex;
115  bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2;
116  auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1;
117 
118  if (innerB1) // B1
119  if (mes > 0 && mes < T::minYsizeB1)
120  return true; // only long cluster (5*8)
121  bool innerB2 = (mi >= T::last_bpix1_detIndex) && (mi < T::last_bpix2_detIndex); //FIXME number
122  if (innerB2) // B2 and F1
123  if (mes > 0 && mes < T::minYsizeB2)
124  return true;
125 
126  return false;
127  }
128  };
129 
130  template <typename TrackerTraits, typename TAcc>
131  ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline))
132  doubletsFromHisto(const TAcc& acc,
133  uint32_t nPairs,
134  const uint32_t maxNumOfDoublets,
136  uint32_t* nCells,
141  CellCutsT<TrackerTraits> const& cuts) { // ysize cuts (z in the barrel) times 8
142  // these are used if doClusterCut is true
143 
144  const bool doClusterCut = cuts.doClusterCut_;
145  const bool doZ0Cut = cuts.doZ0Cut_;
146  const bool doPtCut = cuts.doPtCut_;
147 
148  const float z0cut = cuts.z0Cut_; // cm
149  const float hardPtCut = cuts.ptCut_; // GeV
150  // cm (1 GeV track has 1 GeV/c / (e * 3.8T) ~ 87 cm radius in a 3.8T field)
151  const float minRadius = hardPtCut * 87.78f;
152  const float minRadius2T4 = 4.f * minRadius * minRadius;
153 
155 
156  auto const& __restrict__ phiBinner = hh.phiBinner();
157  uint32_t const* __restrict__ offsets = hh.hitsLayerStart().data();
159 
160  auto layerSize = [=](uint8_t li) { return offsets[li + 1] - offsets[li]; };
161 
162  // nPairsMax to be optimized later (originally was 64).
163  // If it should much be bigger, consider using a block-wide parallel prefix scan,
164  // e.g. see https://nvlabs.github.io/cub/classcub_1_1_warp_scan.html
165  auto& innerLayerCumulativeSize = alpaka::declareSharedVar<uint32_t[TrackerTraits::nPairs], __COUNTER__>(acc);
166  auto& ntot = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
167 
168  constexpr uint32_t dimIndexY = 0u;
169  constexpr uint32_t dimIndexX = 1u;
170  const uint32_t threadIdxLocalY(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[dimIndexY]);
171  const uint32_t threadIdxLocalX(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[dimIndexX]);
172 
173  if (threadIdxLocalY == 0 && threadIdxLocalX == 0) {
175  for (uint32_t i = 1; i < nPairs; ++i) {
177  }
179  }
180  alpaka::syncBlockThreads(acc);
181 
182  // x runs faster
183  const uint32_t blockDimensionX(alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[dimIndexX]);
184  const auto& [firstElementIdxNoStrideX, endElementIdxNoStrideX] =
186 
187  uint32_t pairLayerId = 0; // cannot go backward
188 
189  // Outermost loop on Y
190  const uint32_t gridDimensionY(alpaka::getWorkDiv<alpaka::Grid, alpaka::Elems>(acc)[dimIndexY]);
191  const auto& [firstElementIdxNoStrideY, endElementIdxNoStrideY] =
193  uint32_t firstElementIdxY = firstElementIdxNoStrideY;
194  uint32_t endElementIdxY = endElementIdxNoStrideY;
195 
196  //const uint32_t incY = cms::alpakatools::requires_single_thread_per_block_v<TAcc> ? 1 : gridDimensionY;
197  for (uint32_t j = firstElementIdxY; j < ntot; j++) {
200  break;
201 
203  ;
204  --pairLayerId; // move to lower_bound ??
205 
209 
211  uint8_t outer = TrackerTraits::layerPairs[2 * pairLayerId + 1];
213 
214  auto hoff = PhiBinner::histOff(outer);
215  auto i = (0 == pairLayerId) ? j : j - innerLayerCumulativeSize[pairLayerId - 1];
216  i += offsets[inner];
217 
220 
221  // found hit corresponding to our cuda thread, now do the job
222  if (hh[i].detectorIndex() > pixelClustering::maxNumModules)
223  continue; // invalid
224 
225  /* maybe clever, not effective when zoCut is on
226  auto bpos = (mi%8)/4; // if barrel is 1 for z>0
227  auto fpos = (outer>3) & (outer<7);
228  if ( ((inner<3) & (outer>3)) && bpos!=fpos) continue;
229  */
230 
231  auto mez = hh[i].zGlobal();
232 
234  continue;
235 
236  if (doClusterCut && outer > pixelTopology::last_barrel_layer && cuts.clusterCut(acc, hh, i))
237  continue;
238 
239  auto mep = hh[i].iphi();
240  auto mer = hh[i].rGlobal();
241 
242  // all cuts: true if fails
243  auto ptcut = [&](int j, int16_t idphi) {
244  auto r2t4 = minRadius2T4;
245  auto ri = mer;
246  auto ro = hh[j].rGlobal();
247  auto dphi = short2phi(idphi);
248  return dphi * dphi * (r2t4 - ri * ro) > (ro - ri) * (ro - ri);
249  };
250  auto z0cutoff = [&](int j) {
251  auto zo = hh[j].zGlobal();
252  auto ro = hh[j].rGlobal();
253  auto dr = ro - mer;
254  return dr > TrackerTraits::maxr[pairLayerId] || dr < 0 || std::abs((mez * ro - mer * zo)) > z0cut * dr;
255  };
256 
257  auto iphicut = cuts.phiCuts[pairLayerId];
258 
259  auto kl = PhiBinner::bin(int16_t(mep - iphicut));
260  auto kh = PhiBinner::bin(int16_t(mep + iphicut));
261  auto incr = [](auto& k) { return k = (k + 1) % PhiBinner::nbins(); };
262 
263 #ifdef GPU_DEBUG
264  int tot = 0;
265  int nmin = 0;
266  int tooMany = 0;
267 #endif
268 
269  auto khh = kh;
270  incr(khh);
271  for (auto kk = kl; kk != khh; incr(kk)) {
272 #ifdef GPU_DEBUG
273  if (kk != kl && kk != kh)
274  nmin += phiBinner.size(kk + hoff);
275 #endif
276  auto const* __restrict__ p = phiBinner.begin(kk + hoff);
277  auto const* __restrict__ e = phiBinner.end(kk + hoff);
278  auto const maxpIndex = e - p;
279 
280  // Here we parallelize in X
281  uint32_t firstElementIdxX = firstElementIdxNoStrideX;
282  uint32_t endElementIdxX = endElementIdxNoStrideX;
283 
284  for (uint32_t pIndex = firstElementIdxX; pIndex < maxpIndex; ++pIndex) {
286  pIndex, firstElementIdxX, endElementIdxX, blockDimensionX, maxpIndex))
287  break;
288  auto oi = p[pIndex]; // auto oi = __ldg(p); is not allowed since __ldg is device-only
291  auto mo = hh[oi].detectorIndex();
292 
294  continue; // invalid
295 
296  if (doZ0Cut && z0cutoff(oi))
297  continue;
298 
299  auto mop = hh[oi].iphi();
300  uint16_t idphi = std::min(std::abs(int16_t(mop - mep)), std::abs(int16_t(mep - mop)));
301 
302  if (idphi > iphicut)
303  continue;
304 
305  if (doClusterCut && cuts.zSizeCut(acc, hh, i, oi))
306  continue;
307 
308  if (doPtCut && ptcut(oi, idphi))
309  continue;
310 
311  auto ind = alpaka::atomicAdd(acc, nCells, (uint32_t)1, alpaka::hierarchy::Blocks{});
312  if (ind >= maxNumOfDoublets) {
313  alpaka::atomicSub(acc, nCells, (uint32_t)1, alpaka::hierarchy::Blocks{});
314  break;
315  } // move to SimpleVector??
316  cells[ind].init(*cellNeighbors, *cellTracks, hh, pairLayerId, i, oi);
317  isOuterHitOfCell[oi].push_back(acc, ind);
318 #ifdef GPU_DEBUG
319  if (isOuterHitOfCell[oi].full())
320  ++tooMany;
321  ++tot;
322 #endif
323  }
324  }
325 // #endif
326 #ifdef GPU_DEBUG
327  if (tooMany > 0 or tot > 0)
328  printf("OuterHitOfCell for %d in layer %d/%d, %d,%d %d, %d %.3f %.3f %s\n",
329  i,
330  inner,
331  outer,
332  nmin,
333  tot,
334  tooMany,
335  iphicut,
338  tooMany > 0 ? "FULL!!" : "not full.");
339 #endif
340  } // loop in block...
341  } // namespace caPixelDoublets
342  } // namespace caPixelDoublets
343 } // namespace ALPAKA_ACCELERATOR_NAMESPACE
344 #endif // RecoPixelVertexing_PixelTriplets_CAPixelDoubletsAlgos_h
ALPAKA_FN_ACC ALPAKA_FN_INLINE bool next_valid_element_index_strided(Idx &i, Idx &firstElementIdx, Idx &endElementIdx, const Idx stride, const Idx maxNumberOfElements)
Definition: workdivision.h:921
typename TrackingRecHitSoA< TrackerTraits >::PhiBinner PhiBinner
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > * cellNeighbors
T1 atomicSub(T1 *a, T2 b)
Definition: cudaCompat.h:73
caStructures::CellNeighborsT< TrackerTraits > CellNeighbors
Definition: CAFishbone.h:23
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > * cells
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > OuterHitOfCell< TrackerTraits > CellCutsT< TrackerTraits > const & cuts
CellCutsT(const bool doClusterCut, const bool doZ0Cut, const bool doPtCut, const bool idealConditions, const float z0Cut, const float ptCut, const std::vector< int > &phiCutsV)
assert(be >=bs)
const uint32_t threadIdxLocalY(alpaka::getIdx< alpaka::Block, alpaka::Threads >(acc)[dimIndexY])
constexpr uint16_t last_barrel_layer
const uint32_t threadIdxLocalX(alpaka::getIdx< alpaka::Block, alpaka::Threads >(acc)[dimIndexX])
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
Definition: GenABIO.cc:168
std::vector< Block > Blocks
Definition: Block.h:99
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
double f[11][100]
ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline)) doubletsFromHisto(const TAcc &acc
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > hh
caStructures::CellTracksVectorT< TrackerTraits > CellTracksVector
Definition: CAFishbone.h:29
ALPAKA_FN_ACC std::pair< Idx, Idx > element_index_range_in_block(const TAcc &acc, const Idx elementIdxShift, const unsigned int dimIndex=0u)
Definition: workdivision.h:818
const uint32_t gridDimensionY(alpaka::getWorkDiv< alpaka::Grid, alpaka::Elems >(acc)[dimIndexY])
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t nPairs
typename CACellT< TrackerTraits >::HitsConstView HitsConstView
Definition: CAFishbone.h:33
caStructures::CellNeighborsVectorT< TrackerTraits > CellNeighborsVector
Definition: CAFishbone.h:27
ALPAKA_FN_ACC std::pair< Idx, Idx > element_index_range_in_grid(const TAcc &acc, Idx elementIdxShift, const unsigned int dimIndex=0u)
Definition: workdivision.h:862
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t maxNumOfDoublets
const uint32_t blockDimensionX(alpaka::getWorkDiv< alpaka::Block, alpaka::Elems >(acc)[dimIndexX])
constexpr float short2phi(short x)
Definition: approx_atan2.h:285
constexpr float minz[nPairs]
caStructures::OuterHitOfCellT< TrackerTraits > OuterHitOfCell
Definition: CAFishbone.h:31
caStructures::CellTracksT< TrackerTraits > CellTracks
Definition: CAFishbone.h:25
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t * nCells
constexpr uint16_t maxNumModules
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > * cellTracks
constexpr float maxz[nPairs]
TrackingRecHitSoAConstView< TrackerTraits > HitsConstView
Definition: CACell.h:35
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
ALPAKA_FN_ACC ALPAKA_FN_INLINE void uint32_t const uint32_t CACellT< TrackerTraits > uint32_t CellNeighborsVector< TrackerTraits > CellTracksVector< TrackerTraits > HitsConstView< TrackerTraits > OuterHitOfCell< TrackerTraits > isOuterHitOfCell
constexpr float maxr[nPairs]