CMS 3D CMS Logo

GPUCACell.h
Go to the documentation of this file.
1 #ifndef RecoTracker_PixelSeeding_plugins_GPUCACell_h
2 #define RecoTracker_PixelSeeding_plugins_GPUCACell_h
3 
4 //
5 // Author: Felice Pantaleo, CERN
6 //
7 
8 // #define ONLY_TRIPLETS_IN_HOLE
9 
10 #include <cuda_runtime.h>
11 
19 #include "CAStructures.h"
20 
21 template <typename TrackerTraits>
22 class GPUCACellT {
23 public:
24  using PtrAsInt = unsigned long long;
25 
26  static constexpr auto maxCellsPerHit = TrackerTraits::maxCellsPerHit;
33 
35  using hindex_type = typename TrackerTraits::hindex_type;
36  using tindex_type = typename TrackerTraits::tindex_type;
38 
40 
43  static constexpr auto bad = pixelTrack::Quality::bad;
44 
45  enum class StatusBit : uint16_t { kUsed = 1, kInTrack = 2, kKilled = 1 << 15 };
46 
47  GPUCACellT() = default;
48 
51  const HitsConstView& hh,
52  int layerPairId,
55  theInnerHitId = innerHitId;
60 
61  // optimization that depends on access pattern
62  theInnerZ = hh[innerHitId].zGlobal();
63  theInnerR = hh[innerHitId].rGlobal();
64 
65  // link to default empty
68  assert(outerNeighbors().empty());
69  assert(tracks().empty());
70  }
71 
72  __device__ __forceinline__ int addOuterNeighbor(typename TrackerTraits::cindex_type t,
74  // use smart cache
75  if (outerNeighbors().empty()) {
76  auto i = cellNeighbors.extend(); // maybe wasted....
77  if (i > 0) {
78  cellNeighbors[i].reset();
79  __threadfence();
80 #ifdef __CUDACC__
81  auto zero = (PtrAsInt)(&cellNeighbors[0]);
83  zero,
84  (PtrAsInt)(&cellNeighbors[i])); // if fails we cannot give "i" back...
85 #else
87 #endif
88  } else
89  return -1;
90  }
91  __threadfence();
92  return outerNeighbors().push_back(t);
93  }
94 
96  if (tracks().empty()) {
97  auto i = cellTracks.extend(); // maybe wasted....
98  if (i > 0) {
99  cellTracks[i].reset();
100  __threadfence();
101 #ifdef __CUDACC__
102  auto zero = (PtrAsInt)(&cellTracks[0]);
103  atomicCAS((PtrAsInt*)(&theTracks), zero, (PtrAsInt)(&cellTracks[i])); // if fails we cannot give "i" back...
104 #else
105  theTracks = &cellTracks[i];
106 #endif
107  } else
108  return -1;
109  }
110  __threadfence();
111  return tracks().push_back(t);
112  }
113 
117  __device__ __forceinline__ CellNeighbors const& outerNeighbors() const { return *theOuterNeighbors; }
118  __device__ __forceinline__ float inner_x(const HitsConstView& hh) const { return hh[theInnerHitId].xGlobal(); }
119  __device__ __forceinline__ float outer_x(const HitsConstView& hh) const { return hh[theOuterHitId].xGlobal(); }
120  __device__ __forceinline__ float inner_y(const HitsConstView& hh) const { return hh[theInnerHitId].yGlobal(); }
121  __device__ __forceinline__ float outer_y(const HitsConstView& hh) const { return hh[theOuterHitId].yGlobal(); }
122  __device__ __forceinline__ float inner_z(const HitsConstView& hh) const { return theInnerZ; }
123  // { return hh.zGlobal(theInnerHitId); } // { return theInnerZ; }
124  __device__ __forceinline__ float outer_z(const HitsConstView& hh) const { return hh[theOuterHitId].zGlobal(); }
125  __device__ __forceinline__ float inner_r(const HitsConstView& hh) const { return theInnerR; }
126  // { return hh.rGlobal(theInnerHitId); } // { return theInnerR; }
127  __device__ __forceinline__ float outer_r(const HitsConstView& hh) const { return hh[theOuterHitId].rGlobal(); }
128 
129  __device__ __forceinline__ auto inner_iphi(const HitsConstView& hh) const { return hh[theInnerHitId].iphi(); }
130  __device__ __forceinline__ auto outer_iphi(const HitsConstView& hh) const { return hh[theOuterHitId].iphi(); }
131 
132  __device__ __forceinline__ float inner_detIndex(const HitsConstView& hh) const {
133  return hh[theInnerHitId].detectorIndex();
134  }
135  __device__ __forceinline__ float outer_detIndex(const HitsConstView& hh) const {
136  return hh[theOuterHitId].detectorIndex();
137  }
138 
139  constexpr unsigned int inner_hit_id() const { return theInnerHitId; }
140  constexpr unsigned int outer_hit_id() const { return theOuterHitId; }
141 
142  __device__ void print_cell() const {
143  printf("printing cell: on layerPair: %d, innerHitId: %d, outerHitId: %d \n",
145  theInnerHitId,
146  theOuterHitId);
147  }
148 
150  GPUCACellT const& otherCell,
151  const float ptmin,
152  const float hardCurvCut,
153  const float caThetaCutBarrel,
154  const float caThetaCutForward,
155  const float dcaCutInnerTriplet,
156  const float dcaCutOuterTriplet) const {
157  // detIndex of the layerStart for the Phase1 Pixel Detector:
158  // [BPX1, BPX2, BPX3, BPX4, FP1, FP2, FP3, FN1, FN2, FN3, LAST_VALID]
159  // [ 0, 96, 320, 672, 1184, 1296, 1408, 1520, 1632, 1744, 1856]
160  auto ri = inner_r(hh);
161  auto zi = inner_z(hh);
162 
163  auto ro = outer_r(hh);
164  auto zo = outer_z(hh);
165 
166  auto r1 = otherCell.inner_r(hh);
167  auto z1 = otherCell.inner_z(hh);
168  auto isBarrel = otherCell.outer_detIndex(hh) < TrackerTraits::last_barrel_detIndex;
169  bool aligned = areAlignedRZ(r1,
170  z1,
171  ri,
172  zi,
173  ro,
174  zo,
175  ptmin,
176  isBarrel ? caThetaCutBarrel : caThetaCutForward); // 2.f*thetaCut); // FIXME tune cuts
177  return (aligned && dcaCut(hh,
178  otherCell,
179  otherCell.inner_detIndex(hh) < TrackerTraits::last_bpix1_detIndex ? dcaCutInnerTriplet
181  hardCurvCut)); // FIXME tune cuts
182  }
183 
184  __device__ __forceinline__ static bool areAlignedRZ(
185  float r1, float z1, float ri, float zi, float ro, float zo, const float ptmin, const float thetaCut) {
186  float radius_diff = std::abs(r1 - ro);
187  float distance_13_squared = radius_diff * radius_diff + (z1 - zo) * (z1 - zo);
188 
189  float pMin = ptmin * std::sqrt(distance_13_squared); // this needs to be divided by
190  // radius_diff later
191 
192  float tan_12_13_half_mul_distance_13_squared = fabs(z1 * (ri - ro) + zi * (ro - r1) + zo * (r1 - ri));
194  }
195 
196  __device__ inline bool dcaCut(const HitsConstView& hh,
197  GPUCACellT const& otherCell,
198  const float region_origin_radius_plus_tolerance,
199  const float maxCurv) const {
200  auto x1 = otherCell.inner_x(hh);
201  auto y1 = otherCell.inner_y(hh);
202 
203  auto x2 = inner_x(hh);
204  auto y2 = inner_y(hh);
205 
206  auto x3 = outer_x(hh);
207  auto y3 = outer_y(hh);
208 
209  CircleEq<float> eq(x1, y1, x2, y2, x3, y3);
210 
211  if (eq.curvature() > maxCurv)
212  return false;
213 
214  return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature());
215  }
216 
217  __device__ __forceinline__ static bool dcaCutH(float x1,
218  float y1,
219  float x2,
220  float y2,
221  float x3,
222  float y3,
223  const float region_origin_radius_plus_tolerance,
224  const float maxCurv) {
225  CircleEq<float> eq(x1, y1, x2, y2, x3, y3);
226 
227  if (eq.curvature() > maxCurv)
228  return false;
229 
230  return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature());
231  }
232 
233  __device__ inline bool hole0(const HitsConstView& hh, GPUCACellT const& innerCell) const {
234  using namespace phase1PixelTopology;
235 
236  int p = innerCell.inner_iphi(hh);
237  if (p < 0)
240  p %= max_ladder_bpx0;
241  auto il = first_ladder_bpx0 + p;
242  auto r0 = hh.averageGeometry().ladderR[il];
243  auto ri = innerCell.inner_r(hh);
244  auto zi = innerCell.inner_z(hh);
245  auto ro = outer_r(hh);
246  auto zo = outer_z(hh);
247  auto z0 = zi + (r0 - ri) * (zo - zi) / (ro - ri);
248  auto z_in_ladder = std::abs(z0 - hh.averageGeometry().ladderZ[il]);
249  auto z_in_module = z_in_ladder - module_length_bpx0 * int(z_in_ladder / module_length_bpx0);
250  auto gap = z_in_module < module_tolerance_bpx0 || z_in_module > (module_length_bpx0 - module_tolerance_bpx0);
251  return gap;
252  }
253 
254  __device__ inline bool hole4(const HitsConstView& hh, GPUCACellT const& innerCell) const {
255  using namespace phase1PixelTopology;
256 
257  int p = outer_iphi(hh);
258  if (p < 0)
261  p %= max_ladder_bpx4;
262  auto il = first_ladder_bpx4 + p;
263  auto r4 = hh.averageGeometry().ladderR[il];
264  auto ri = innerCell.inner_r(hh);
265  auto zi = innerCell.inner_z(hh);
266  auto ro = outer_r(hh);
267  auto zo = outer_z(hh);
268  auto z4 = zo + (r4 - ro) * (zo - zi) / (ro - ri);
269  auto z_in_ladder = std::abs(z4 - hh.averageGeometry().ladderZ[il]);
270  auto z_in_module = z_in_ladder - module_length_bpx4 * int(z_in_ladder / module_length_bpx4);
271  auto gap = z_in_module < module_tolerance_bpx4 || z_in_module > (module_length_bpx4 - module_tolerance_bpx4);
272  auto holeP = z4 > hh.averageGeometry().ladderMaxZ[il] && z4 < hh.averageGeometry().endCapZ[0];
273  auto holeN = z4 < hh.averageGeometry().ladderMinZ[il] && z4 > hh.averageGeometry().endCapZ[1];
274  return gap || holeP || holeN;
275  }
276 
277  // trying to free the track building process from hardcoded layers, leaving
278  // the visit of the graph based on the neighborhood connections between cells.
279 
280  template <int DEPTH>
281  __device__ inline void find_ntuplets(const HitsConstView& hh,
282  GPUCACellT* __restrict__ cells,
286  Quality* __restrict__ quality,
287  TmpTuple& tmpNtuplet,
288  const unsigned int minHitsPerNtuplet,
289  bool startAt0) const {
290  // the building process for a track ends if:
291  // it has no right neighbor
292  // it has no compatible neighbor
293  // the ntuplets is then saved if the number of hits it contains is greater
294  // than a threshold
295 
296  if constexpr (DEPTH <= 0) {
297  printf("ERROR: GPUCACellT::find_ntuplets reached full depth!\n");
298 #ifdef __CUDA_ARCH__
299  __trap();
300 #else
301  abort();
302 #endif
303  } else {
304  auto doubletId = this - cells;
305  tmpNtuplet.push_back_unsafe(doubletId);
306  assert(tmpNtuplet.size() <=
307  int(TrackerTraits::maxHitsOnTrack -
308  3)); //1 for the container, 1 because these are doublets, 1 because we may push another
309 
310  bool last = true;
311  for (unsigned int otherCell : outerNeighbors()) {
312  if (cells[otherCell].isKilled())
313  continue; // killed by earlyFishbone
314  last = false;
315  cells[otherCell].template find_ntuplets<DEPTH - 1>(
316  hh, cells, cellTracks, foundNtuplets, apc, quality, tmpNtuplet, minHitsPerNtuplet, startAt0);
317  }
318 
319  if (last) { // if long enough save...
320  if ((unsigned int)(tmpNtuplet.size()) >= minHitsPerNtuplet - 1) {
321 #ifdef ONLY_TRIPLETS_IN_HOLE
322  // triplets accepted only pointing to the hole
323  if (tmpNtuplet.size() >= 3 || (startAt0 && hole4(hh, cells[tmpNtuplet[0]])) ||
324  ((!startAt0) && hole0(hh, cells[tmpNtuplet[0]])))
325 #endif
326  {
328  auto nh = 0U;
329  constexpr int maxFB = 2; // for the time being let's limit this
330  int nfb = 0;
331  for (auto c : tmpNtuplet) {
332  hits[nh++] = cells[c].theInnerHitId;
333  if (nfb < maxFB && cells[c].hasFishbone()) {
334  ++nfb;
335  hits[nh++] = cells[c].theFishboneId; // fishbone hit is always outer than inner hit
336  }
337  }
338  assert(nh < TrackerTraits::maxHitsOnTrack);
339  hits[nh] = theOuterHitId;
340  auto it = foundNtuplets.bulkFill(apc, hits, nh + 1);
341  if (it >= 0) { // if negative is overflow....
342  for (auto c : tmpNtuplet)
343  cells[c].addTrack(it, cellTracks);
344  quality[it] = bad; // initialize to bad
345  }
346  }
347  }
348  }
349  tmpNtuplet.pop_back();
350  assert(tmpNtuplet.size() < int(TrackerTraits::maxHitsOnTrack - 1));
351  }
352  }
353 
354  // Cell status management
355  __device__ __forceinline__ void kill() { theStatus_ |= uint16_t(StatusBit::kKilled); }
356  __device__ __forceinline__ bool isKilled() const { return theStatus_ & uint16_t(StatusBit::kKilled); }
357 
358  __device__ __forceinline__ int16_t layerPairId() const { return theLayerPairId_; }
359 
360  __device__ __forceinline__ bool unused() const { return 0 == (uint16_t(StatusBit::kUsed) & theStatus_); }
361  __device__ __forceinline__ void setStatusBits(StatusBit mask) { theStatus_ |= uint16_t(mask); }
362 
363  __device__ __forceinline__ void setFishbone(hindex_type id, float z, const HitsConstView& hh) {
364  // make it deterministic: use the farther apart (in z)
365  auto old = theFishboneId;
366  while (old !=
367  atomicCAS(
368  &theFishboneId,
369  old,
370  (invalidHitId == old || std::abs(z - theInnerZ) > std::abs(hh[old].zGlobal() - theInnerZ)) ? id : old))
371  old = theFishboneId;
372  }
373  __device__ __forceinline__ auto fishboneId() const { return theFishboneId; }
374  __device__ __forceinline__ bool hasFishbone() const { return theFishboneId != invalidHitId; }
375 
376 private:
379 
380  int16_t theLayerPairId_;
381  uint16_t theStatus_; // tbd
382 
383  float theInnerZ;
384  float theInnerR;
385  hindex_type theInnerHitId;
388 };
389 
390 #endif // RecoTracker_PixelSeeding_plugins_GPUCACell_h
typename TrackSoA< TrackerTraits >::HitContainer HitContainer
Definition: GPUCACell.h:41
caStructures::CellTracksVectorT< TrackerTraits > CellTracksVector
Definition: GPUCACell.h:32
__device__ float float ri
Definition: GPUCACell.h:185
#define __forceinline__
Definition: cudaCompat.h:22
__device__ CellTracksVector const HitsConstView int layerPairId
Definition: GPUCACell.h:50
TrackingRecHitSoAConstView< TrackerTraits > HitsConstView
Definition: GPUCACell.h:34
static constexpr auto maxCellsPerHit
Definition: GPUCACell.h:26
T1 atomicCAS(T1 *address, T1 compare, T2 val)
Definition: cudaCompat.h:36
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ uint32_t const *__restrict__ CellNeighborsVector< TrackerTraits > const * cellNeighbors
constexpr float module_length_bpx0
__device__ bool check_alignment(const HitsConstView &hh, GPUCACellT const &otherCell, const float ptmin, const float hardCurvCut, const float caThetaCutBarrel, const float caThetaCutForward, const float dcaCutInnerTriplet, const float dcaCutOuterTriplet) const
Definition: GPUCACell.h:149
__device__ float float float float float const float const float thetaCut
Definition: GPUCACell.h:185
int init
Definition: HydjetWrapper.h:66
pixelTrack::Quality Quality
Definition: GPUCACell.h:42
__device__ float float float float float zo
Definition: GPUCACell.h:185
cms::cuda::VecArray< uint32_t, TrackerTraits::maxDepth > TmpTuple
Definition: GPUCACell.h:39
GPUCACellT()=default
constexpr float module_length_bpx4
constexpr uint32_t mask
Definition: gpuClustering.h:26
__device__ void print_cell() const
Definition: GPUCACell.h:142
typename TrackerTraits::hindex_type hindex_type
Definition: GPUCACell.h:35
typename TrackerTraits::tindex_type tindex_type
Definition: GPUCACell.h:36
string quality
constexpr uint32_t first_ladder_bpx4
constexpr float module_tolerance_bpx0
assert(outerNeighbors().empty())
T sqrt(T t)
Definition: SSEVec.h:19
caStructures::CellTracksT< TrackerTraits > CellTracks
Definition: GPUCACell.h:30
__device__ float float float zi
Definition: GPUCACell.h:185
float distance_13_squared
Definition: GPUCACell.h:187
float pMin
Definition: GPUCACell.h:189
__device__ CellTracksVector const HitsConstView & hh
Definition: GPUCACell.h:50
__device__ int extend(int size=1)
Definition: SimpleVector.h:84
constexpr uint32_t max_ladder_bpx4
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
uint32_t nh
caStructures::CellNeighborsT< TrackerTraits > CellNeighbors
Definition: GPUCACell.h:29
constexpr unsigned int outer_hit_id() const
Definition: GPUCACell.h:140
__device__ float float float float float const float ptmin
Definition: GPUCACell.h:185
constexpr unsigned int inner_hit_id() const
Definition: GPUCACell.h:139
constexpr uint32_t max_ladder_bpx0
caStructures::CellNeighborsVectorT< TrackerTraits > CellNeighborsVector
Definition: GPUCACell.h:31
__device__ CellTracksVector const HitsConstView int hindex_type innerHitId
Definition: GPUCACell.h:50
typename TrackingRecHitSoA< TrackerTraits >::template TrackingRecHitSoALayout<>::ConstView TrackingRecHitSoAConstView
return tracks().push_back(t)
__device__ float z1
Definition: GPUCACell.h:185
__device__ CellTracksVector const HitsConstView int hindex_type hindex_type outerHitId
Definition: GPUCACell.h:54
constexpr void reset()
Definition: SimpleVector.h:108
static constexpr auto invalidHitId
Definition: GPUCACell.h:37
float tan_12_13_half_mul_distance_13_squared
Definition: GPUCACell.h:192
static constexpr auto bad
Definition: GPUCACell.h:43
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter * apc
constexpr float module_tolerance_bpx4
unsigned long long PtrAsInt
Definition: GPUCACell.h:24
constexpr uint32_t first_ladder_bpx0
#define __device__
void __threadfence()
Definition: cudaCompat.h:133
__device__ float float float float ro
Definition: GPUCACell.h:185
theOuterNeighbors
Definition: GPUCACell.h:66
__device__ CellTracksVector & cellTracks
Definition: GPUCACell.h:50