CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
GPUCACell.h
Go to the documentation of this file.
1 #ifndef RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h
2 #define RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h
3 
4 //
5 // Author: Felice Pantaleo, CERN
6 //
7 
8 // #define ONLY_TRIPLETS_IN_HOLE
9 
10 #include <cuda_runtime.h>
11 
18 #include "CAConstants.h"
19 
20 class GPUCACell {
21 public:
22  using PtrAsInt = unsigned long long;
23 
31 
34 
36 
39  static constexpr auto bad = pixelTrack::Quality::bad;
40 
41  enum class StatusBit : uint16_t { kUsed = 1, kInTrack = 2, kKilled = 1 << 15 };
42 
43  GPUCACell() = default;
44 
47  Hits const& hh,
48  int layerPairId,
51  theInnerHitId = innerHitId;
56 
57  // optimization that depends on access pattern
58  theInnerZ = hh.zGlobal(innerHitId);
59  theInnerR = hh.rGlobal(innerHitId);
60 
61  // link to default empty
62  theOuterNeighbors = &cellNeighbors[0];
63  theTracks = &cellTracks[0];
64  assert(outerNeighbors().empty());
65  assert(tracks().empty());
66  }
67 
68  __device__ __forceinline__ int addOuterNeighbor(CellNeighbors::value_t t, CellNeighborsVector& cellNeighbors) {
69  // use smart cache
70  if (outerNeighbors().empty()) {
71  auto i = cellNeighbors.extend(); // maybe wasted....
72  if (i > 0) {
73  cellNeighbors[i].reset();
74  __threadfence();
75 #ifdef __CUDACC__
76  auto zero = (PtrAsInt)(&cellNeighbors[0]);
78  zero,
79  (PtrAsInt)(&cellNeighbors[i])); // if fails we cannot give "i" back...
80 #else
81  theOuterNeighbors = &cellNeighbors[i];
82 #endif
83  } else
84  return -1;
85  }
86  __threadfence();
87  return outerNeighbors().push_back(t);
88  }
89 
91  if (tracks().empty()) {
92  auto i = cellTracks.extend(); // maybe wasted....
93  if (i > 0) {
94  cellTracks[i].reset();
95  __threadfence();
96 #ifdef __CUDACC__
97  auto zero = (PtrAsInt)(&cellTracks[0]);
98  atomicCAS((PtrAsInt*)(&theTracks), zero, (PtrAsInt)(&cellTracks[i])); // if fails we cannot give "i" back...
99 #else
100  theTracks = &cellTracks[i];
101 #endif
102  } else
103  return -1;
104  }
105  __threadfence();
106  return tracks().push_back(t);
107  }
108 
112  __device__ __forceinline__ CellNeighbors const& outerNeighbors() const { return *theOuterNeighbors; }
113  __device__ __forceinline__ float inner_x(Hits const& hh) const { return hh.xGlobal(theInnerHitId); }
114  __device__ __forceinline__ float outer_x(Hits const& hh) const { return hh.xGlobal(theOuterHitId); }
115  __device__ __forceinline__ float inner_y(Hits const& hh) const { return hh.yGlobal(theInnerHitId); }
116  __device__ __forceinline__ float outer_y(Hits const& hh) const { return hh.yGlobal(theOuterHitId); }
117  __device__ __forceinline__ float inner_z(Hits const& hh) const { return theInnerZ; }
118  // { return hh.zGlobal(theInnerHitId); } // { return theInnerZ; }
119  __device__ __forceinline__ float outer_z(Hits const& hh) const { return hh.zGlobal(theOuterHitId); }
120  __device__ __forceinline__ float inner_r(Hits const& hh) const { return theInnerR; }
121  // { return hh.rGlobal(theInnerHitId); } // { return theInnerR; }
122  __device__ __forceinline__ float outer_r(Hits const& hh) const { return hh.rGlobal(theOuterHitId); }
123 
124  __device__ __forceinline__ auto inner_iphi(Hits const& hh) const { return hh.iphi(theInnerHitId); }
125  __device__ __forceinline__ auto outer_iphi(Hits const& hh) const { return hh.iphi(theOuterHitId); }
126 
127  __device__ __forceinline__ float inner_detIndex(Hits const& hh) const { return hh.detectorIndex(theInnerHitId); }
128  __device__ __forceinline__ float outer_detIndex(Hits const& hh) const { return hh.detectorIndex(theOuterHitId); }
129 
130  constexpr unsigned int inner_hit_id() const { return theInnerHitId; }
131  constexpr unsigned int outer_hit_id() const { return theOuterHitId; }
132 
133  __device__ void print_cell() const {
134  printf("printing cell: on layerPair: %d, innerHitId: %d, outerHitId: %d \n",
136  theInnerHitId,
137  theOuterHitId);
138  }
139 
141  GPUCACell const& otherCell,
142  const float ptmin,
143  const float hardCurvCut,
144  const float caThetaCutBarrel,
145  const float caThetaCutForward,
146  const float dcaCutInnerTriplet,
147  const float dcaCutOuterTriplet) const {
148  // detIndex of the layerStart for the Phase1 Pixel Detector:
149  // [BPX1, BPX2, BPX3, BPX4, FP1, FP2, FP3, FN1, FN2, FN3, LAST_VALID]
150  // [ 0, 96, 320, 672, 1184, 1296, 1408, 1520, 1632, 1744, 1856]
151  auto ri = inner_r(hh);
152  auto zi = inner_z(hh);
153 
154  auto ro = outer_r(hh);
155  auto zo = outer_z(hh);
156 
157  auto r1 = otherCell.inner_r(hh);
158  auto z1 = otherCell.inner_z(hh);
159  auto isBarrel = otherCell.outer_detIndex(hh) < caConstants::last_barrel_detIndex;
160  bool aligned = areAlignedRZ(r1,
161  z1,
162  ri,
163  zi,
164  ro,
165  zo,
166  ptmin,
167  isBarrel ? caThetaCutBarrel : caThetaCutForward); // 2.f*thetaCut); // FIXME tune cuts
168  return (aligned && dcaCut(hh,
169  otherCell,
170  otherCell.inner_detIndex(hh) < caConstants::last_bpix1_detIndex ? dcaCutInnerTriplet
171  : dcaCutOuterTriplet,
172  hardCurvCut)); // FIXME tune cuts
173  }
174 
175  __device__ __forceinline__ static bool areAlignedRZ(
176  float r1, float z1, float ri, float zi, float ro, float zo, const float ptmin, const float thetaCut) {
177  float radius_diff = std::abs(r1 - ro);
178  float distance_13_squared = radius_diff * radius_diff + (z1 - zo) * (z1 - zo);
179 
180  float pMin = ptmin * std::sqrt(distance_13_squared); // this needs to be divided by
181  // radius_diff later
182 
183  float tan_12_13_half_mul_distance_13_squared = fabs(z1 * (ri - ro) + zi * (ro - r1) + zo * (r1 - ri));
184  return tan_12_13_half_mul_distance_13_squared * pMin <= thetaCut * distance_13_squared * radius_diff;
185  }
186 
187  __device__ inline bool dcaCut(Hits const& hh,
188  GPUCACell const& otherCell,
189  const float region_origin_radius_plus_tolerance,
190  const float maxCurv) const {
191  auto x1 = otherCell.inner_x(hh);
192  auto y1 = otherCell.inner_y(hh);
193 
194  auto x2 = inner_x(hh);
195  auto y2 = inner_y(hh);
196 
197  auto x3 = outer_x(hh);
198  auto y3 = outer_y(hh);
199 
200  CircleEq<float> eq(x1, y1, x2, y2, x3, y3);
201 
202  if (eq.curvature() > maxCurv)
203  return false;
204 
205  return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature());
206  }
207 
208  __device__ __forceinline__ static bool dcaCutH(float x1,
209  float y1,
210  float x2,
211  float y2,
212  float x3,
213  float y3,
214  const float region_origin_radius_plus_tolerance,
215  const float maxCurv) {
216  CircleEq<float> eq(x1, y1, x2, y2, x3, y3);
217 
218  if (eq.curvature() > maxCurv)
219  return false;
220 
221  return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature());
222  }
223 
224  __device__ inline bool hole0(Hits const& hh, GPUCACell const& innerCell) const {
229  int p = innerCell.inner_iphi(hh);
230  if (p < 0)
233  p %= max_ladder_bpx0;
234  auto il = first_ladder_bpx0 + p;
235  auto r0 = hh.averageGeometry().ladderR[il];
236  auto ri = innerCell.inner_r(hh);
237  auto zi = innerCell.inner_z(hh);
238  auto ro = outer_r(hh);
239  auto zo = outer_z(hh);
240  auto z0 = zi + (r0 - ri) * (zo - zi) / (ro - ri);
241  auto z_in_ladder = std::abs(z0 - hh.averageGeometry().ladderZ[il]);
242  auto z_in_module = z_in_ladder - module_length_bpx0 * int(z_in_ladder / module_length_bpx0);
243  auto gap = z_in_module < module_tolerance_bpx0 || z_in_module > (module_length_bpx0 - module_tolerance_bpx0);
244  return gap;
245  }
246 
247  __device__ inline bool hole4(Hits const& hh, GPUCACell const& innerCell) const {
252  int p = outer_iphi(hh);
253  if (p < 0)
256  p %= max_ladder_bpx4;
257  auto il = first_ladder_bpx4 + p;
258  auto r4 = hh.averageGeometry().ladderR[il];
259  auto ri = innerCell.inner_r(hh);
260  auto zi = innerCell.inner_z(hh);
261  auto ro = outer_r(hh);
262  auto zo = outer_z(hh);
263  auto z4 = zo + (r4 - ro) * (zo - zi) / (ro - ri);
264  auto z_in_ladder = std::abs(z4 - hh.averageGeometry().ladderZ[il]);
265  auto z_in_module = z_in_ladder - module_length_bpx4 * int(z_in_ladder / module_length_bpx4);
266  auto gap = z_in_module < module_tolerance_bpx4 || z_in_module > (module_length_bpx4 - module_tolerance_bpx4);
267  auto holeP = z4 > hh.averageGeometry().ladderMaxZ[il] && z4 < hh.averageGeometry().endCapZ[0];
268  auto holeN = z4 < hh.averageGeometry().ladderMinZ[il] && z4 > hh.averageGeometry().endCapZ[1];
269  return gap || holeP || holeN;
270  }
271 
272  // trying to free the track building process from hardcoded layers, leaving
273  // the visit of the graph based on the neighborhood connections between cells.
274  template <int DEPTH>
275  __device__ inline void find_ntuplets(Hits const& hh,
276  GPUCACell* __restrict__ cells,
277  CellTracksVector& cellTracks,
280  Quality* __restrict__ quality,
281  TmpTuple& tmpNtuplet,
282  const unsigned int minHitsPerNtuplet,
283  bool startAt0) const {
284  // the building process for a track ends if:
285  // it has no right neighbor
286  // it has no compatible neighbor
287  // the ntuplets is then saved if the number of hits it contains is greater
288  // than a threshold
289 
290  auto doubletId = this - cells;
291  tmpNtuplet.push_back_unsafe(doubletId);
292  assert(tmpNtuplet.size() <= 4);
293 
294  bool last = true;
295  for (unsigned int otherCell : outerNeighbors()) {
296  if (cells[otherCell].isKilled())
297  continue; // killed by earlyFishbone
298  last = false;
299  cells[otherCell].find_ntuplets<DEPTH - 1>(
300  hh, cells, cellTracks, foundNtuplets, apc, quality, tmpNtuplet, minHitsPerNtuplet, startAt0);
301  }
302  if (last) { // if long enough save...
303  if ((unsigned int)(tmpNtuplet.size()) >= minHitsPerNtuplet - 1) {
304 #ifdef ONLY_TRIPLETS_IN_HOLE
305  // triplets accepted only pointing to the hole
306  if (tmpNtuplet.size() >= 3 || (startAt0 && hole4(hh, cells[tmpNtuplet[0]])) ||
307  ((!startAt0) && hole0(hh, cells[tmpNtuplet[0]])))
308 #endif
309  {
310  hindex_type hits[8];
311  auto nh = 0U;
312  constexpr int maxFB = 2; // for the time being let's limit this
313  int nfb = 0;
314  for (auto c : tmpNtuplet) {
315  hits[nh++] = cells[c].theInnerHitId;
316  if (nfb < maxFB && cells[c].hasFishbone()) {
317  ++nfb;
318  hits[nh++] = cells[c].theFishboneId; // fishbone hit is always outer than inner hit
319  }
320  }
322  hits[nh] = theOuterHitId;
323  auto it = foundNtuplets.bulkFill(apc, hits, nh + 1);
324  if (it >= 0) { // if negative is overflow....
325  for (auto c : tmpNtuplet)
326  cells[c].addTrack(it, cellTracks);
327  quality[it] = bad; // initialize to bad
328  }
329  }
330  }
331  }
332  tmpNtuplet.pop_back();
333  assert(tmpNtuplet.size() < 4);
334  }
335 
336  // Cell status management
337  __device__ __forceinline__ void kill() { theStatus_ |= uint16_t(StatusBit::kKilled); }
338  __device__ __forceinline__ bool isKilled() const { return theStatus_ & uint16_t(StatusBit::kKilled); }
339 
340  __device__ __forceinline__ int16_t layerPairId() const { return theLayerPairId_; }
341 
342  __device__ __forceinline__ bool unused() const { return 0 == (uint16_t(StatusBit::kUsed) & theStatus_); }
343  __device__ __forceinline__ void setStatusBits(StatusBit mask) { theStatus_ |= uint16_t(mask); }
344 
345  __device__ __forceinline__ void setFishbone(hindex_type id) { theFishboneId = id; }
346  __device__ __forceinline__ auto fishboneId() const { return theFishboneId; }
347  __device__ __forceinline__ bool hasFishbone() const {
349  }
350 
351 private:
354 
355  int16_t theLayerPairId_;
356  uint16_t theStatus_; // tbd
357 
358  float theInnerZ;
359  float theInnerR;
360  hindex_type theInnerHitId;
363 };
364 
365 template <>
366 __device__ inline void GPUCACell::find_ntuplets<0>(Hits const& hh,
367  GPUCACell* __restrict__ cells,
371  Quality* __restrict__ quality,
372  TmpTuple& tmpNtuplet,
373  const unsigned int minHitsPerNtuplet,
374  bool startAt0) const {
375  printf("ERROR: GPUCACell::find_ntuplets reached full depth!\n");
376 #ifdef __CUDA_ARCH__
377  __trap();
378 #else
379  abort();
380 #endif
381 }
382 
383 #endif // RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h
unsigned long long PtrAsInt
Definition: GPUCACell.h:22
constexpr int32_t maxHitsOnTrack
Definition: CAConstants.h:47
cms::cuda::VecArray< tindex_type, maxCellTracks > CellTracks
Definition: CAConstants.h:72
TrackingRecHit2DSOAView Hits
Definition: GPUCACell.h:32
cms::cuda::VecArray< uint32_t, 6 > TmpTuple
Definition: GPUCACell.h:35
constexpr uint32_t first_ladder_bpx4
Definition: CAConstants.h:55
float tan_12_13_half_mul_distance_13_squared
Definition: GPUCACell.h:183
const edm::EventSetup & c
float pMin
Definition: GPUCACell.h:180
cms::cuda::SimpleVector< CellNeighbors > CellNeighborsVector
Definition: CAConstants.h:74
#define __forceinline__
Definition: cudaCompat.h:22
T1 atomicCAS(T1 *address, T1 compare, T2 val)
Definition: cudaCompat.h:36
uint16_t *__restrict__ id
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const * cellNeighbors
uint32_t const *__restrict__ Quality * quality
auto const & foundNtuplets
int init
Definition: HydjetWrapper.h:64
__device__ float float float float ro
Definition: GPUCACell.h:176
__device__ float float float float float const float const float thetaCut
Definition: GPUCACell.h:176
auto const & tracks
cannot be loose
theLayerPairId_
Definition: GPUCACell.h:53
Hits::hindex_type hindex_type
Definition: GPUCACell.h:33
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter * apc
__device__ float float float float float zo
Definition: GPUCACell.h:176
cms::cuda::VecArray< uint32_t, maxCellsPerHit > OuterHitOfCellContainer
Definition: CAConstants.h:77
constexpr uint32_t max_ladder_bpx4
Definition: CAConstants.h:54
__device__ float float float zi
Definition: GPUCACell.h:176
float distance_13_squared
Definition: GPUCACell.h:178
cms::cuda::SimpleVector< CellTracks > CellTracksVector
Definition: CAConstants.h:75
constexpr float module_length_bpx4
Definition: CAConstants.h:58
__device__ void print_cell() const
Definition: GPUCACell.h:133
T sqrt(T t)
Definition: SSEVec.h:19
printf("params %d %f %f %f\n", minT, eps, errmax, chi2max)
__device__ int extend(int size=1)
Definition: SimpleVector.h:84
constexpr uint32_t max_ladder_bpx0
Definition: CAConstants.h:50
__device__ CellTracksVector Hits const int hindex_type innerHitId
Definition: GPUCACell.h:46
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
static constexpr auto bad
Definition: GPUCACell.h:39
TrackSoA::HitContainer HitContainer
__device__ float float float float float const float ptmin
Definition: GPUCACell.h:176
__device__ CellTracksVector Hits const int hindex_type hindex_type outerHitId
Definition: GPUCACell.h:50
assert(outerNeighbors().empty())
uint32_t nh
constexpr float module_length_bpx0
Definition: CAConstants.h:52
caConstants::CellNeighbors CellNeighbors
Definition: GPUCACell.h:27
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ cells
constexpr uint32_t first_ladder_bpx0
Definition: CAConstants.h:51
GPUCACell()=default
caConstants::CellTracks CellTracks
Definition: GPUCACell.h:28
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const * cellTracks
static constexpr auto maxCellsPerHit
Definition: GPUCACell.h:24
constexpr unsigned int outer_hit_id() const
Definition: GPUCACell.h:131
constexpr void reset()
Definition: SimpleVector.h:108
caConstants::CellTracksVector CellTracksVector
Definition: GPUCACell.h:30
__device__ CellTracksVector & cellTracks
Definition: GPUCACell.h:46
constexpr float module_tolerance_bpx0
Definition: CAConstants.h:53
constexpr unsigned int inner_hit_id() const
Definition: GPUCACell.h:130
constexpr uint32_t maxCellsPerHit
Definition: CAConstants.h:38
__device__ float z1
Definition: GPUCACell.h:176
caConstants::CellNeighborsVector CellNeighborsVector
Definition: GPUCACell.h:29
tuple last
Definition: dqmdumpme.py:56
__device__ CellTracksVector Hits const & hh
Definition: GPUCACell.h:46
constexpr uint32_t last_bpix1_detIndex
Definition: CAConstants.h:64
constexpr uint32_t last_barrel_detIndex
Definition: CAConstants.h:65
theOuterNeighbors
Definition: GPUCACell.h:62
#define __device__
__device__ bool check_alignment(Hits const &hh, GPUCACell const &otherCell, const float ptmin, const float hardCurvCut, const float caThetaCutBarrel, const float caThetaCutForward, const float dcaCutInnerTriplet, const float dcaCutOuterTriplet) const
Definition: GPUCACell.h:140
__device__ CellTracksVector Hits const int layerPairId
Definition: GPUCACell.h:46
void __threadfence()
Definition: cudaCompat.h:109
auto const & hh
constexpr float module_tolerance_bpx4
Definition: CAConstants.h:59
__device__ float float ri
Definition: GPUCACell.h:176
cms::cuda::VecArray< uint32_t, maxCellNeighbors > CellNeighbors
Definition: CAConstants.h:71