CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
GPUCACell.h
Go to the documentation of this file.
1 #ifndef RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h
2 #define RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h
3 
4 //
5 // Author: Felice Pantaleo, CERN
6 //
7 
8 // #define ONLY_TRIPLETS_IN_HOLE
9 
10 #include <cuda_runtime.h>
11 
18 #include "CAConstants.h"
19 
20 class GPUCACell {
21 public:
22  using PtrAsInt = unsigned long long;
23 
32 
35 
37 
40  static constexpr auto bad = pixelTrack::Quality::bad;
41 
42  enum class StatusBit : uint16_t { kUsed = 1, kInTrack = 2, kKilled = 1 << 15 };
43 
44  GPUCACell() = default;
45 
48  Hits const& hh,
49  int layerPairId,
52  theInnerHitId = innerHitId;
57 
58  // optimization that depends on access pattern
59  theInnerZ = hh.zGlobal(innerHitId);
60  theInnerR = hh.rGlobal(innerHitId);
61 
62  // link to default empty
63  theOuterNeighbors = &cellNeighbors[0];
64  theTracks = &cellTracks[0];
65  assert(outerNeighbors().empty());
66  assert(tracks().empty());
67  }
68 
69  __device__ __forceinline__ int addOuterNeighbor(CellNeighbors::value_t t, CellNeighborsVector& cellNeighbors) {
70  // use smart cache
71  if (outerNeighbors().empty()) {
72  auto i = cellNeighbors.extend(); // maybe wasted....
73  if (i > 0) {
74  cellNeighbors[i].reset();
75  __threadfence();
76 #ifdef __CUDACC__
77  auto zero = (PtrAsInt)(&cellNeighbors[0]);
79  zero,
80  (PtrAsInt)(&cellNeighbors[i])); // if fails we cannot give "i" back...
81 #else
82  theOuterNeighbors = &cellNeighbors[i];
83 #endif
84  } else
85  return -1;
86  }
87  __threadfence();
88  return outerNeighbors().push_back(t);
89  }
90 
92  if (tracks().empty()) {
93  auto i = cellTracks.extend(); // maybe wasted....
94  if (i > 0) {
95  cellTracks[i].reset();
96  __threadfence();
97 #ifdef __CUDACC__
98  auto zero = (PtrAsInt)(&cellTracks[0]);
99  atomicCAS((PtrAsInt*)(&theTracks), zero, (PtrAsInt)(&cellTracks[i])); // if fails we cannot give "i" back...
100 #else
101  theTracks = &cellTracks[i];
102 #endif
103  } else
104  return -1;
105  }
106  __threadfence();
107  return tracks().push_back(t);
108  }
109 
113  __device__ __forceinline__ CellNeighbors const& outerNeighbors() const { return *theOuterNeighbors; }
114  __device__ __forceinline__ float inner_x(Hits const& hh) const { return hh.xGlobal(theInnerHitId); }
115  __device__ __forceinline__ float outer_x(Hits const& hh) const { return hh.xGlobal(theOuterHitId); }
116  __device__ __forceinline__ float inner_y(Hits const& hh) const { return hh.yGlobal(theInnerHitId); }
117  __device__ __forceinline__ float outer_y(Hits const& hh) const { return hh.yGlobal(theOuterHitId); }
118  __device__ __forceinline__ float inner_z(Hits const& hh) const { return theInnerZ; }
119  // { return hh.zGlobal(theInnerHitId); } // { return theInnerZ; }
120  __device__ __forceinline__ float outer_z(Hits const& hh) const { return hh.zGlobal(theOuterHitId); }
121  __device__ __forceinline__ float inner_r(Hits const& hh) const { return theInnerR; }
122  // { return hh.rGlobal(theInnerHitId); } // { return theInnerR; }
123  __device__ __forceinline__ float outer_r(Hits const& hh) const { return hh.rGlobal(theOuterHitId); }
124 
125  __device__ __forceinline__ auto inner_iphi(Hits const& hh) const { return hh.iphi(theInnerHitId); }
126  __device__ __forceinline__ auto outer_iphi(Hits const& hh) const { return hh.iphi(theOuterHitId); }
127 
128  __device__ __forceinline__ float inner_detIndex(Hits const& hh) const { return hh.detectorIndex(theInnerHitId); }
129  __device__ __forceinline__ float outer_detIndex(Hits const& hh) const { return hh.detectorIndex(theOuterHitId); }
130 
131  constexpr unsigned int inner_hit_id() const { return theInnerHitId; }
132  constexpr unsigned int outer_hit_id() const { return theOuterHitId; }
133 
134  __device__ void print_cell() const {
135  printf("printing cell: on layerPair: %d, innerHitId: %d, outerHitId: %d \n",
137  theInnerHitId,
138  theOuterHitId);
139  }
140 
142  GPUCACell const& otherCell,
143  const float ptmin,
144  const float hardCurvCut,
145  const float caThetaCutBarrel,
146  const float caThetaCutForward,
147  const float dcaCutInnerTriplet,
148  const float dcaCutOuterTriplet) const {
149  // detIndex of the layerStart for the Phase1 Pixel Detector:
150  // [BPX1, BPX2, BPX3, BPX4, FP1, FP2, FP3, FN1, FN2, FN3, LAST_VALID]
151  // [ 0, 96, 320, 672, 1184, 1296, 1408, 1520, 1632, 1744, 1856]
152  auto ri = inner_r(hh);
153  auto zi = inner_z(hh);
154 
155  auto ro = outer_r(hh);
156  auto zo = outer_z(hh);
157 
158  auto r1 = otherCell.inner_r(hh);
159  auto z1 = otherCell.inner_z(hh);
160  auto isBarrel = otherCell.outer_detIndex(hh) < caConstants::last_barrel_detIndex;
161  bool aligned = areAlignedRZ(r1,
162  z1,
163  ri,
164  zi,
165  ro,
166  zo,
167  ptmin,
168  isBarrel ? caThetaCutBarrel : caThetaCutForward); // 2.f*thetaCut); // FIXME tune cuts
169  return (aligned && dcaCut(hh,
170  otherCell,
171  otherCell.inner_detIndex(hh) < caConstants::last_bpix1_detIndex ? dcaCutInnerTriplet
172  : dcaCutOuterTriplet,
173  hardCurvCut)); // FIXME tune cuts
174  }
175 
176  __device__ __forceinline__ static bool areAlignedRZ(
177  float r1, float z1, float ri, float zi, float ro, float zo, const float ptmin, const float thetaCut) {
178  float radius_diff = std::abs(r1 - ro);
179  float distance_13_squared = radius_diff * radius_diff + (z1 - zo) * (z1 - zo);
180 
181  float pMin = ptmin * std::sqrt(distance_13_squared); // this needs to be divided by
182  // radius_diff later
183 
184  float tan_12_13_half_mul_distance_13_squared = fabs(z1 * (ri - ro) + zi * (ro - r1) + zo * (r1 - ri));
185  return tan_12_13_half_mul_distance_13_squared * pMin <= thetaCut * distance_13_squared * radius_diff;
186  }
187 
188  __device__ inline bool dcaCut(Hits const& hh,
189  GPUCACell const& otherCell,
190  const float region_origin_radius_plus_tolerance,
191  const float maxCurv) const {
192  auto x1 = otherCell.inner_x(hh);
193  auto y1 = otherCell.inner_y(hh);
194 
195  auto x2 = inner_x(hh);
196  auto y2 = inner_y(hh);
197 
198  auto x3 = outer_x(hh);
199  auto y3 = outer_y(hh);
200 
201  CircleEq<float> eq(x1, y1, x2, y2, x3, y3);
202 
203  if (eq.curvature() > maxCurv)
204  return false;
205 
206  return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature());
207  }
208 
209  __device__ __forceinline__ static bool dcaCutH(float x1,
210  float y1,
211  float x2,
212  float y2,
213  float x3,
214  float y3,
215  const float region_origin_radius_plus_tolerance,
216  const float maxCurv) {
217  CircleEq<float> eq(x1, y1, x2, y2, x3, y3);
218 
219  if (eq.curvature() > maxCurv)
220  return false;
221 
222  return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature());
223  }
224 
225  __device__ inline bool hole0(Hits const& hh, GPUCACell const& innerCell) const {
230  int p = innerCell.inner_iphi(hh);
231  if (p < 0)
234  p %= max_ladder_bpx0;
235  auto il = first_ladder_bpx0 + p;
236  auto r0 = hh.averageGeometry().ladderR[il];
237  auto ri = innerCell.inner_r(hh);
238  auto zi = innerCell.inner_z(hh);
239  auto ro = outer_r(hh);
240  auto zo = outer_z(hh);
241  auto z0 = zi + (r0 - ri) * (zo - zi) / (ro - ri);
242  auto z_in_ladder = std::abs(z0 - hh.averageGeometry().ladderZ[il]);
243  auto z_in_module = z_in_ladder - module_length_bpx0 * int(z_in_ladder / module_length_bpx0);
244  auto gap = z_in_module < module_tolerance_bpx0 || z_in_module > (module_length_bpx0 - module_tolerance_bpx0);
245  return gap;
246  }
247 
248  __device__ inline bool hole4(Hits const& hh, GPUCACell const& innerCell) const {
253  int p = outer_iphi(hh);
254  if (p < 0)
257  p %= max_ladder_bpx4;
258  auto il = first_ladder_bpx4 + p;
259  auto r4 = hh.averageGeometry().ladderR[il];
260  auto ri = innerCell.inner_r(hh);
261  auto zi = innerCell.inner_z(hh);
262  auto ro = outer_r(hh);
263  auto zo = outer_z(hh);
264  auto z4 = zo + (r4 - ro) * (zo - zi) / (ro - ri);
265  auto z_in_ladder = std::abs(z4 - hh.averageGeometry().ladderZ[il]);
266  auto z_in_module = z_in_ladder - module_length_bpx4 * int(z_in_ladder / module_length_bpx4);
267  auto gap = z_in_module < module_tolerance_bpx4 || z_in_module > (module_length_bpx4 - module_tolerance_bpx4);
268  auto holeP = z4 > hh.averageGeometry().ladderMaxZ[il] && z4 < hh.averageGeometry().endCapZ[0];
269  auto holeN = z4 < hh.averageGeometry().ladderMinZ[il] && z4 > hh.averageGeometry().endCapZ[1];
270  return gap || holeP || holeN;
271  }
272 
273  // trying to free the track building process from hardcoded layers, leaving
274  // the visit of the graph based on the neighborhood connections between cells.
275  template <int DEPTH>
276  __device__ inline void find_ntuplets(Hits const& hh,
277  GPUCACell* __restrict__ cells,
278  CellTracksVector& cellTracks,
281  Quality* __restrict__ quality,
282  TmpTuple& tmpNtuplet,
283  const unsigned int minHitsPerNtuplet,
284  bool startAt0) const {
285  // the building process for a track ends if:
286  // it has no right neighbor
287  // it has no compatible neighbor
288  // the ntuplets is then saved if the number of hits it contains is greater
289  // than a threshold
290 
291  auto doubletId = this - cells;
292  tmpNtuplet.push_back_unsafe(doubletId);
293  assert(tmpNtuplet.size() <= 4);
294 
295  bool last = true;
296  for (unsigned int otherCell : outerNeighbors()) {
297  if (cells[otherCell].isKilled())
298  continue; // killed by earlyFishbone
299  last = false;
300  cells[otherCell].find_ntuplets<DEPTH - 1>(
301  hh, cells, cellTracks, foundNtuplets, apc, quality, tmpNtuplet, minHitsPerNtuplet, startAt0);
302  }
303  if (last) { // if long enough save...
304  if ((unsigned int)(tmpNtuplet.size()) >= minHitsPerNtuplet - 1) {
305 #ifdef ONLY_TRIPLETS_IN_HOLE
306  // triplets accepted only pointing to the hole
307  if (tmpNtuplet.size() >= 3 || (startAt0 && hole4(hh, cells[tmpNtuplet[0]])) ||
308  ((!startAt0) && hole0(hh, cells[tmpNtuplet[0]])))
309 #endif
310  {
311  hindex_type hits[8];
312  auto nh = 0U;
313  constexpr int maxFB = 2; // for the time being let's limit this
314  int nfb = 0;
315  for (auto c : tmpNtuplet) {
316  hits[nh++] = cells[c].theInnerHitId;
317  if (nfb < maxFB && cells[c].hasFishbone()) {
318  ++nfb;
319  hits[nh++] = cells[c].theFishboneId; // fishbone hit is always outer than inner hit
320  }
321  }
323  hits[nh] = theOuterHitId;
324  auto it = foundNtuplets.bulkFill(apc, hits, nh + 1);
325  if (it >= 0) { // if negative is overflow....
326  for (auto c : tmpNtuplet)
327  cells[c].addTrack(it, cellTracks);
328  quality[it] = bad; // initialize to bad
329  }
330  }
331  }
332  }
333  tmpNtuplet.pop_back();
334  assert(tmpNtuplet.size() < 4);
335  }
336 
337  // Cell status management
338  __device__ __forceinline__ void kill() { theStatus_ |= uint16_t(StatusBit::kKilled); }
339  __device__ __forceinline__ bool isKilled() const { return theStatus_ & uint16_t(StatusBit::kKilled); }
340 
341  __device__ __forceinline__ int16_t layerPairId() const { return theLayerPairId_; }
342 
343  __device__ __forceinline__ bool unused() const { return 0 == (uint16_t(StatusBit::kUsed) & theStatus_); }
344  __device__ __forceinline__ void setStatusBits(StatusBit mask) { theStatus_ |= uint16_t(mask); }
345 
346  __device__ __forceinline__ void setFishbone(hindex_type id, float z, Hits const& hh) {
347  // make it deterministic: use the farther apart (in z)
348  auto old = theFishboneId;
349  while (
350  old !=
352  old,
353  (invalidHitId == old || std::abs(z - theInnerZ) > std::abs(hh.zGlobal(old) - theInnerZ)) ? id : old))
354  old = theFishboneId;
355  }
356  __device__ __forceinline__ auto fishboneId() const { return theFishboneId; }
357  __device__ __forceinline__ bool hasFishbone() const { return theFishboneId != invalidHitId; }
358 
359 private:
362 
363  int16_t theLayerPairId_;
364  uint16_t theStatus_; // tbd
365 
366  float theInnerZ;
367  float theInnerR;
368  hindex_type theInnerHitId;
371 };
372 
373 template <>
374 __device__ inline void GPUCACell::find_ntuplets<0>(Hits const& hh,
375  GPUCACell* __restrict__ cells,
379  Quality* __restrict__ quality,
380  TmpTuple& tmpNtuplet,
381  const unsigned int minHitsPerNtuplet,
382  bool startAt0) const {
383  printf("ERROR: GPUCACell::find_ntuplets reached full depth!\n");
384 #ifdef __CUDA_ARCH__
385  __trap();
386 #else
387  abort();
388 #endif
389 }
390 
391 #endif // RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h
unsigned long long PtrAsInt
Definition: GPUCACell.h:22
constexpr int32_t maxHitsOnTrack
Definition: CAConstants.h:47
cms::cuda::VecArray< tindex_type, maxCellTracks > CellTracks
Definition: CAConstants.h:72
TrackingRecHit2DSOAView Hits
Definition: GPUCACell.h:33
cms::cuda::VecArray< uint32_t, 6 > TmpTuple
Definition: GPUCACell.h:36
constexpr uint32_t first_ladder_bpx4
Definition: CAConstants.h:55
float tan_12_13_half_mul_distance_13_squared
Definition: GPUCACell.h:184
const edm::EventSetup & c
float pMin
Definition: GPUCACell.h:181
cms::cuda::SimpleVector< CellNeighbors > CellNeighborsVector
Definition: CAConstants.h:74
#define __forceinline__
Definition: cudaCompat.h:22
T1 atomicCAS(T1 *address, T1 compare, T2 val)
Definition: cudaCompat.h:36
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const * cellNeighbors
uint32_t const *__restrict__ Quality * quality
int init
Definition: HydjetWrapper.h:64
__device__ float float float float ro
Definition: GPUCACell.h:177
__device__ float float float float float const float const float thetaCut
Definition: GPUCACell.h:177
auto const & tracks
cannot be loose
theLayerPairId_
Definition: GPUCACell.h:54
Hits::hindex_type hindex_type
Definition: GPUCACell.h:34
__device__ float float float float float zo
Definition: GPUCACell.h:177
cms::cuda::VecArray< uint32_t, maxCellsPerHit > OuterHitOfCellContainer
Definition: CAConstants.h:77
constexpr uint32_t max_ladder_bpx4
Definition: CAConstants.h:54
__device__ float float float zi
Definition: GPUCACell.h:177
auto const & foundNtuplets
float distance_13_squared
Definition: GPUCACell.h:179
cms::cuda::SimpleVector< CellTracks > CellTracksVector
Definition: CAConstants.h:75
constexpr float module_length_bpx4
Definition: CAConstants.h:58
__device__ void print_cell() const
Definition: GPUCACell.h:134
T sqrt(T t)
Definition: SSEVec.h:19
printf("params %d %f %f %f\n", minT, eps, errmax, chi2max)
__device__ int extend(int size=1)
Definition: SimpleVector.h:84
constexpr uint32_t max_ladder_bpx0
Definition: CAConstants.h:50
__device__ CellTracksVector Hits const int hindex_type innerHitId
Definition: GPUCACell.h:47
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
static constexpr auto bad
Definition: GPUCACell.h:40
TrackSoA::HitContainer HitContainer
__device__ float float float float float const float ptmin
Definition: GPUCACell.h:177
__device__ CellTracksVector Hits const int hindex_type hindex_type outerHitId
Definition: GPUCACell.h:51
assert(outerNeighbors().empty())
uint32_t nh
constexpr float module_length_bpx0
Definition: CAConstants.h:52
caConstants::CellNeighbors CellNeighbors
Definition: GPUCACell.h:28
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ cells
constexpr uint32_t first_ladder_bpx0
Definition: CAConstants.h:51
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter * apc
GPUCACell()=default
caConstants::CellTracks CellTracks
Definition: GPUCACell.h:29
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const * cellTracks
static constexpr auto invalidHitId
Definition: GPUCACell.h:25
static constexpr auto maxCellsPerHit
Definition: GPUCACell.h:24
constexpr unsigned int outer_hit_id() const
Definition: GPUCACell.h:132
constexpr void reset()
Definition: SimpleVector.h:108
caConstants::CellTracksVector CellTracksVector
Definition: GPUCACell.h:31
__device__ CellTracksVector & cellTracks
Definition: GPUCACell.h:47
constexpr float module_tolerance_bpx0
Definition: CAConstants.h:53
constexpr unsigned int inner_hit_id() const
Definition: GPUCACell.h:131
constexpr uint32_t maxCellsPerHit
Definition: CAConstants.h:38
__device__ float z1
Definition: GPUCACell.h:177
caConstants::CellNeighborsVector CellNeighborsVector
Definition: GPUCACell.h:30
tuple last
Definition: dqmdumpme.py:56
__device__ CellTracksVector Hits const & hh
Definition: GPUCACell.h:47
constexpr uint32_t last_bpix1_detIndex
Definition: CAConstants.h:64
constexpr uint32_t last_barrel_detIndex
Definition: CAConstants.h:65
theOuterNeighbors
Definition: GPUCACell.h:63
#define __device__
__device__ bool check_alignment(Hits const &hh, GPUCACell const &otherCell, const float ptmin, const float hardCurvCut, const float caThetaCutBarrel, const float caThetaCutForward, const float dcaCutInnerTriplet, const float dcaCutOuterTriplet) const
Definition: GPUCACell.h:141
__device__ CellTracksVector Hits const int layerPairId
Definition: GPUCACell.h:47
void __threadfence()
Definition: cudaCompat.h:109
auto const & hh
constexpr float module_tolerance_bpx4
Definition: CAConstants.h:59
__device__ float float ri
Definition: GPUCACell.h:177
cms::cuda::VecArray< uint32_t, maxCellNeighbors > CellNeighbors
Definition: CAConstants.h:71