CMS 3D CMS Logo

GPUCACell.h
Go to the documentation of this file.
1 #ifndef RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h
2 #define RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h
3 
4 //
5 // Author: Felice Pantaleo, CERN
6 //
7 
8 // #define ONLY_TRIPLETS_IN_HOLE
9 
10 #include <cuda_runtime.h>
11 
18 #include "CAConstants.h"
19 
20 class GPUCACell {
21 public:
22  using PtrAsInt = unsigned long long;
23 
30 
33 
35 
38  static constexpr auto bad = pixelTrack::Quality::bad;
39 
40  GPUCACell() = default;
41 
44  Hits const& hh,
45  int layerPairId,
46  int doubletId,
49  theInnerHitId = innerHitId;
53  theUsed_ = 0;
54 
55  // optimization that depends on access pattern
56  theInnerZ = hh.zGlobal(innerHitId);
57  theInnerR = hh.rGlobal(innerHitId);
58 
59  // link to default empty
62  assert(outerNeighbors().empty());
63  assert(tracks().empty());
64  }
65 
67  // use smart cache
68  if (outerNeighbors().empty()) {
69  auto i = cellNeighbors.extend(); // maybe wasted....
70  if (i > 0) {
71  cellNeighbors[i].reset();
72  __threadfence();
73 #ifdef __CUDACC__
74  auto zero = (PtrAsInt)(&cellNeighbors[0]);
76  zero,
77  (PtrAsInt)(&cellNeighbors[i])); // if fails we cannot give "i" back...
78 #else
80 #endif
81  } else
82  return -1;
83  }
84  __threadfence();
85  return outerNeighbors().push_back(t);
86  }
87 
89  if (tracks().empty()) {
90  auto i = cellTracks.extend(); // maybe wasted....
91  if (i > 0) {
92  cellTracks[i].reset();
93  __threadfence();
94 #ifdef __CUDACC__
95  auto zero = (PtrAsInt)(&cellTracks[0]);
96  atomicCAS((PtrAsInt*)(&theTracks), zero, (PtrAsInt)(&cellTracks[i])); // if fails we cannot give "i" back...
97 #else
99 #endif
100  } else
101  return -1;
102  }
103  __threadfence();
104  return tracks().push_back(t);
105  }
106 
110  __device__ __forceinline__ CellNeighbors const& outerNeighbors() const { return *theOuterNeighbors; }
111  __device__ __forceinline__ float inner_x(Hits const& hh) const { return hh.xGlobal(theInnerHitId); }
112  __device__ __forceinline__ float outer_x(Hits const& hh) const { return hh.xGlobal(theOuterHitId); }
113  __device__ __forceinline__ float inner_y(Hits const& hh) const { return hh.yGlobal(theInnerHitId); }
114  __device__ __forceinline__ float outer_y(Hits const& hh) const { return hh.yGlobal(theOuterHitId); }
115  __device__ __forceinline__ float inner_z(Hits const& hh) const { return theInnerZ; }
116  // { return hh.zGlobal(theInnerHitId); } // { return theInnerZ; }
117  __device__ __forceinline__ float outer_z(Hits const& hh) const { return hh.zGlobal(theOuterHitId); }
118  __device__ __forceinline__ float inner_r(Hits const& hh) const { return theInnerR; }
119  // { return hh.rGlobal(theInnerHitId); } // { return theInnerR; }
120  __device__ __forceinline__ float outer_r(Hits const& hh) const { return hh.rGlobal(theOuterHitId); }
121 
122  __device__ __forceinline__ auto inner_iphi(Hits const& hh) const { return hh.iphi(theInnerHitId); }
123  __device__ __forceinline__ auto outer_iphi(Hits const& hh) const { return hh.iphi(theOuterHitId); }
124 
125  __device__ __forceinline__ float inner_detIndex(Hits const& hh) const { return hh.detectorIndex(theInnerHitId); }
126  __device__ __forceinline__ float outer_detIndex(Hits const& hh) const { return hh.detectorIndex(theOuterHitId); }
127 
128  constexpr unsigned int inner_hit_id() const { return theInnerHitId; }
129  constexpr unsigned int outer_hit_id() const { return theOuterHitId; }
130 
131  __device__ void print_cell() const {
132  printf("printing cell: %d, on layerPair: %d, innerHitId: %d, outerHitId: %d \n",
135  theInnerHitId,
136  theOuterHitId);
137  }
138 
140  GPUCACell const& otherCell,
141  const float ptmin,
142  const float hardCurvCut,
143  const float caThetaCutBarrel,
144  const float caThetaCutForward,
145  const float dcaCutInnerTriplet,
146  const float dcaCutOuterTriplet) const {
147  // detIndex of the layerStart for the Phase1 Pixel Detector:
148  // [BPX1, BPX2, BPX3, BPX4, FP1, FP2, FP3, FN1, FN2, FN3, LAST_VALID]
149  // [ 0, 96, 320, 672, 1184, 1296, 1408, 1520, 1632, 1744, 1856]
150  auto ri = inner_r(hh);
151  auto zi = inner_z(hh);
152 
153  auto ro = outer_r(hh);
154  auto zo = outer_z(hh);
155 
156  auto r1 = otherCell.inner_r(hh);
157  auto z1 = otherCell.inner_z(hh);
158  auto isBarrel = otherCell.outer_detIndex(hh) < caConstants::last_barrel_detIndex;
159  bool aligned = areAlignedRZ(r1,
160  z1,
161  ri,
162  zi,
163  ro,
164  zo,
165  ptmin,
166  isBarrel ? caThetaCutBarrel : caThetaCutForward); // 2.f*thetaCut); // FIXME tune cuts
167  return (aligned && dcaCut(hh,
168  otherCell,
169  otherCell.inner_detIndex(hh) < caConstants::last_bpix1_detIndex ? dcaCutInnerTriplet
170  : dcaCutOuterTriplet,
171  hardCurvCut)); // FIXME tune cuts
172  }
173 
174  __device__ __forceinline__ static bool areAlignedRZ(
175  float r1, float z1, float ri, float zi, float ro, float zo, const float ptmin, const float thetaCut) {
176  float radius_diff = std::abs(r1 - ro);
177  float distance_13_squared = radius_diff * radius_diff + (z1 - zo) * (z1 - zo);
178 
179  float pMin = ptmin * std::sqrt(distance_13_squared); // this needs to be divided by
180  // radius_diff later
181 
182  float tan_12_13_half_mul_distance_13_squared = fabs(z1 * (ri - ro) + zi * (ro - r1) + zo * (r1 - ri));
184  }
185 
186  __device__ inline bool dcaCut(Hits const& hh,
187  GPUCACell const& otherCell,
188  const float region_origin_radius_plus_tolerance,
189  const float maxCurv) const {
190  auto x1 = otherCell.inner_x(hh);
191  auto y1 = otherCell.inner_y(hh);
192 
193  auto x2 = inner_x(hh);
194  auto y2 = inner_y(hh);
195 
196  auto x3 = outer_x(hh);
197  auto y3 = outer_y(hh);
198 
199  CircleEq<float> eq(x1, y1, x2, y2, x3, y3);
200 
201  if (eq.curvature() > maxCurv)
202  return false;
203 
204  return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature());
205  }
206 
207  __device__ __forceinline__ static bool dcaCutH(float x1,
208  float y1,
209  float x2,
210  float y2,
211  float x3,
212  float y3,
213  const float region_origin_radius_plus_tolerance,
214  const float maxCurv) {
215  CircleEq<float> eq(x1, y1, x2, y2, x3, y3);
216 
217  if (eq.curvature() > maxCurv)
218  return false;
219 
220  return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature());
221  }
222 
223  __device__ inline bool hole0(Hits const& hh, GPUCACell const& innerCell) const {
228  int p = innerCell.inner_iphi(hh);
229  if (p < 0)
232  p %= max_ladder_bpx0;
233  auto il = first_ladder_bpx0 + p;
234  auto r0 = hh.averageGeometry().ladderR[il];
235  auto ri = innerCell.inner_r(hh);
236  auto zi = innerCell.inner_z(hh);
237  auto ro = outer_r(hh);
238  auto zo = outer_z(hh);
239  auto z0 = zi + (r0 - ri) * (zo - zi) / (ro - ri);
240  auto z_in_ladder = std::abs(z0 - hh.averageGeometry().ladderZ[il]);
241  auto z_in_module = z_in_ladder - module_length_bpx0 * int(z_in_ladder / module_length_bpx0);
242  auto gap = z_in_module < module_tolerance_bpx0 || z_in_module > (module_length_bpx0 - module_tolerance_bpx0);
243  return gap;
244  }
245 
246  __device__ inline bool hole4(Hits const& hh, GPUCACell const& innerCell) const {
251  int p = outer_iphi(hh);
252  if (p < 0)
255  p %= max_ladder_bpx4;
256  auto il = first_ladder_bpx4 + p;
257  auto r4 = hh.averageGeometry().ladderR[il];
258  auto ri = innerCell.inner_r(hh);
259  auto zi = innerCell.inner_z(hh);
260  auto ro = outer_r(hh);
261  auto zo = outer_z(hh);
262  auto z4 = zo + (r4 - ro) * (zo - zi) / (ro - ri);
263  auto z_in_ladder = std::abs(z4 - hh.averageGeometry().ladderZ[il]);
264  auto z_in_module = z_in_ladder - module_length_bpx4 * int(z_in_ladder / module_length_bpx4);
265  auto gap = z_in_module < module_tolerance_bpx4 || z_in_module > (module_length_bpx4 - module_tolerance_bpx4);
266  auto holeP = z4 > hh.averageGeometry().ladderMaxZ[il] && z4 < hh.averageGeometry().endCapZ[0];
267  auto holeN = z4 < hh.averageGeometry().ladderMinZ[il] && z4 > hh.averageGeometry().endCapZ[1];
268  return gap || holeP || holeN;
269  }
270 
271  // trying to free the track building process from hardcoded layers, leaving
272  // the visit of the graph based on the neighborhood connections between cells.
273  __device__ inline void find_ntuplets(Hits const& hh,
274  GPUCACell* __restrict__ cells,
278  Quality* __restrict__ quality,
279  TmpTuple& tmpNtuplet,
280  const unsigned int minHitsPerNtuplet,
281  bool startAt0) const {
282  // the building process for a track ends if:
283  // it has no right neighbor
284  // it has no compatible neighbor
285  // the ntuplets is then saved if the number of hits it contains is greater
286  // than a threshold
287 
288  tmpNtuplet.push_back_unsafe(theDoubletId_);
289  assert(tmpNtuplet.size() <= 4);
290 
291  bool last = true;
292  for (unsigned int otherCell : outerNeighbors()) {
293  if (cells[otherCell].theDoubletId_ < 0)
294  continue; // killed by earlyFishbone
295  last = false;
296  cells[otherCell].find_ntuplets(
297  hh, cells, cellTracks, foundNtuplets, apc, quality, tmpNtuplet, minHitsPerNtuplet, startAt0);
298  }
299  if (last) { // if long enough save...
300  if ((unsigned int)(tmpNtuplet.size()) >= minHitsPerNtuplet - 1) {
301 #ifdef ONLY_TRIPLETS_IN_HOLE
302  // triplets accepted only pointing to the hole
303  if (tmpNtuplet.size() >= 3 || (startAt0 && hole4(hh, cells[tmpNtuplet[0]])) ||
304  ((!startAt0) && hole0(hh, cells[tmpNtuplet[0]])))
305 #endif
306  {
307  hindex_type hits[6];
308  auto nh = 0U;
309  for (auto c : tmpNtuplet) {
310  hits[nh++] = cells[c].theInnerHitId;
311  }
312  hits[nh] = theOuterHitId;
313  auto it = foundNtuplets.bulkFill(apc, hits, tmpNtuplet.size() + 1);
314  if (it >= 0) { // if negative is overflow....
315  for (auto c : tmpNtuplet)
316  cells[c].addTrack(it, cellTracks);
317  quality[it] = bad; // initialize to bad
318  }
319  }
320  }
321  }
322  tmpNtuplet.pop_back();
323  assert(tmpNtuplet.size() < 4);
324  }
325 
326  // Cell status management
327  __device__ __forceinline__ void kill() { theDoubletId_ = -1; }
328  __device__ __forceinline__ bool isKilled() const { return theDoubletId_ < 0; }
329 
330  __device__ __forceinline__ int16_t layerPairId() const { return theLayerPairId_; }
331 
332  __device__ __forceinline__ bool unused() const { return !theUsed_; }
333  __device__ __forceinline__ void setUsedBit(uint16_t bit) { theUsed_ |= bit; }
334 
335 private:
338 
339  int32_t theDoubletId_;
340  int16_t theLayerPairId_;
341  uint16_t theUsed_; // tbd
342 
343  float theInnerZ;
344  float theInnerR;
345  hindex_type theInnerHitId;
347 };
348 
349 #endif // RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h
caConstants::OuterHitOfCell
cms::cuda::VecArray< uint32_t, maxCellsPerHit > OuterHitOfCell
Definition: CAConstants.h:75
CircleEq.h
cellNeighbors
const caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple cms::cuda::AtomicPairCounter const GPUCACell *__restrict__ const uint32_t *__restrict__ const gpuPixelDoublets::CellNeighborsVector * cellNeighbors
Definition: CAHitNtupletGeneratorKernelsImpl.h:43
init
int init
Definition: HydjetWrapper.h:64
mps_fire.i
i
Definition: mps_fire.py:428
GPUCACell::bad
static constexpr auto bad
Definition: GPUCACell.h:38
GPUCACell::zi
__device__ float float float zi
Definition: GPUCACell.h:175
hfClusterShapes_cfi.hits
hits
Definition: hfClusterShapes_cfi.py:5
cms::cuda::SimpleVector::reset
constexpr void reset()
Definition: SimpleVector.h:108
pixelTrack::Quality::bad
caConstants::CellTracks
cms::cuda::VecArray< tindex_type, maxCellTracks > CellTracks
Definition: CAConstants.h:70
GPUCACell::CellNeighbors
caConstants::CellNeighbors CellNeighbors
Definition: GPUCACell.h:26
testProducerWithPsetDescEmpty_cfi.x2
x2
Definition: testProducerWithPsetDescEmpty_cfi.py:28
caConstants::last_bpix1_detIndex
constexpr uint32_t last_bpix1_detIndex
Definition: CAConstants.h:62
caConstants::CellNeighbors
cms::cuda::VecArray< uint32_t, maxCellNeighbors > CellNeighbors
Definition: CAConstants.h:69
GPUCACell::theDoubletId_
theDoubletId_
Definition: GPUCACell.h:51
TrackingRecHit2DHeterogeneous.h
GPUCACell::layerPairId
__device__ CellTracksVector Hits const int layerPairId
Definition: GPUCACell.h:43
caConstants::module_length_bpx0
constexpr float module_length_bpx0
Definition: CAConstants.h:50
cms::cudacompat::__threadfence
void __threadfence()
Definition: cudaCompat.h:109
GPUCACell::inner_hit_id
constexpr unsigned int inner_hit_id() const
Definition: GPUCACell.h:128
caConstants::max_ladder_bpx0
constexpr uint32_t max_ladder_bpx0
Definition: CAConstants.h:48
GPUCACell::check_alignment
__device__ bool check_alignment(Hits const &hh, GPUCACell const &otherCell, const float ptmin, const float hardCurvCut, const float caThetaCutBarrel, const float caThetaCutForward, const float dcaCutInnerTriplet, const float dcaCutOuterTriplet) const
Definition: GPUCACell.h:139
TrackingRecHit2DSOAView::hindex_type
uint32_t hindex_type
Definition: TrackingRecHit2DSOAView.h:21
cells
const caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple cms::cuda::AtomicPairCounter const GPUCACell *__restrict__ cells
Definition: CAHitNtupletGeneratorKernelsImpl.h:43
GPUCACell::cellTracks
__device__ CellTracksVector & cellTracks
Definition: GPUCACell.h:43
GPUCACell::maxCellsPerHit
static constexpr auto maxCellsPerHit
Definition: GPUCACell.h:24
cms::cuda::VecArray::value_t
T value_t
Definition: VecArray.h:17
caConstants::maxCellsPerHit
constexpr uint32_t maxCellsPerHit
Definition: CAConstants.h:37
GPUCACell::thetaCut
__device__ float float float float float const float const float thetaCut
Definition: GPUCACell.h:175
TrackingRecHit2DSOAView
Definition: TrackingRecHit2DSOAView.h:16
SiPixelPI::zero
Definition: SiPixelPayloadInspectorHelper.h:39
GPUCACell::Hits
TrackingRecHit2DSOAView Hits
Definition: GPUCACell.h:31
GPUCACell::hh
__device__ CellTracksVector Hits const & hh
Definition: GPUCACell.h:43
GPUCACell::theLayerPairId_
theLayerPairId_
Definition: GPUCACell.h:52
cms::cuda::OneToManyAssoc
Definition: OneToManyAssoc.h:143
caConstants::last_barrel_detIndex
constexpr uint32_t last_barrel_detIndex
Definition: CAConstants.h:63
GPUCACell::outerHitId
__device__ CellTracksVector Hits const int int hindex_type hindex_type outerHitId
Definition: GPUCACell.h:48
cms::cuda::SimpleVector
Definition: SimpleVector.h:15
caConstants::module_tolerance_bpx0
constexpr float module_tolerance_bpx0
Definition: CAConstants.h:51
quality
const uint32_t *__restrict__ Quality * quality
Definition: CAHitNtupletGeneratorKernelsImpl.h:122
GPUCACell::GPUCACell
GPUCACell()=default
GPUCACell::CellTracks
caConstants::CellTracks CellTracks
Definition: GPUCACell.h:27
testProducerWithPsetDescEmpty_cfi.x1
x1
Definition: testProducerWithPsetDescEmpty_cfi.py:33
dqmdumpme.last
last
Definition: dqmdumpme.py:56
testProducerWithPsetDescEmpty_cfi.y1
y1
Definition: testProducerWithPsetDescEmpty_cfi.py:29
GPUCACell::outer_hit_id
constexpr unsigned int outer_hit_id() const
Definition: GPUCACell.h:129
pixelTrack::Quality
Quality
Definition: TrackSoAHeterogeneousT.h:13
GPUCACell::z1
__device__ float z1
Definition: GPUCACell.h:175
GPUCACell::print_cell
__device__ void print_cell() const
Definition: GPUCACell.h:131
GPUCACell::doubletId
__device__ CellTracksVector Hits const int int doubletId
Definition: GPUCACell.h:43
mathSSE::sqrt
T sqrt(T t)
Definition: SSEVec.h:19
GPUCACell::CellTracksVector
caConstants::CellTracksVector CellTracksVector
Definition: GPUCACell.h:29
GPUCACell::zo
__device__ float float float float float zo
Definition: GPUCACell.h:175
SimpleVector.h
HLTMuonOfflineAnalyzer_cfi.z0
z0
Definition: HLTMuonOfflineAnalyzer_cfi.py:98
GPUCACell::TmpTuple
cms::cuda::VecArray< uint32_t, 6 > TmpTuple
Definition: GPUCACell.h:34
HLT_FULL_cff.gap
gap
Definition: HLT_FULL_cff.py:8513
VecArray.h
CircleEq
Definition: CircleEq.h:24
mitigatedMETSequence_cff.U
U
Definition: mitigatedMETSequence_cff.py:36
particleFlowDisplacedVertexCandidate_cfi.dcaCut
dcaCut
Definition: particleFlowDisplacedVertexCandidate_cfi.py:17
GPUCACell::tan_12_13_half_mul_distance_13_squared
float tan_12_13_half_mul_distance_13_squared
Definition: GPUCACell.h:182
testProducerWithPsetDescEmpty_cfi.y2
y2
Definition: testProducerWithPsetDescEmpty_cfi.py:30
PixelPluginsPhase0_cfi.isBarrel
isBarrel
Definition: PixelPluginsPhase0_cfi.py:17
GPUCACell::theInnerZ
theInnerZ
Definition: GPUCACell.h:56
cms::cuda::AtomicPairCounter
Definition: AtomicPairCounter.h:11
AlCaHLTBitMon_ParallelJobs.p
def p
Definition: AlCaHLTBitMon_ParallelJobs.py:153
tracks
const uint32_t *__restrict__ const HitContainer *__restrict__ TkSoA *__restrict__ tracks
Definition: CAHitNtupletGeneratorKernelsImpl.h:176
SiStripPI::max
Definition: SiStripPayloadInspectorHelper.h:169
GPUCACell::theTracks
theTracks
Definition: GPUCACell.h:61
caConstants::CellNeighborsVector
cms::cuda::SimpleVector< CellNeighbors > CellNeighborsVector
Definition: CAConstants.h:72
GPUCACell::ro
__device__ float float float float ro
Definition: GPUCACell.h:175
caConstants::max_ladder_bpx4
constexpr uint32_t max_ladder_bpx4
Definition: CAConstants.h:52
createfilelist.int
int
Definition: createfilelist.py:10
cms::cuda::VecArray
Definition: VecArray.h:14
caConstants::module_tolerance_bpx4
constexpr float module_tolerance_bpx4
Definition: CAConstants.h:57
foundNtuplets
const uint32_t *__restrict__ HitContainer * foundNtuplets
Definition: CAHitNtupletGeneratorKernelsImpl.h:139
caConstants::first_ladder_bpx4
constexpr uint32_t first_ladder_bpx4
Definition: CAConstants.h:53
__device__
#define __device__
Definition: SiPixelGainForHLTonGPU.h:15
GPUCACell::pMin
float pMin
Definition: GPUCACell.h:179
__forceinline__
#define __forceinline__
Definition: cudaCompat.h:22
cms::cuda::nh
uint32_t nh
Definition: HistoContainer.h:11
GPUCACell::ri
__device__ float float ri
Definition: GPUCACell.h:175
cms::cudacompat::atomicCAS
T1 atomicCAS(T1 *address, T1 compare, T2 val)
Definition: cudaCompat.h:36
diffTwoXMLs.r1
r1
Definition: diffTwoXMLs.py:53
caConstants::module_length_bpx4
constexpr float module_length_bpx4
Definition: CAConstants.h:56
relativeConstraints.empty
bool empty
Definition: relativeConstraints.py:46
caConstants::first_ladder_bpx0
constexpr uint32_t first_ladder_bpx0
Definition: CAConstants.h:49
apc
const caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple cms::cuda::AtomicPairCounter * apc
Definition: CAHitNtupletGeneratorKernelsImpl.h:43
GPUCACell::PtrAsInt
unsigned long long PtrAsInt
Definition: GPUCACell.h:22
GPUCACell::theInnerR
theInnerR
Definition: GPUCACell.h:57
PixelTrackHeterogeneous.h
pixelTrack::HitContainer
TrackSoA::HitContainer HitContainer
Definition: TrackSoAHeterogeneousT.h:78
GPUCACell
Definition: GPUCACell.h:20
funct::abs
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
GPUCACell::distance_13_squared
float distance_13_squared
Definition: GPUCACell.h:177
cuda_assert.h
c
auto & c
Definition: CAHitNtupletGeneratorKernelsImpl.h:56
GPUCACell::theOuterNeighbors
theOuterNeighbors
Definition: GPUCACell.h:60
CAConstants.h
GPUCACell::hindex_type
Hits::hindex_type hindex_type
Definition: GPUCACell.h:32
submitPVValidationJobs.t
string t
Definition: submitPVValidationJobs.py:644
cms::cuda::SimpleVector::extend
__device__ int extend(int size=1)
Definition: SimpleVector.h:84
GPUCACell::theUsed_
theUsed_
Definition: GPUCACell.h:53
GPUCACell::assert
assert(outerNeighbors().empty())
GPUCACell::ptmin
__device__ float float float float float const float ptmin
Definition: GPUCACell.h:175
GPUCACell::innerHitId
__device__ CellTracksVector Hits const int int hindex_type innerHitId
Definition: GPUCACell.h:43
GPUCACell::theOuterHitId
theOuterHitId
Definition: GPUCACell.h:50
caConstants::CellTracksVector
cms::cuda::SimpleVector< CellTracks > CellTracksVector
Definition: CAConstants.h:73