CMS 3D CMS Logo

GPUCACell.h
Go to the documentation of this file.
1 #ifndef RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h
2 #define RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h
3 
4 //
5 // Author: Felice Pantaleo, CERN
6 //
7 
8 // #define ONLY_TRIPLETS_IN_HOLE
9 
10 #include <cuda_runtime.h>
11 
18 #include "CAConstants.h"
19 
20 class GPUCACell {
21 public:
22  using PtrAsInt = unsigned long long;
23 
30 
33 
35 
38  static constexpr auto bad = pixelTrack::Quality::bad;
39 
40  GPUCACell() = default;
41 
44  Hits const& hh,
45  int layerPairId,
46  int doubletId,
49  theInnerHitId = innerHitId;
53  theUsed_ = 0;
54 
55  // optimization that depends on access pattern
56  theInnerZ = hh.zGlobal(innerHitId);
57  theInnerR = hh.rGlobal(innerHitId);
58 
59  // link to default empty
62  assert(outerNeighbors().empty());
63  assert(tracks().empty());
64  }
65 
67  // use smart cache
68  if (outerNeighbors().empty()) {
69  auto i = cellNeighbors.extend(); // maybe wasted....
70  if (i > 0) {
71  cellNeighbors[i].reset();
72 #ifdef __CUDACC__
73  auto zero = (PtrAsInt)(&cellNeighbors[0]);
75  zero,
76  (PtrAsInt)(&cellNeighbors[i])); // if fails we cannot give "i" back...
77 #else
79 #endif
80  } else
81  return -1;
82  }
83  __threadfence();
84  return outerNeighbors().push_back(t);
85  }
86 
88  if (tracks().empty()) {
89  auto i = cellTracks.extend(); // maybe wasted....
90  if (i > 0) {
91  cellTracks[i].reset();
92 #ifdef __CUDACC__
93  auto zero = (PtrAsInt)(&cellTracks[0]);
94  atomicCAS((PtrAsInt*)(&theTracks), zero, (PtrAsInt)(&cellTracks[i])); // if fails we cannot give "i" back...
95 #else
97 #endif
98  } else
99  return -1;
100  }
101  __threadfence();
102  return tracks().push_back(t);
103  }
104 
108  __device__ __forceinline__ CellNeighbors const& outerNeighbors() const { return *theOuterNeighbors; }
109  __device__ __forceinline__ float inner_x(Hits const& hh) const { return hh.xGlobal(theInnerHitId); }
110  __device__ __forceinline__ float outer_x(Hits const& hh) const { return hh.xGlobal(theOuterHitId); }
111  __device__ __forceinline__ float inner_y(Hits const& hh) const { return hh.yGlobal(theInnerHitId); }
112  __device__ __forceinline__ float outer_y(Hits const& hh) const { return hh.yGlobal(theOuterHitId); }
113  __device__ __forceinline__ float inner_z(Hits const& hh) const { return theInnerZ; }
114  // { return hh.zGlobal(theInnerHitId); } // { return theInnerZ; }
115  __device__ __forceinline__ float outer_z(Hits const& hh) const { return hh.zGlobal(theOuterHitId); }
116  __device__ __forceinline__ float inner_r(Hits const& hh) const { return theInnerR; }
117  // { return hh.rGlobal(theInnerHitId); } // { return theInnerR; }
118  __device__ __forceinline__ float outer_r(Hits const& hh) const { return hh.rGlobal(theOuterHitId); }
119 
120  __device__ __forceinline__ auto inner_iphi(Hits const& hh) const { return hh.iphi(theInnerHitId); }
121  __device__ __forceinline__ auto outer_iphi(Hits const& hh) const { return hh.iphi(theOuterHitId); }
122 
123  __device__ __forceinline__ float inner_detIndex(Hits const& hh) const { return hh.detectorIndex(theInnerHitId); }
124  __device__ __forceinline__ float outer_detIndex(Hits const& hh) const { return hh.detectorIndex(theOuterHitId); }
125 
126  constexpr unsigned int inner_hit_id() const { return theInnerHitId; }
127  constexpr unsigned int outer_hit_id() const { return theOuterHitId; }
128 
129  __device__ void print_cell() const {
130  printf("printing cell: %d, on layerPair: %d, innerHitId: %d, outerHitId: %d \n",
133  theInnerHitId,
134  theOuterHitId);
135  }
136 
138  GPUCACell const& otherCell,
139  const float ptmin,
140  const float hardCurvCut,
141  const float caThetaCutBarrel,
142  const float caThetaCutForward,
143  const float dcaCutInnerTriplet,
144  const float dcaCutOuterTriplet) const {
145  // detIndex of the layerStart for the Phase1 Pixel Detector:
146  // [BPX1, BPX2, BPX3, BPX4, FP1, FP2, FP3, FN1, FN2, FN3, LAST_VALID]
147  // [ 0, 96, 320, 672, 1184, 1296, 1408, 1520, 1632, 1744, 1856]
148  auto ri = inner_r(hh);
149  auto zi = inner_z(hh);
150 
151  auto ro = outer_r(hh);
152  auto zo = outer_z(hh);
153 
154  auto r1 = otherCell.inner_r(hh);
155  auto z1 = otherCell.inner_z(hh);
156  auto isBarrel = otherCell.outer_detIndex(hh) < caConstants::last_barrel_detIndex;
157  bool aligned = areAlignedRZ(r1,
158  z1,
159  ri,
160  zi,
161  ro,
162  zo,
163  ptmin,
164  isBarrel ? caThetaCutBarrel : caThetaCutForward); // 2.f*thetaCut); // FIXME tune cuts
165  return (aligned && dcaCut(hh,
166  otherCell,
167  otherCell.inner_detIndex(hh) < caConstants::last_bpix1_detIndex ? dcaCutInnerTriplet
168  : dcaCutOuterTriplet,
169  hardCurvCut)); // FIXME tune cuts
170  }
171 
172  __device__ __forceinline__ static bool areAlignedRZ(
173  float r1, float z1, float ri, float zi, float ro, float zo, const float ptmin, const float thetaCut) {
174  float radius_diff = std::abs(r1 - ro);
175  float distance_13_squared = radius_diff * radius_diff + (z1 - zo) * (z1 - zo);
176 
177  float pMin = ptmin * std::sqrt(distance_13_squared); // this needs to be divided by
178  // radius_diff later
179 
180  float tan_12_13_half_mul_distance_13_squared = fabs(z1 * (ri - ro) + zi * (ro - r1) + zo * (r1 - ri));
182  }
183 
184  __device__ inline bool dcaCut(Hits const& hh,
185  GPUCACell const& otherCell,
186  const float region_origin_radius_plus_tolerance,
187  const float maxCurv) const {
188  auto x1 = otherCell.inner_x(hh);
189  auto y1 = otherCell.inner_y(hh);
190 
191  auto x2 = inner_x(hh);
192  auto y2 = inner_y(hh);
193 
194  auto x3 = outer_x(hh);
195  auto y3 = outer_y(hh);
196 
197  CircleEq<float> eq(x1, y1, x2, y2, x3, y3);
198 
199  if (eq.curvature() > maxCurv)
200  return false;
201 
202  return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature());
203  }
204 
205  __device__ __forceinline__ static bool dcaCutH(float x1,
206  float y1,
207  float x2,
208  float y2,
209  float x3,
210  float y3,
211  const float region_origin_radius_plus_tolerance,
212  const float maxCurv) {
213  CircleEq<float> eq(x1, y1, x2, y2, x3, y3);
214 
215  if (eq.curvature() > maxCurv)
216  return false;
217 
218  return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature());
219  }
220 
221  __device__ inline bool hole0(Hits const& hh, GPUCACell const& innerCell) const {
226  int p = innerCell.inner_iphi(hh);
227  if (p < 0)
230  p %= max_ladder_bpx0;
231  auto il = first_ladder_bpx0 + p;
232  auto r0 = hh.averageGeometry().ladderR[il];
233  auto ri = innerCell.inner_r(hh);
234  auto zi = innerCell.inner_z(hh);
235  auto ro = outer_r(hh);
236  auto zo = outer_z(hh);
237  auto z0 = zi + (r0 - ri) * (zo - zi) / (ro - ri);
238  auto z_in_ladder = std::abs(z0 - hh.averageGeometry().ladderZ[il]);
239  auto z_in_module = z_in_ladder - module_length_bpx0 * int(z_in_ladder / module_length_bpx0);
240  auto gap = z_in_module < module_tolerance_bpx0 || z_in_module > (module_length_bpx0 - module_tolerance_bpx0);
241  return gap;
242  }
243 
244  __device__ inline bool hole4(Hits const& hh, GPUCACell const& innerCell) const {
249  int p = outer_iphi(hh);
250  if (p < 0)
253  p %= max_ladder_bpx4;
254  auto il = first_ladder_bpx4 + p;
255  auto r4 = hh.averageGeometry().ladderR[il];
256  auto ri = innerCell.inner_r(hh);
257  auto zi = innerCell.inner_z(hh);
258  auto ro = outer_r(hh);
259  auto zo = outer_z(hh);
260  auto z4 = zo + (r4 - ro) * (zo - zi) / (ro - ri);
261  auto z_in_ladder = std::abs(z4 - hh.averageGeometry().ladderZ[il]);
262  auto z_in_module = z_in_ladder - module_length_bpx4 * int(z_in_ladder / module_length_bpx4);
263  auto gap = z_in_module < module_tolerance_bpx4 || z_in_module > (module_length_bpx4 - module_tolerance_bpx4);
264  auto holeP = z4 > hh.averageGeometry().ladderMaxZ[il] && z4 < hh.averageGeometry().endCapZ[0];
265  auto holeN = z4 < hh.averageGeometry().ladderMinZ[il] && z4 > hh.averageGeometry().endCapZ[1];
266  return gap || holeP || holeN;
267  }
268 
269  // trying to free the track building process from hardcoded layers, leaving
270  // the visit of the graph based on the neighborhood connections between cells.
271  __device__ inline void find_ntuplets(Hits const& hh,
272  GPUCACell* __restrict__ cells,
276  Quality* __restrict__ quality,
277  TmpTuple& tmpNtuplet,
278  const unsigned int minHitsPerNtuplet,
279  bool startAt0) const {
280  // the building process for a track ends if:
281  // it has no right neighbor
282  // it has no compatible neighbor
283  // the ntuplets is then saved if the number of hits it contains is greater
284  // than a threshold
285 
286  tmpNtuplet.push_back_unsafe(theDoubletId_);
287  assert(tmpNtuplet.size() <= 4);
288 
289  bool last = true;
290  for (unsigned int otherCell : outerNeighbors()) {
291  if (cells[otherCell].theDoubletId_ < 0)
292  continue; // killed by earlyFishbone
293  last = false;
294  cells[otherCell].find_ntuplets(
295  hh, cells, cellTracks, foundNtuplets, apc, quality, tmpNtuplet, minHitsPerNtuplet, startAt0);
296  }
297  if (last) { // if long enough save...
298  if ((unsigned int)(tmpNtuplet.size()) >= minHitsPerNtuplet - 1) {
299 #ifdef ONLY_TRIPLETS_IN_HOLE
300  // triplets accepted only pointing to the hole
301  if (tmpNtuplet.size() >= 3 || (startAt0 && hole4(hh, cells[tmpNtuplet[0]])) ||
302  ((!startAt0) && hole0(hh, cells[tmpNtuplet[0]])))
303 #endif
304  {
305  hindex_type hits[6];
306  auto nh = 0U;
307  for (auto c : tmpNtuplet) {
308  hits[nh++] = cells[c].theInnerHitId;
309  }
310  hits[nh] = theOuterHitId;
311  auto it = foundNtuplets.bulkFill(apc, hits, tmpNtuplet.size() + 1);
312  if (it >= 0) { // if negative is overflow....
313  for (auto c : tmpNtuplet)
314  cells[c].addTrack(it, cellTracks);
315  quality[it] = bad; // initialize to bad
316  }
317  }
318  }
319  }
320  tmpNtuplet.pop_back();
321  assert(tmpNtuplet.size() < 4);
322  }
323 
324  // Cell status management
325  __device__ __forceinline__ void kill() { theDoubletId_ = -1; }
326  __device__ __forceinline__ bool isKilled() const { return theDoubletId_ < 0; }
327 
328  __device__ __forceinline__ int16_t layerPairId() const { return theLayerPairId_; }
329 
330  __device__ __forceinline__ bool unused() const { return !theUsed_; }
331  __device__ __forceinline__ void setUsedBit(uint16_t bit) { theUsed_ |= bit; }
332 
333 private:
336 
337  int32_t theDoubletId_;
338  int16_t theLayerPairId_;
339  uint16_t theUsed_; // tbd
340 
341  float theInnerZ;
342  float theInnerR;
343  hindex_type theInnerHitId;
345 };
346 
347 #endif // RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h
caConstants::OuterHitOfCell
cms::cuda::VecArray< uint32_t, maxCellsPerHit > OuterHitOfCell
Definition: CAConstants.h:75
CircleEq.h
cellNeighbors
const caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple cms::cuda::AtomicPairCounter const GPUCACell *__restrict__ const uint32_t *__restrict__ const gpuPixelDoublets::CellNeighborsVector * cellNeighbors
Definition: CAHitNtupletGeneratorKernelsImpl.h:33
init
int init
Definition: HydjetWrapper.h:64
mps_fire.i
i
Definition: mps_fire.py:428
GPUCACell::bad
static constexpr auto bad
Definition: GPUCACell.h:38
GPUCACell::zi
__device__ float float float zi
Definition: GPUCACell.h:173
hfClusterShapes_cfi.hits
hits
Definition: hfClusterShapes_cfi.py:5
cms::cuda::SimpleVector::reset
constexpr void reset()
Definition: SimpleVector.h:108
pixelTrack::Quality::bad
caConstants::CellTracks
cms::cuda::VecArray< tindex_type, maxCellTracks > CellTracks
Definition: CAConstants.h:70
GPUCACell::CellNeighbors
caConstants::CellNeighbors CellNeighbors
Definition: GPUCACell.h:26
testProducerWithPsetDescEmpty_cfi.x2
x2
Definition: testProducerWithPsetDescEmpty_cfi.py:28
caConstants::last_bpix1_detIndex
constexpr uint32_t last_bpix1_detIndex
Definition: CAConstants.h:62
caConstants::CellNeighbors
cms::cuda::VecArray< uint32_t, maxCellNeighbors > CellNeighbors
Definition: CAConstants.h:69
AlCaHLTBitMon_ParallelJobs.p
p
Definition: AlCaHLTBitMon_ParallelJobs.py:153
GPUCACell::theDoubletId_
theDoubletId_
Definition: GPUCACell.h:51
TrackingRecHit2DHeterogeneous.h
GPUCACell::layerPairId
__device__ CellTracksVector Hits const int layerPairId
Definition: GPUCACell.h:43
caConstants::module_length_bpx0
constexpr float module_length_bpx0
Definition: CAConstants.h:50
cms::cudacompat::__threadfence
void __threadfence()
Definition: cudaCompat.h:78
GPUCACell::inner_hit_id
constexpr unsigned int inner_hit_id() const
Definition: GPUCACell.h:126
caConstants::max_ladder_bpx0
constexpr uint32_t max_ladder_bpx0
Definition: CAConstants.h:48
GPUCACell::check_alignment
__device__ bool check_alignment(Hits const &hh, GPUCACell const &otherCell, const float ptmin, const float hardCurvCut, const float caThetaCutBarrel, const float caThetaCutForward, const float dcaCutInnerTriplet, const float dcaCutOuterTriplet) const
Definition: GPUCACell.h:137
TrackingRecHit2DSOAView::hindex_type
uint32_t hindex_type
Definition: TrackingRecHit2DSOAView.h:18
cells
const caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple cms::cuda::AtomicPairCounter const GPUCACell *__restrict__ cells
Definition: CAHitNtupletGeneratorKernelsImpl.h:33
GPUCACell::cellTracks
__device__ CellTracksVector & cellTracks
Definition: GPUCACell.h:43
GPUCACell::maxCellsPerHit
static constexpr auto maxCellsPerHit
Definition: GPUCACell.h:24
cms::cuda::VecArray::value_t
T value_t
Definition: VecArray.h:17
caConstants::maxCellsPerHit
constexpr uint32_t maxCellsPerHit
Definition: CAConstants.h:37
GPUCACell::thetaCut
__device__ float float float float float const float const float thetaCut
Definition: GPUCACell.h:173
TrackingRecHit2DSOAView
Definition: TrackingRecHit2DSOAView.h:15
SiPixelPI::zero
Definition: SiPixelPayloadInspectorHelper.h:39
GPUCACell::Hits
TrackingRecHit2DSOAView Hits
Definition: GPUCACell.h:31
GPUCACell::hh
__device__ CellTracksVector Hits const & hh
Definition: GPUCACell.h:43
GPUCACell::theLayerPairId_
theLayerPairId_
Definition: GPUCACell.h:52
caConstants::last_barrel_detIndex
constexpr uint32_t last_barrel_detIndex
Definition: CAConstants.h:63
GPUCACell::outerHitId
__device__ CellTracksVector Hits const int int hindex_type hindex_type outerHitId
Definition: GPUCACell.h:48
cms::cuda::SimpleVector
Definition: SimpleVector.h:15
caConstants::module_tolerance_bpx0
constexpr float module_tolerance_bpx0
Definition: CAConstants.h:51
quality
const uint32_t *__restrict__ Quality * quality
Definition: CAHitNtupletGeneratorKernelsImpl.h:109
GPUCACell::GPUCACell
GPUCACell()=default
GPUCACell::CellTracks
caConstants::CellTracks CellTracks
Definition: GPUCACell.h:27
testProducerWithPsetDescEmpty_cfi.x1
x1
Definition: testProducerWithPsetDescEmpty_cfi.py:33
dqmdumpme.last
last
Definition: dqmdumpme.py:56
testProducerWithPsetDescEmpty_cfi.y1
y1
Definition: testProducerWithPsetDescEmpty_cfi.py:29
GPUCACell::outer_hit_id
constexpr unsigned int outer_hit_id() const
Definition: GPUCACell.h:127
pixelTrack::Quality
Quality
Definition: TrackSoAHeterogeneousT.h:10
GPUCACell::z1
__device__ float z1
Definition: GPUCACell.h:173
GPUCACell::print_cell
__device__ void print_cell() const
Definition: GPUCACell.h:129
GPUCACell::doubletId
__device__ CellTracksVector Hits const int int doubletId
Definition: GPUCACell.h:43
mathSSE::sqrt
T sqrt(T t)
Definition: SSEVec.h:19
cms::cuda::nh
uint32_t nh
Definition: HistoContainer.h:23
GPUCACell::CellTracksVector
caConstants::CellTracksVector CellTracksVector
Definition: GPUCACell.h:29
GPUCACell::zo
__device__ float float float float float zo
Definition: GPUCACell.h:173
SimpleVector.h
HLTMuonOfflineAnalyzer_cfi.z0
z0
Definition: HLTMuonOfflineAnalyzer_cfi.py:98
GPUCACell::TmpTuple
cms::cuda::VecArray< uint32_t, 6 > TmpTuple
Definition: GPUCACell.h:34
HLT_FULL_cff.gap
gap
Definition: HLT_FULL_cff.py:8498
VecArray.h
CircleEq
Definition: CircleEq.h:24
mitigatedMETSequence_cff.U
U
Definition: mitigatedMETSequence_cff.py:36
particleFlowDisplacedVertexCandidate_cfi.dcaCut
dcaCut
Definition: particleFlowDisplacedVertexCandidate_cfi.py:17
GPUCACell::tan_12_13_half_mul_distance_13_squared
float tan_12_13_half_mul_distance_13_squared
Definition: GPUCACell.h:180
testProducerWithPsetDescEmpty_cfi.y2
y2
Definition: testProducerWithPsetDescEmpty_cfi.py:30
PixelPluginsPhase0_cfi.isBarrel
isBarrel
Definition: PixelPluginsPhase0_cfi.py:17
GPUCACell::theInnerZ
theInnerZ
Definition: GPUCACell.h:56
cms::cuda::AtomicPairCounter
Definition: AtomicPairCounter.h:11
tracks
const uint32_t *__restrict__ const HitContainer *__restrict__ TkSoA *__restrict__ tracks
Definition: CAHitNtupletGeneratorKernelsImpl.h:159
SiStripPI::max
Definition: SiStripPayloadInspectorHelper.h:169
GPUCACell::theTracks
theTracks
Definition: GPUCACell.h:61
caConstants::CellNeighborsVector
cms::cuda::SimpleVector< CellNeighbors > CellNeighborsVector
Definition: CAConstants.h:72
GPUCACell::ro
__device__ float float float float ro
Definition: GPUCACell.h:173
caConstants::max_ladder_bpx4
constexpr uint32_t max_ladder_bpx4
Definition: CAConstants.h:52
createfilelist.int
int
Definition: createfilelist.py:10
cms::cuda::VecArray
Definition: VecArray.h:14
caConstants::module_tolerance_bpx4
constexpr float module_tolerance_bpx4
Definition: CAConstants.h:57
foundNtuplets
const uint32_t *__restrict__ HitContainer * foundNtuplets
Definition: CAHitNtupletGeneratorKernelsImpl.h:124
caConstants::first_ladder_bpx4
constexpr uint32_t first_ladder_bpx4
Definition: CAConstants.h:53
__device__
#define __device__
Definition: SiPixelGainForHLTonGPU.h:15
GPUCACell::pMin
float pMin
Definition: GPUCACell.h:177
__forceinline__
#define __forceinline__
Definition: cudaCompat.h:22
GPUCACell::ri
__device__ float float ri
Definition: GPUCACell.h:173
cms::cudacompat::atomicCAS
T1 atomicCAS(T1 *address, T1 compare, T2 val)
Definition: cudaCompat.h:36
diffTwoXMLs.r1
r1
Definition: diffTwoXMLs.py:53
caConstants::module_length_bpx4
constexpr float module_length_bpx4
Definition: CAConstants.h:56
relativeConstraints.empty
bool empty
Definition: relativeConstraints.py:46
caConstants::first_ladder_bpx0
constexpr uint32_t first_ladder_bpx0
Definition: CAConstants.h:49
apc
const caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple cms::cuda::AtomicPairCounter * apc
Definition: CAHitNtupletGeneratorKernelsImpl.h:33
GPUCACell::PtrAsInt
unsigned long long PtrAsInt
Definition: GPUCACell.h:22
GPUCACell::theInnerR
theInnerR
Definition: GPUCACell.h:57
PixelTrackHeterogeneous.h
pixelTrack::HitContainer
TrackSoA::HitContainer HitContainer
Definition: TrackSoAHeterogeneousT.h:69
GPUCACell
Definition: GPUCACell.h:20
funct::abs
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
GPUCACell::distance_13_squared
float distance_13_squared
Definition: GPUCACell.h:175
cms::cuda::HistoContainer
Definition: HistoContainer.h:152
cuda_assert.h
c
auto & c
Definition: CAHitNtupletGeneratorKernelsImpl.h:46
GPUCACell::theOuterNeighbors
theOuterNeighbors
Definition: GPUCACell.h:60
CAConstants.h
GPUCACell::hindex_type
Hits::hindex_type hindex_type
Definition: GPUCACell.h:32
submitPVValidationJobs.t
string t
Definition: submitPVValidationJobs.py:644
cms::cuda::SimpleVector::extend
__device__ int extend(int size=1)
Definition: SimpleVector.h:84
GPUCACell::theUsed_
theUsed_
Definition: GPUCACell.h:53
GPUCACell::assert
assert(outerNeighbors().empty())
GPUCACell::ptmin
__device__ float float float float float const float ptmin
Definition: GPUCACell.h:173
GPUCACell::innerHitId
__device__ CellTracksVector Hits const int int hindex_type innerHitId
Definition: GPUCACell.h:43
GPUCACell::theOuterHitId
theOuterHitId
Definition: GPUCACell.h:50
caConstants::CellTracksVector
cms::cuda::SimpleVector< CellTracks > CellTracksVector
Definition: CAConstants.h:73