CMS 3D CMS Logo

CAHitNtupletGeneratorKernelsImpl.h
Go to the documentation of this file.
1 //
2 // Original Author: Felice Pantaleo, CERN
3 //
4 
5 // #define NTUPLE_DEBUG
6 // #define GPU_DEBUG
7 
8 #include <cmath>
9 #include <cstdint>
10 #include <limits>
11 
12 #include <cuda_runtime.h>
13 
17 
18 #include "CAConstants.h"
20 #include "GPUCACell.h"
21 #include "gpuFishbone.h"
22 #include "gpuPixelDoublets.h"
23 
26 
29 
33 
34 namespace {
35 
36  constexpr uint16_t tkNotFound = std::numeric_limits<uint16_t>::max();
37  constexpr float maxScore = std::numeric_limits<float>::max();
38  constexpr float nSigma2 = 25.f;
39 
40 } // namespace
41 
42 __global__ void kernel_checkOverflows(HitContainer const *foundNtuplets,
46  GPUCACell const *__restrict__ cells,
47  uint32_t const *__restrict__ nCells,
50  GPUCACell::OuterHitOfCell const *__restrict__ isOuterHitOfCell,
51  int32_t nHits,
52  uint32_t maxNumberOfDoublets,
54  auto first = threadIdx.x + blockIdx.x * blockDim.x;
55 
56  auto &c = *counters;
57  // counters once per event
58  if (0 == first) {
59  atomicAdd(&c.nEvents, 1);
60  atomicAdd(&c.nHits, nHits);
61  atomicAdd(&c.nCells, *nCells);
62  atomicAdd(&c.nTuples, apc->get().m);
63  atomicAdd(&c.nFitTracks, tupleMultiplicity->size());
64  }
65 
66 #ifdef NTUPLE_DEBUG
67  if (0 == first) {
68  printf("number of found cells %d, found tuples %d with total hits %d out of %d %d\n",
69  *nCells,
70  apc->get().m,
71  apc->get().n,
72  nHits,
73  hitToTuple->totOnes());
75  assert(foundNtuplets->size(apc->get().m) == 0);
76  assert(foundNtuplets->size() == apc->get().n);
77  }
78  }
79 
80  for (int idx = first, nt = foundNtuplets->nOnes(); idx < nt; idx += gridDim.x * blockDim.x) {
81  if (foundNtuplets->size(idx) > 5)
82  printf("ERROR %d, %d\n", idx, foundNtuplets->size(idx));
83  assert(foundNtuplets->size(idx) < 6);
84  for (auto ih = foundNtuplets->begin(idx); ih != foundNtuplets->end(idx); ++ih)
85  assert(int(*ih) < nHits);
86  }
87 #endif
88 
89  if (0 == first) {
91  printf("Tuples overflow\n");
93  printf("Cells overflow\n");
94  if (cellNeighbors && cellNeighbors->full())
95  printf("cellNeighbors overflow\n");
96  if (cellTracks && cellTracks->full())
97  printf("cellTracks overflow\n");
98  if (int(hitToTuple->nOnes()) < nHits)
99  printf("ERROR hitToTuple overflow %d %d\n", hitToTuple->nOnes(), nHits);
100  }
101 
102  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
103  auto const &thisCell = cells[idx];
104  if (thisCell.outerNeighbors().full()) //++tooManyNeighbors[thisCell.theLayerPairId];
105  printf("OuterNeighbors overflow %d in %d\n", idx, thisCell.layerPairId());
106  if (thisCell.tracks().full()) //++tooManyTracks[thisCell.theLayerPairId];
107  printf("Tracks overflow %d in %d\n", idx, thisCell.layerPairId());
108  if (thisCell.isKilled())
109  atomicAdd(&c.nKilledCells, 1);
110  if (thisCell.unused())
111  atomicAdd(&c.nEmptyCells, 1);
112  if (0 == hitToTuple->size(thisCell.inner_hit_id()) && 0 == hitToTuple->size(thisCell.outer_hit_id()))
113  atomicAdd(&c.nZeroTrackCells, 1);
114  }
115 
116  for (int idx = first, nt = nHits; idx < nt; idx += gridDim.x * blockDim.x) {
117  if (isOuterHitOfCell[idx].full()) // ++tooManyOuterHitOfCell;
118  printf("OuterHitOfCell overflow %d\n", idx);
119  }
120 }
121 
122 __global__ void kernel_fishboneCleaner(GPUCACell const *cells, uint32_t const *__restrict__ nCells, Quality *quality) {
123  constexpr auto reject = pixelTrack::Quality::dup;
124 
125  auto first = threadIdx.x + blockIdx.x * blockDim.x;
126  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
127  auto const &thisCell = cells[idx];
128  if (!thisCell.isKilled())
129  continue;
130 
131  for (auto it : thisCell.tracks())
132  quality[it] = reject;
133  }
134 }
135 
136 // remove shorter tracks if sharing a cell
137 // It does not seem to affect efficiency in any way!
138 __global__ void kernel_earlyDuplicateRemover(GPUCACell const *cells,
139  uint32_t const *__restrict__ nCells,
141  Quality *quality,
143  // quality to mark rejected
144  constexpr auto reject = pixelTrack::Quality::edup;
145 
146  assert(nCells);
147  auto first = threadIdx.x + blockIdx.x * blockDim.x;
148  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
149  auto const &thisCell = cells[idx];
150 
151  if (thisCell.tracks().size() < 2)
152  continue;
153  //if (0==thisCell.theUsed) continue;
154  // if (thisCell.theDoubletId < 0) continue;
155 
156  uint32_t maxNh = 0;
157 
158  // find maxNh
159  for (auto it : thisCell.tracks()) {
160  auto nh = foundNtuplets->size(it);
161  maxNh = std::max(nh, maxNh);
162  }
163 
164  // quad pass through (leave it her for tests)
165  // maxNh = std::min(4U, maxNh);
166 
167  for (auto it : thisCell.tracks()) {
168  if (foundNtuplets->size(it) < maxNh)
169  quality[it] = reject; //no race: simple assignment of the same constant
170  }
171  }
172 }
173 
174 // assume the above (so, short tracks already removed)
175 __global__ void kernel_fastDuplicateRemover(GPUCACell const *__restrict__ cells,
176  uint32_t const *__restrict__ nCells,
177  HitContainer const *__restrict__ foundNtuplets,
178  TkSoA *__restrict__ tracks,
179  bool dupPassThrough) {
180  // quality to mark rejected
183 
184  assert(nCells);
185 
186  auto first = threadIdx.x + blockIdx.x * blockDim.x;
187  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
188  auto const &thisCell = cells[idx];
189  if (thisCell.tracks().size() < 2)
190  continue;
191  // if (thisCell.theDoubletId < 0) continue;
192 
193  float mc = maxScore;
194  uint16_t im = tkNotFound;
195 
196  /* chi2 penalize higher-pt tracks (try rescale it?)
197  auto score = [&](auto it) {
198  return foundNtuplets->size(it) < 4 ?
199  std::abs(tracks->tip(it)) : // tip for triplets
200  tracks->chi2(it); //chi2 for quads
201  };
202  */
203 
204  auto score = [&](auto it) { return std::abs(tracks->tip(it)); };
205 
206  // full crazy combinatorics
207  int ntr = thisCell.tracks().size();
208  for (int i = 0; i < ntr; ++i) {
209  auto it = thisCell.tracks()[i];
210  auto qi = tracks->quality(it);
211  if (qi <= reject)
212  continue;
213  auto opi = tracks->stateAtBS.state(it)(2);
214  auto e2opi = tracks->stateAtBS.covariance(it)(9);
215  auto cti = tracks->stateAtBS.state(it)(3);
216  auto e2cti = tracks->stateAtBS.covariance(it)(12);
217  for (auto j = i + 1; j < ntr; ++j) {
218  auto jt = thisCell.tracks()[j];
219  auto qj = tracks->quality(jt);
220  if (qj <= reject)
221  continue;
222 #ifdef GPU_DEBUG
223  if (foundNtuplets->size(it) != foundNtuplets->size(jt))
224  printf(" a mess\n");
225 #endif
226  auto opj = tracks->stateAtBS.state(jt)(2);
227  auto ctj = tracks->stateAtBS.state(jt)(3);
228  auto dct = nSigma2 * (tracks->stateAtBS.covariance(jt)(12) + e2cti);
229  if ((cti - ctj) * (cti - ctj) > dct)
230  continue;
231  auto dop = nSigma2 * (tracks->stateAtBS.covariance(jt)(9) + e2opi);
232  if ((opi - opj) * (opi - opj) > dop)
233  continue;
234  if ((qj < qi) || (qj == qi && score(it) < score(jt)))
235  tracks->quality(jt) = reject;
236  else {
237  tracks->quality(it) = reject;
238  break;
239  }
240  }
241  }
242 
243  // find maxQual
244  auto maxQual = reject; // no duplicate!
245  for (auto it : thisCell.tracks()) {
246  if (tracks->quality(it) > maxQual)
247  maxQual = tracks->quality(it);
248  }
249 
250  if (maxQual <= loose)
251  continue;
252 
253  // find min score
254  for (auto it : thisCell.tracks()) {
255  if (tracks->quality(it) == maxQual && score(it) < mc) {
256  mc = score(it);
257  im = it;
258  }
259  }
260 
261  if (tkNotFound == im)
262  continue;
263 
264  // mark all other duplicates (not yet, keep it loose)
265  for (auto it : thisCell.tracks()) {
266  if (tracks->quality(it) > loose && it != im)
267  tracks->quality(it) = loose; //no race: simple assignment of the same constant
268  }
269  }
270 }
271 
272 __global__ void kernel_connect(cms::cuda::AtomicPairCounter *apc1,
273  cms::cuda::AtomicPairCounter *apc2, // just to zero them,
274  GPUCACell::Hits const *__restrict__ hhp,
275  GPUCACell *cells,
276  uint32_t const *__restrict__ nCells,
278  GPUCACell::OuterHitOfCell const *__restrict__ isOuterHitOfCell,
279  float hardCurvCut,
280  float ptmin,
281  float CAThetaCutBarrel,
282  float CAThetaCutForward,
283  float dcaCutInnerTriplet,
284  float dcaCutOuterTriplet) {
285  auto const &hh = *hhp;
286 
287  auto firstCellIndex = threadIdx.y + blockIdx.y * blockDim.y;
288  auto first = threadIdx.x;
289  auto stride = blockDim.x;
290 
291  if (0 == (firstCellIndex + first)) {
292  (*apc1) = 0;
293  (*apc2) = 0;
294  } // ready for next kernel
295 
296  for (int idx = firstCellIndex, nt = (*nCells); idx < nt; idx += gridDim.y * blockDim.y) {
297  auto cellIndex = idx;
298  auto &thisCell = cells[idx];
299  auto innerHitId = thisCell.inner_hit_id();
300  int numberOfPossibleNeighbors = isOuterHitOfCell[innerHitId].size();
301  auto vi = isOuterHitOfCell[innerHitId].data();
302 
303  auto ri = thisCell.inner_r(hh);
304  auto zi = thisCell.inner_z(hh);
305 
306  auto ro = thisCell.outer_r(hh);
307  auto zo = thisCell.outer_z(hh);
308  auto isBarrel = thisCell.inner_detIndex(hh) < caConstants::last_barrel_detIndex;
309 
310  for (int j = first; j < numberOfPossibleNeighbors; j += stride) {
311  auto otherCell = __ldg(vi + j);
312  auto &oc = cells[otherCell];
313  auto r1 = oc.inner_r(hh);
314  auto z1 = oc.inner_z(hh);
315  bool aligned = GPUCACell::areAlignedRZ(
316  r1,
317  z1,
318  ri,
319  zi,
320  ro,
321  zo,
322  ptmin,
323  isBarrel ? CAThetaCutBarrel : CAThetaCutForward); // 2.f*thetaCut); // FIXME tune cuts
324  if (aligned && thisCell.dcaCut(hh,
325  oc,
326  oc.inner_detIndex(hh) < caConstants::last_bpix1_detIndex ? dcaCutInnerTriplet
327  : dcaCutOuterTriplet,
328  hardCurvCut)) { // FIXME tune cuts
329  oc.addOuterNeighbor(cellIndex, *cellNeighbors);
330  thisCell.setUsedBit(1);
331  oc.setUsedBit(1);
332  }
333  } // loop on inner cells
334  } // loop on outer cells
335 }
336 
337 __global__ void kernel_find_ntuplets(GPUCACell::Hits const *__restrict__ hhp,
338  GPUCACell *__restrict__ cells,
339  uint32_t const *nCells,
343  Quality *__restrict__ quality,
344  unsigned int minHitsPerNtuplet) {
345  // recursive: not obvious to widen
346  auto const &hh = *hhp;
347 
348  auto first = threadIdx.x + blockIdx.x * blockDim.x;
349  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
350  auto const &thisCell = cells[idx];
351  if (thisCell.isKilled())
352  continue; // cut by earlyFishbone
353  // we require at least three hits...
354  if (thisCell.outerNeighbors().empty())
355  continue;
356  auto pid = thisCell.layerPairId();
357  auto doit = minHitsPerNtuplet > 3 ? pid < 3 : pid < 8 || pid > 12;
358  if (doit) {
360  stack.reset();
361  thisCell.find_ntuplets(hh, cells, *cellTracks, *foundNtuplets, *apc, quality, stack, minHitsPerNtuplet, pid < 3);
362  assert(stack.empty());
363  // printf("in %d found quadruplets: %d\n", cellIndex, apc->get());
364  }
365  }
366 }
367 
368 __global__ void kernel_mark_used(GPUCACell::Hits const *__restrict__ hhp,
369  GPUCACell *__restrict__ cells,
370  uint32_t const *nCells) {
371  auto first = threadIdx.x + blockIdx.x * blockDim.x;
372  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
373  auto &thisCell = cells[idx];
374  if (!thisCell.tracks().empty())
375  thisCell.setUsedBit(2);
376  }
377 }
378 
379 __global__ void kernel_countMultiplicity(HitContainer const *__restrict__ foundNtuplets,
380  Quality const *__restrict__ quality,
382  auto first = blockIdx.x * blockDim.x + threadIdx.x;
383  for (int it = first, nt = foundNtuplets->nOnes(); it < nt; it += gridDim.x * blockDim.x) {
384  auto nhits = foundNtuplets->size(it);
385  if (nhits < 3)
386  continue;
388  continue;
390  if (nhits > 5)
391  printf("wrong mult %d %d\n", it, nhits);
392  assert(nhits < 8);
393  tupleMultiplicity->count(nhits);
394  }
395 }
396 
397 __global__ void kernel_fillMultiplicity(HitContainer const *__restrict__ foundNtuplets,
398  Quality const *__restrict__ quality,
400  auto first = blockIdx.x * blockDim.x + threadIdx.x;
401  for (int it = first, nt = foundNtuplets->nOnes(); it < nt; it += gridDim.x * blockDim.x) {
402  auto nhits = foundNtuplets->size(it);
403  if (nhits < 3)
404  continue;
406  continue;
408  if (nhits > 5)
409  printf("wrong mult %d %d\n", it, nhits);
410  assert(nhits < 8);
411  tupleMultiplicity->fill(nhits, it);
412  }
413 }
414 
415 __global__ void kernel_classifyTracks(HitContainer const *__restrict__ tuples,
416  TkSoA const *__restrict__ tracks,
418  Quality *__restrict__ quality) {
419  int first = blockDim.x * blockIdx.x + threadIdx.x;
420  for (int it = first, nt = tuples->nOnes(); it < nt; it += gridDim.x * blockDim.x) {
421  auto nhits = tuples->size(it);
422  if (nhits == 0)
423  break; // guard
424 
425  // if duplicate: not even fit
427  continue;
428 
430 
431  // mark doublets as bad
432  if (nhits < 3)
433  continue;
434 
435  // if the fit has any invalid parameters, mark it as bad
436  bool isNaN = false;
437  for (int i = 0; i < 5; ++i) {
438  isNaN |= std::isnan(tracks->stateAtBS.state(it)(i));
439  }
440  if (isNaN) {
441 #ifdef NTUPLE_DEBUG
442  printf("NaN in fit %d size %d chi2 %f\n", it, tuples->size(it), tracks->chi2(it));
443 #endif
444  continue;
445  }
446 
448 
449  // compute a pT-dependent chi2 cut
450 
451  auto roughLog = [](float x) {
452  // max diff [0.5,12] at 1.25 0.16143
453  // average diff 0.0662998
454  union IF {
455  uint32_t i;
456  float f;
457  };
458  IF z;
459  z.f = x;
460  uint32_t lsb = 1 < 21;
461  z.i += lsb;
462  z.i >>= 21;
463  auto f = z.i & 3;
464  int ex = int(z.i >> 2) - 127;
465 
466  // log2(1+0.25*f)
467  // averaged over bins
468  const float frac[4] = {0.160497f, 0.452172f, 0.694562f, 0.901964f};
469  return float(ex) + frac[f];
470  };
471 
472  // (see CAHitNtupletGeneratorGPU.cc)
473  float pt = std::min<float>(tracks->pt(it), cuts.chi2MaxPt);
474  float chi2Cut = cuts.chi2Scale * (cuts.chi2Coeff[0] + roughLog(pt) * cuts.chi2Coeff[1]);
475  if (tracks->chi2(it) >= chi2Cut) {
476 #ifdef NTUPLE_FIT_DEBUG
477  printf("Bad chi2 %d size %d pt %f eta %f chi2 %f\n",
478  it,
479  tuples->size(it),
480  tracks->pt(it),
481  tracks->eta(it),
482  tracks->chi2(it));
483 #endif
484  continue;
485  }
486 
488 
489  // impose "region cuts" based on the fit results (phi, Tip, pt, cotan(theta)), Zip)
490  // default cuts:
491  // - for triplets: |Tip| < 0.3 cm, pT > 0.5 GeV, |Zip| < 12.0 cm
492  // - for quadruplets: |Tip| < 0.5 cm, pT > 0.3 GeV, |Zip| < 12.0 cm
493  // (see CAHitNtupletGeneratorGPU.cc)
494  auto const &region = (nhits > 3) ? cuts.quadruplet : cuts.triplet;
495  bool isOk = (std::abs(tracks->tip(it)) < region.maxTip) and (tracks->pt(it) > region.minPt) and
496  (std::abs(tracks->zip(it)) < region.maxZip);
497 
498  if (isOk)
500  }
501 }
502 
503 __global__ void kernel_doStatsForTracks(HitContainer const *__restrict__ tuples,
504  Quality const *__restrict__ quality,
506  int first = blockDim.x * blockIdx.x + threadIdx.x;
507  for (int idx = first, ntot = tuples->nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
508  if (tuples->size(idx) == 0)
509  break; //guard
511  continue;
512  atomicAdd(&(counters->nLooseTracks), 1);
514  continue;
515  atomicAdd(&(counters->nGoodTracks), 1);
516  }
517 }
518 
519 __global__ void kernel_countHitInTracks(HitContainer const *__restrict__ tuples,
520  Quality const *__restrict__ quality,
522  int first = blockDim.x * blockIdx.x + threadIdx.x;
523  for (int idx = first, ntot = tuples->nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
524  if (tuples->size(idx) == 0)
525  break; // guard
526  for (auto h = tuples->begin(idx); h != tuples->end(idx); ++h)
527  hitToTuple->count(*h);
528  }
529 }
530 
531 __global__ void kernel_fillHitInTracks(HitContainer const *__restrict__ tuples,
532  Quality const *__restrict__ quality,
534  int first = blockDim.x * blockIdx.x + threadIdx.x;
535  for (int idx = first, ntot = tuples->nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
536  if (tuples->size(idx) == 0)
537  break; // guard
538  for (auto h = tuples->begin(idx); h != tuples->end(idx); ++h)
539  hitToTuple->fill(*h, idx);
540  }
541 }
542 
543 __global__ void kernel_fillHitDetIndices(HitContainer const *__restrict__ tuples,
544  TrackingRecHit2DSOAView const *__restrict__ hhp,
545  HitContainer *__restrict__ hitDetIndices) {
546  int first = blockDim.x * blockIdx.x + threadIdx.x;
547  // copy offsets
548  for (int idx = first, ntot = tuples->totOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
549  hitDetIndices->off[idx] = tuples->off[idx];
550  }
551  // fill hit indices
552  auto const &hh = *hhp;
553  auto nhits = hh.nHits();
554  for (int idx = first, ntot = tuples->size(); idx < ntot; idx += gridDim.x * blockDim.x) {
556  hitDetIndices->content[idx] = hh.detectorIndex(tuples->content[idx]);
557  }
558 }
559 
560 __global__ void kernel_doStatsForHitInTracks(CAHitNtupletGeneratorKernelsGPU::HitToTuple const *__restrict__ hitToTuple,
562  auto &c = *counters;
563  int first = blockDim.x * blockIdx.x + threadIdx.x;
564  for (int idx = first, ntot = hitToTuple->nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
565  if (hitToTuple->size(idx) == 0)
566  continue; // SHALL NOT BE break
567  atomicAdd(&c.nUsedHits, 1);
568  if (hitToTuple->size(idx) > 1)
569  atomicAdd(&c.nDupHits, 1);
570  }
571 }
572 
573 __global__ void kernel_countSharedHit(int *__restrict__ nshared,
574  HitContainer const *__restrict__ ptuples,
575  Quality const *__restrict__ quality,
577  constexpr auto loose = pixelTrack::Quality::loose;
578 
579  auto &hitToTuple = *phitToTuple;
580  auto const &foundNtuplets = *ptuples;
581 
582  int first = blockDim.x * blockIdx.x + threadIdx.x;
583  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
584  if (hitToTuple.size(idx) < 2)
585  continue;
586 
587  int nt = 0;
588 
589  // count "good" tracks
590  for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
591  if (quality[*it] < loose)
592  continue;
593  ++nt;
594  }
595 
596  if (nt < 2)
597  continue;
598 
599  // now mark each track triplet as sharing a hit
600  for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
601  if (foundNtuplets.size(*it) > 3)
602  continue;
603  atomicAdd(&nshared[*it], 1);
604  }
605 
606  } // hit loop
607 }
608 
609 __global__ void kernel_markSharedHit(int const *__restrict__ nshared,
610  HitContainer const *__restrict__ tuples,
611  Quality *__restrict__ quality,
612  bool dupPassThrough) {
613  // constexpr auto bad = pixelTrack::Quality::bad;
614  constexpr auto dup = pixelTrack::Quality::dup;
615  constexpr auto loose = pixelTrack::Quality::loose;
616  // constexpr auto strict = pixelTrack::Quality::strict;
617 
618  // quality to mark rejected
619  auto const reject = dupPassThrough ? loose : dup;
620 
621  int first = blockDim.x * blockIdx.x + threadIdx.x;
622  for (int idx = first, ntot = tuples->nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
623  if (tuples->size(idx) == 0)
624  break; //guard
625  if (quality[idx] <= reject)
626  continue;
627  if (nshared[idx] > 2)
628  quality[idx] = reject;
629  }
630 }
631 
632 // mostly for very forward triplets.....
633 __global__ void kernel_rejectDuplicate(TrackingRecHit2DSOAView const *__restrict__ hhp,
634  HitContainer const *__restrict__ ptuples,
635  TkSoA const *__restrict__ ptracks,
636  Quality *__restrict__ quality,
637  uint16_t nmin,
638  bool dupPassThrough,
640  // quality to mark rejected
642 
643  auto &hitToTuple = *phitToTuple;
644  auto const &foundNtuplets = *ptuples;
645  auto const &tracks = *ptracks;
646 
647  int first = blockDim.x * blockIdx.x + threadIdx.x;
648  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
649  if (hitToTuple.size(idx) < 2)
650  continue;
651 
652  /* chi2 is bad for large pt
653  auto score = [&](auto it, auto nh) {
654  return nh < 4 ? std::abs(tracks.tip(it)) : // tip for triplets
655  tracks.chi2(it); //chi2
656  };
657  */
658  auto score = [&](auto it, auto nh) { return std::abs(tracks.tip(it)); };
659 
660  // full combinatorics
661  for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
662  auto const it = *ip;
663  auto qi = quality[it];
664  if (qi <= reject)
665  continue;
666  auto opi = tracks.stateAtBS.state(it)(2);
667  auto e2opi = tracks.stateAtBS.covariance(it)(9);
668  auto cti = tracks.stateAtBS.state(it)(3);
669  auto e2cti = tracks.stateAtBS.covariance(it)(12);
670  auto nhi = foundNtuplets.size(it);
671  for (auto jp = ip + 1; jp != hitToTuple.end(idx); ++jp) {
672  auto const jt = *jp;
673  auto qj = quality[jt];
674  if (qj <= reject)
675  continue;
676  auto opj = tracks.stateAtBS.state(jt)(2);
677  auto ctj = tracks.stateAtBS.state(jt)(3);
678  auto dct = nSigma2 * (tracks.stateAtBS.covariance(jt)(12) + e2cti);
679  if ((cti - ctj) * (cti - ctj) > dct)
680  continue;
681  auto dop = nSigma2 * (tracks.stateAtBS.covariance(jt)(9) + e2opi);
682  if ((opi - opj) * (opi - opj) > dop)
683  continue;
684  auto nhj = foundNtuplets.size(jt);
685  if (nhj < nhi || (nhj == nhi && (qj < qi || (qj == qi && score(it, nhi) < score(jt, nhj)))))
686  quality[jt] = reject;
687  else {
688  quality[it] = reject;
689  break;
690  }
691  }
692  }
693  }
694 }
695 
696 __global__ void kernel_sharedHitCleaner(TrackingRecHit2DSOAView const *__restrict__ hhp,
697  HitContainer const *__restrict__ ptuples,
698  TkSoA const *__restrict__ ptracks,
699  Quality *__restrict__ quality,
700  uint16_t nmin,
701  bool dupPassThrough,
703  // quality to mark rejected
705  // quality of longest track
707 
708  auto &hitToTuple = *phitToTuple;
709  auto const &foundNtuplets = *ptuples;
710  // auto const &tracks = *ptracks;
711 
712  auto const &hh = *hhp;
713  int l1end = hh.hitsLayerStart()[1];
714 
715  int first = blockDim.x * blockIdx.x + threadIdx.x;
716  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
717  if (hitToTuple.size(idx) < 2)
718  continue;
719 
720  uint32_t maxNh = 0;
721 
722  // find maxNh
723  for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
724  if (quality[*it] < longTqual)
725  continue;
726  uint32_t nh = foundNtuplets.size(*it);
727  maxNh = std::max(nh, maxNh);
728  }
729 
730  if (maxNh < 4)
731  continue;
732 
733  // quad pass through (leave for tests)
734  // maxNh = std::min(4U, maxNh);
735 
736  // kill all tracks shorter than maxHn (only triplets???
737  for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
738  uint32_t nh = foundNtuplets.size(*it);
739 
740  //checking if shared hit is on bpix1 and if the tuple is short enough
741  if (idx < l1end and nh > nmin)
742  continue;
743 
744  if (nh < maxNh && quality[*it] > reject)
745  quality[*it] = reject;
746  }
747  }
748 }
749 
750 __global__ void kernel_tripletCleaner(TrackingRecHit2DSOAView const *__restrict__ hhp,
751  HitContainer const *__restrict__ ptuples,
752  TkSoA const *__restrict__ ptracks,
753  Quality *__restrict__ quality,
754  uint16_t nmin,
755  bool dupPassThrough,
757  // quality to mark rejected
758  auto const reject = pixelTrack::Quality::loose;
761 
762  auto &hitToTuple = *phitToTuple;
763  auto const &foundNtuplets = *ptuples;
764  auto const &tracks = *ptracks;
765 
766  int first = blockDim.x * blockIdx.x + threadIdx.x;
767  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
768  if (hitToTuple.size(idx) < 2)
769  continue;
770 
771  float mc = maxScore;
772  uint16_t im = tkNotFound;
773  uint32_t maxNh = 0;
774 
775  // find maxNh
776  for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
777  if (quality[*it] <= good)
778  continue;
779  uint32_t nh = foundNtuplets.size(*it);
780  maxNh = std::max(nh, maxNh);
781  }
782 
783  // only triplets
784  if (maxNh != 3)
785  continue;
786 
787  // for triplets choose best tip! (should we first find best quality???)
788  for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
789  auto const it = *ip;
790  if (quality[it] >= good && std::abs(tracks.tip(it)) < mc) {
791  mc = std::abs(tracks.tip(it));
792  im = it;
793  }
794  }
795 
796  if (tkNotFound == im)
797  continue;
798 
799  // mark worse ambiguities
800  for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
801  auto const it = *ip;
802  if (quality[it] > reject && it != im)
803  quality[it] = reject; //no race: simple assignment of the same constant
804  }
805 
806  } // loop over hits
807 }
808 
809 __global__ void kernel_simpleTripletCleaner(
810  TrackingRecHit2DSOAView const *__restrict__ hhp,
811  HitContainer const *__restrict__ ptuples,
812  TkSoA const *__restrict__ ptracks,
813  Quality *__restrict__ quality,
814  uint16_t nmin,
815  bool dupPassThrough,
817  // quality to mark rejected
818  auto const reject = pixelTrack::Quality::loose;
820  auto const good = pixelTrack::Quality::loose;
821 
822  auto &hitToTuple = *phitToTuple;
823  auto const &foundNtuplets = *ptuples;
824  auto const &tracks = *ptracks;
825 
826  int first = blockDim.x * blockIdx.x + threadIdx.x;
827  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
828  if (hitToTuple.size(idx) < 2)
829  continue;
830 
831  float mc = maxScore;
832  uint16_t im = tkNotFound;
833 
834  // choose best tip! (should we first find best quality???)
835  for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
836  auto const it = *ip;
837  if (quality[it] >= good && std::abs(tracks.tip(it)) < mc) {
838  mc = std::abs(tracks.tip(it));
839  im = it;
840  }
841  }
842 
843  if (tkNotFound == im)
844  continue;
845 
846  // mark worse ambiguities
847  for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
848  auto const it = *ip;
849  if (quality[it] > reject && foundNtuplets.size(it) == 3 && it != im)
850  quality[it] = reject; //no race: simple assignment of the same constant
851  }
852 
853  } // loop over hits
854 }
855 
856 __global__ void kernel_print_found_ntuplets(TrackingRecHit2DSOAView const *__restrict__ hhp,
857  HitContainer const *__restrict__ ptuples,
858  TkSoA const *__restrict__ ptracks,
859  Quality const *__restrict__ quality,
861  int32_t maxPrint,
862  int iev) {
863  auto const &foundNtuplets = *ptuples;
864  auto const &tracks = *ptracks;
865  int first = blockDim.x * blockIdx.x + threadIdx.x;
866  for (int i = first, np = std::min(maxPrint, foundNtuplets.nOnes()); i < np; i += blockDim.x * gridDim.x) {
867  auto nh = foundNtuplets.size(i);
868  if (nh < 3)
869  continue;
870  printf("TK: %d %d %d %f %f %f %f %f %f %f %d %d %d %d %d\n",
871  10000 * iev + i,
872  int(quality[i]),
873  nh,
874  tracks.charge(i),
875  tracks.pt(i),
876  tracks.eta(i),
877  tracks.phi(i),
878  tracks.tip(i),
879  tracks.zip(i),
880  // asinhf(fit_results[i].par(3)),
881  tracks.chi2(i),
882  *foundNtuplets.begin(i),
883  *(foundNtuplets.begin(i) + 1),
884  *(foundNtuplets.begin(i) + 2),
885  nh > 3 ? int(*(foundNtuplets.begin(i) + 3)) : -1,
886  nh > 4 ? int(*(foundNtuplets.begin(i) + 4)) : -1);
887  }
888 }
889 
890 __global__ void kernel_printCounters(cAHitNtupletGenerator::Counters const *counters) {
891  auto const &c = *counters;
892  printf(
893  "||Counters | nEvents | nHits | nCells | nTuples | nFitTacks | nLooseTracks | nGoodTracks | nUsedHits | "
894  "nDupHits | "
895  "nKilledCells | "
896  "nEmptyCells | nZeroTrackCells ||\n");
897  printf("Counters Raw %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n",
898  c.nEvents,
899  c.nHits,
900  c.nCells,
901  c.nTuples,
902  c.nLooseTracks,
903  c.nGoodTracks,
904  c.nFitTracks,
905  c.nUsedHits,
906  c.nDupHits,
907  c.nKilledCells,
908  c.nEmptyCells,
909  c.nZeroTrackCells);
910  printf("Counters Norm %lld || %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.3f| %.3f||\n",
911  c.nEvents,
912  c.nHits / double(c.nEvents),
913  c.nCells / double(c.nEvents),
914  c.nTuples / double(c.nEvents),
915  c.nFitTracks / double(c.nEvents),
916  c.nLooseTracks / double(c.nEvents),
917  c.nGoodTracks / double(c.nEvents),
918  c.nUsedHits / double(c.nEvents),
919  c.nDupHits / double(c.nEvents),
920  c.nKilledCells / double(c.nEvents),
921  c.nEmptyCells / double(c.nCells),
922  c.nZeroTrackCells / double(c.nCells));
923 }
good
const auto good
min quality of good
Definition: CAHitNtupletGeneratorKernelsImpl.h:760
cms::cuda::OneToManyAssoc::n
__host__ __device__ const index_type uint32_t n
Definition: OneToManyAssoc.h:232
pixelCPEforGPU.h
cellNeighbors
const caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple cms::cuda::AtomicPairCounter const GPUCACell *__restrict__ const uint32_t *__restrict__ const gpuPixelDoublets::CellNeighborsVector * cellNeighbors
Definition: CAHitNtupletGeneratorKernelsImpl.h:43
mps_fire.i
i
Definition: mps_fire.py:428
dqmMemoryStats.float
float
Definition: dqmMemoryStats.py:127
iev
const HitContainer *__restrict__ const TkSoA *__restrict__ const Quality *__restrict__ const CAHitNtupletGeneratorKernelsGPU::HitToTuple *__restrict__ int32_t int iev
Definition: CAHitNtupletGeneratorKernelsImpl.h:862
cuts
const TkSoA *__restrict__ CAHitNtupletGeneratorKernelsGPU::QualityCuts cuts
Definition: CAHitNtupletGeneratorKernelsImpl.h:416
loose
constexpr auto loose
Definition: CAHitNtupletGeneratorKernelsImpl.h:182
f
double f[11][100]
Definition: MuScleFitUtils.cc:78
detailsBasic3DVector::z
float float float z
Definition: extBasic3DVector.h:14
CaloTowersParam_cfi.mc
mc
Definition: CaloTowersParam_cfi.py:8
nt
int nt
Definition: AMPTWrapper.h:42
DiDispStaMuonMonitor_cfi.pt
pt
Definition: DiDispStaMuonMonitor_cfi.py:39
longTqual
const auto longTqual
Definition: CAHitNtupletGeneratorKernelsImpl.h:706
min
T min(T a, T b)
Definition: MathUtil.h:58
pixelTrack::Quality::bad
cAHitNtupletGenerator::QualityCuts
Definition: CAHitNtupletGeneratorKernels.h:39
isOuterHitOfCell
const caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple cms::cuda::AtomicPairCounter const GPUCACell *__restrict__ const uint32_t *__restrict__ const gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell *__restrict__ isOuterHitOfCell
Definition: CAHitNtupletGeneratorKernelsImpl.h:43
dupPassThrough
const uint32_t *__restrict__ HitContainer Quality bool dupPassThrough
Definition: CAHitNtupletGeneratorKernelsImpl.h:142
caConstants::last_bpix1_detIndex
constexpr uint32_t last_bpix1_detIndex
Definition: CAConstants.h:62
np
int np
Definition: AMPTWrapper.h:43
CAHitNtupletGeneratorKernels.h
TrackingRecHit2DHeterogeneous
Definition: TrackingRecHit2DHeterogeneous.h:8
pixelTrack::Quality::strict
cells
const caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple cms::cuda::AtomicPairCounter const GPUCACell *__restrict__ cells
Definition: CAHitNtupletGeneratorKernelsImpl.h:43
gpuPixelDoublets.h
gpuPixelDoublets::ntot
__shared__ uint32_t ntot
Definition: gpuPixelDoubletsAlgos.h:67
nHits
const caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple cms::cuda::AtomicPairCounter const GPUCACell *__restrict__ const uint32_t *__restrict__ const gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell *__restrict__ int32_t nHits
Definition: CAHitNtupletGeneratorKernelsImpl.h:43
TrackingRecHit2DSOAView
Definition: TrackingRecHit2DSOAView.h:16
caConstants::HitToTuple
cms::cuda::OneToManyAssoc< tindex_type, -1, 4 *maxTuples > HitToTuple
Definition: CAConstants.h:77
full
Definition: GenABIO.cc:168
ptracks
const HitContainer *__restrict__ const TkSoA *__restrict__ ptracks
Definition: CAHitNtupletGeneratorKernelsImpl.h:634
caConstants::maxNumberOfQuadruplets
constexpr uint32_t maxNumberOfQuadruplets
Definition: CAConstants.h:41
hgcalVFEProducer_cfi.lsb
lsb
Definition: hgcalVFEProducer_cfi.py:80
h
The Signals That Services Can Subscribe To This is based on ActivityRegistry h
Helper function to determine trigger accepts.
Definition: Activities.doc:4
nCells
const caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple cms::cuda::AtomicPairCounter const GPUCACell *__restrict__ const uint32_t *__restrict__ nCells
Definition: CAHitNtupletGeneratorKernelsImpl.h:43
gpuFishbone.h
hhp
const TrackingRecHit2DSOAView *__restrict__ hhp
Definition: CAHitNtupletGeneratorKernelsImpl.h:544
cms::cuda::OneToManyAssoc
Definition: OneToManyAssoc.h:143
pixelTrack::Quality::dup
caConstants::last_barrel_detIndex
constexpr uint32_t last_barrel_detIndex
Definition: CAConstants.h:63
__global__
#define __global__
Definition: cudaCompat.h:19
cms::cuda::SimpleVector
Definition: SimpleVector.h:15
heavyIonCSV_trainingSettings.idx
idx
Definition: heavyIonCSV_trainingSettings.py:5
quality
const uint32_t *__restrict__ Quality * quality
Definition: CAHitNtupletGeneratorKernelsImpl.h:122
pixelTrack::Quality
Quality
Definition: TrackSoAHeterogeneousT.h:13
pixelTrack::Quality::edup
h
counters
const caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple cms::cuda::AtomicPairCounter const GPUCACell *__restrict__ const uint32_t *__restrict__ const gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell *__restrict__ int32_t uint32_t CAHitNtupletGeneratorKernelsGPU::Counters * counters
Definition: CAHitNtupletGeneratorKernelsImpl.h:53
DivergingColor.frac
float frac
Definition: DivergingColor.py:175
caConstants::TupleMultiplicity
cms::cuda::OneToManyAssoc< tindex_type, 8, maxTuples > TupleMultiplicity
Definition: CAConstants.h:78
CommonMethods.isnan
def isnan(num)
Definition: CommonMethods.py:97
cms::cudacompat::atomicAdd
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
pixelTrack::Quality::tight
first
auto first
Definition: CAHitNtupletGeneratorKernelsImpl.h:125
cms::cudacompat::gridDim
const dim3 gridDim
Definition: cudaCompat.h:33
trackingPlots.dup
dup
Definition: trackingPlots.py:183
TrackSoAHeterogeneousT
Definition: TrackSoAHeterogeneousT.h:23
PixelPluginsPhase0_cfi.isBarrel
isBarrel
Definition: PixelPluginsPhase0_cfi.py:17
maxPrint
const HitContainer *__restrict__ const TkSoA *__restrict__ const Quality *__restrict__ const CAHitNtupletGeneratorKernelsGPU::HitToTuple *__restrict__ int32_t maxPrint
Definition: CAHitNtupletGeneratorKernelsImpl.h:857
svgfig.stack
stack
Definition: svgfig.py:559
hh
const auto & hh
Definition: CAHitNtupletGeneratorKernelsImpl.h:552
cms::cuda::AtomicPairCounter
Definition: AtomicPairCounter.h:11
l1end
int l1end
Definition: CAHitNtupletGeneratorKernelsImpl.h:713
cms::cudacompat::blockDim
const dim3 blockDim
Definition: cudaCompat.h:30
tracks
const uint32_t *__restrict__ const HitContainer *__restrict__ TkSoA *__restrict__ tracks
Definition: CAHitNtupletGeneratorKernelsImpl.h:176
SiStripPI::max
Definition: SiStripPayloadInspectorHelper.h:169
tupleMultiplicity
const caConstants::TupleMultiplicity * tupleMultiplicity
Definition: CAHitNtupletGeneratorKernelsImpl.h:43
HLT_FULL_cff.region
region
Definition: HLT_FULL_cff.py:88286
createfilelist.int
int
Definition: createfilelist.py:10
cms::cuda::VecArray
Definition: VecArray.h:14
foundNtuplets
const uint32_t *__restrict__ HitContainer * foundNtuplets
Definition: CAHitNtupletGeneratorKernelsImpl.h:139
cudaCheck.h
cms::cuda::OneToManyAssoc::m
return c m
Definition: OneToManyAssoc.h:239
nhits
auto nhits
Definition: CAHitNtupletGeneratorKernelsImpl.h:553
cms::cudacompat::threadIdx
const dim3 threadIdx
Definition: cudaCompat.h:29
cms::cuda::OneToManyAssoc::nOnes
constexpr auto nOnes() const
Definition: OneToManyAssoc.h:167
tuples
const HitContainer *__restrict__ tuples
Definition: CAHitNtupletGeneratorKernelsImpl.h:610
cms::cuda::OneToManyAssoc::totOnes
constexpr auto totOnes() const
Definition: OneToManyAssoc.h:166
cms::cuda::nh
uint32_t nh
Definition: HistoContainer.h:11
nmin
const HitContainer *__restrict__ const TkSoA *__restrict__ Quality *__restrict__ uint16_t nmin
Definition: CAHitNtupletGeneratorKernelsImpl.h:634
cms::cuda::OneToManyAssoc::content
content[w - 1]
Definition: OneToManyAssoc.h:229
cAHitNtupletGenerator::Counters
Definition: CAHitNtupletGeneratorKernels.h:14
maxNumberOfDoublets
const caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple cms::cuda::AtomicPairCounter const GPUCACell *__restrict__ const uint32_t *__restrict__ const gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell *__restrict__ int32_t uint32_t maxNumberOfDoublets
Definition: CAHitNtupletGeneratorKernelsImpl.h:43
diffTwoXMLs.r1
r1
Definition: diffTwoXMLs.py:53
TrackingRecHit2DGPU
TrackingRecHit2DHeterogeneous< cms::cudacompat::GPUTraits > TrackingRecHit2DGPU
Definition: TrackingRecHit2DHeterogeneous.h:73
apc
const caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple cms::cuda::AtomicPairCounter * apc
Definition: CAHitNtupletGeneratorKernelsImpl.h:43
pixelTrack::Quality::highPurity
genVertex_cff.x
x
Definition: genVertex_cff.py:13
GPUCACell.h
HGVHistoProducerAlgoBlock_cfi.maxScore
maxScore
Definition: HGVHistoProducerAlgoBlock_cfi.py:69
reject
const auto reject
Definition: CAHitNtupletGeneratorKernelsImpl.h:619
phitToTuple
const HitContainer *__restrict__ const Quality *__restrict__ const CAHitNtupletGeneratorKernelsGPU::HitToTuple *__restrict__ phitToTuple
Definition: CAHitNtupletGeneratorKernelsImpl.h:576
ptmin
double ptmin
Definition: HydjetWrapper.h:84
ptuples
const HitContainer *__restrict__ ptuples
Definition: CAHitNtupletGeneratorKernelsImpl.h:574
hitToTuple
const caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple * hitToTuple
Definition: CAHitNtupletGeneratorKernelsImpl.h:43
hitDetIndices
const TrackingRecHit2DSOAView *__restrict__ HitContainer *__restrict__ hitDetIndices
Definition: CAHitNtupletGeneratorKernelsImpl.h:545
cellTracks
const caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple cms::cuda::AtomicPairCounter const GPUCACell *__restrict__ const uint32_t *__restrict__ const gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector * cellTracks
Definition: CAHitNtupletGeneratorKernelsImpl.h:43
PixelTripletNoTipGenerator_cfi.chi2Cut
chi2Cut
Definition: PixelTripletNoTipGenerator_cfi.py:10
pixelTrack::HitContainer
TrackSoA::HitContainer HitContainer
Definition: TrackSoAHeterogeneousT.h:78
GPUCACell
Definition: GPUCACell.h:20
funct::abs
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
pixelTrack::TrackSoA
TrackSoAHeterogeneousT< maxNumber()> TrackSoA
Definition: TrackSoAHeterogeneousT.h:76
offlineSlimmedPrimaryVertices_cfi.score
score
Definition: offlineSlimmedPrimaryVertices_cfi.py:6
cuda_assert.h
c
auto & c
Definition: CAHitNtupletGeneratorKernelsImpl.h:56
dqmiolumiharvest.j
j
Definition: dqmiolumiharvest.py:66
cms::cudacompat::__ldg
T __ldg(T const *x)
Definition: cudaCompat.h:113
pixelTrack::Quality::loose
CAConstants.h
assert
assert(nCells)
cannot be loose
gpuPixelDoublets::stride
auto stride
Definition: gpuPixelDoubletsAlgos.h:80
nhj
int nhj
Definition: HydjetWrapper.h:73
cms::cuda::OneToManyAssoc::off
off[c.m]
Definition: OneToManyAssoc.h:236
cms::cudacompat::blockIdx
const dim3 blockIdx
Definition: cudaCompat.h:32