CMS 3D CMS Logo

CAHitNtupletGeneratorKernelsImpl.h
Go to the documentation of this file.
1 //
2 // Original Author: Felice Pantaleo, CERN
3 //
4 
5 //#define NTUPLE_DEBUG
6 //#define GPU_DEBUG
7 
8 #include <cmath>
9 #include <cstdint>
10 #include <limits>
11 
12 #include <cuda_runtime.h>
13 
19 
21 #include "CAStructures.h"
22 #include "GPUCACell.h"
23 #include "gpuFishbone.h"
24 #include "gpuPixelDoublets.h"
25 
27 
30  constexpr float nSigma2 = 25.f;
31 
32  //all of these below are mostly to avoid bringing around the relative namespace
33 
34  template <typename TrackerTraits>
36 
37  template <typename TrackerTraits>
39 
40  template <typename TrackerTraits>
42 
43  template <typename TrackerTraits>
45 
46  template <typename TrackerTraits>
48 
50 
51  template <typename TrackerTraits>
53 
54  template <typename TrackerTraits>
56 
57  template <typename TrackerTraits>
59 
60  template <typename TrackerTraits>
62 
63  template <typename TrackerTraits>
65 
67 
// Debug/monitoring kernel.
// Thread 0 folds per-event totals (hits, cells, tuples, fit tracks) into the
// global Counters; then all threads grid-stride over tuples, cells and the
// outer-hit-of-cell containers to report (via printf) any container overflow
// and to tally fishbone/killed/used/zero-track cells.
// NOTE(review): this is a Doxygen listing; several original source lines of
// the parameter list (orig. 70-72, 75-77, 80) are not visible here, so the
// signature below appears truncated — confirm against the original file.
68  template <typename TrackerTraits>
69  __global__ void kernel_checkOverflows(TkSoAView<TrackerTraits> tracks_view,
73  GPUCACellT<TrackerTraits> const *__restrict__ cells,
74  uint32_t const *__restrict__ nCells,
78  int32_t nHits,
79  uint32_t maxNumberOfDoublets,
81  auto first = threadIdx.x + blockIdx.x * blockDim.x;
82 
83  auto &c = *counters;
84  // counters once per event
85  if (0 == first) {
86  atomicAdd(&c.nEvents, 1);
87  atomicAdd(&c.nHits, nHits);
88  atomicAdd(&c.nCells, *nCells);
89  atomicAdd(&c.nTuples, apc->get().m);
90  atomicAdd(&c.nFitTracks, tupleMultiplicity->size());
91  }
92 
93 #ifdef NTUPLE_DEBUG
94  if (0 == first) {
95  printf("number of found cells %d \n found tuples %d with total hits %d out of %d %d\n",
96  *nCells,
97  apc->get().m,
98  apc->get().n,
99  nHits,
100  hitToTuple->totOnes());
101  if (apc->get().m < TrackerTraits::maxNumberOfQuadruplets) {
102  assert(tracks_view.hitIndices().size(apc->get().m) == 0);
103  assert(tracks_view.hitIndices().size() == apc->get().n);
104  }
105  }
106 
// Sanity-check every stored tuple: size bounded by maxHitsOnTrack and all
// hit indices within [0, nHits).
107  for (int idx = first, nt = tracks_view.hitIndices().nOnes(); idx < nt; idx += gridDim.x * blockDim.x) {
108  if (tracks_view.hitIndices().size(idx) > TrackerTraits::maxHitsOnTrack) // current real limit
109  printf("ERROR %d, %d\n", idx, tracks_view.hitIndices().size(idx));
110  assert(tracks_view.hitIndices().size(idx) <= TrackerTraits::maxHitsOnTrack);
111  for (auto ih = tracks_view.hitIndices().begin(idx); ih != tracks_view.hitIndices().end(idx); ++ih)
112  assert(int(*ih) < nHits);
113  }
114 #endif
115 
// Single-thread overflow reporting for the shared containers.
116  if (0 == first) {
117  if (apc->get().m >= TrackerTraits::maxNumberOfQuadruplets)
118  printf("Tuples overflow\n");
119  if (*nCells >= maxNumberOfDoublets)
120  printf("Cells overflow\n");
121  if (cellNeighbors && cellNeighbors->full())
122  printf("cellNeighbors overflow %d %d \n", cellNeighbors->capacity(), cellNeighbors->size());
123  if (cellTracks && cellTracks->full())
124  printf("cellTracks overflow\n");
125  if (int(hitToTuple->nOnes()) < nHits)
126  printf("ERROR hitToTuple overflow %d %d\n", hitToTuple->nOnes(), nHits);
127 #ifdef GPU_DEBUG
128  printf("size of cellNeighbors %d \n cellTracks %d \n hitToTuple %d \n",
129  cellNeighbors->size(),
130  cellTracks->size(),
131  hitToTuple->size());
132 
133 #endif
134  }
135 
// Per-cell statistics; note the counter named nEmptyCells is incremented
// for cells that are NOT unused (i.e. it effectively counts used cells).
136  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
137  auto const &thisCell = cells[idx];
138  if (thisCell.hasFishbone() && !thisCell.isKilled())
139  atomicAdd(&c.nFishCells, 1);
140  if (thisCell.outerNeighbors().full()) //++tooManyNeighbors[thisCell.theLayerPairId];
141  printf("OuterNeighbors overflow %d in %d\n", idx, thisCell.layerPairId());
142  if (thisCell.tracks().full()) //++tooManyTracks[thisCell.theLayerPairId];
143  printf("Tracks overflow %d in %d\n", idx, thisCell.layerPairId());
144  if (thisCell.isKilled())
145  atomicAdd(&c.nKilledCells, 1);
146  if (!thisCell.unused())
147  atomicAdd(&c.nEmptyCells, 1);
148  if ((0 == hitToTuple->size(thisCell.inner_hit_id())) && (0 == hitToTuple->size(thisCell.outer_hit_id())))
149  atomicAdd(&c.nZeroTrackCells, 1);
150  }
151 
// Check the per-hit outer-cell vectors for overflow.
152  for (int idx = first, nt = nHits - isOuterHitOfCell.offset; idx < nt; idx += gridDim.x * blockDim.x) {
153  if (isOuterHitOfCell.container[idx].full()) // ++tooManyOuterHitOfCell;
154  printf("OuterHitOfCell overflow %d\n", idx);
155  }
156  }
157 
// For every cell killed by the (early) fishbone pass, iterate over the tracks
// attached to that cell.
// NOTE(review): original lines 161-162 (remaining kernel parameters) and 171
// (the loop body applied to each track, presumably a quality demotion) are
// missing from this Doxygen listing — confirm against the original file.
158  template <typename TrackerTraits>
159  __global__ void kernel_fishboneCleaner(GPUCACellT<TrackerTraits> const *cells,
160  uint32_t const *__restrict__ nCells,
163 
164  auto first = threadIdx.x + blockIdx.x * blockDim.x;
165  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
166  auto const &thisCell = cells[idx];
// Only cells flagged as killed are of interest here.
167  if (!thisCell.isKilled())
168  continue;
169 
170  for (auto it : thisCell.tracks())
172  }
173  }
174 
// For each cell shared by >= 2 track candidates, find the maximum layer count
// among them and reject every candidate with fewer layers.
// NOTE(review): original lines 181 and 183 are missing from this listing —
// likely the last parameter(s) and the definition of `reject` used below.
175  // remove shorter tracks if sharing a cell
176  // It does not seem to affect efficiency in any way!
177  template <typename TrackerTraits>
178  __global__ void kernel_earlyDuplicateRemover(GPUCACellT<TrackerTraits> const *cells,
179  uint32_t const *__restrict__ nCells,
180  TkSoAView<TrackerTraits> tracks_view,
182  // quality to mark rejected
184 
185  assert(nCells);
186  auto first = threadIdx.x + blockIdx.x * blockDim.x;
187  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
188  auto const &thisCell = cells[idx];
189 
190  if (thisCell.tracks().size() < 2)
191  continue;
192 
193  int8_t maxNl = 0;
194 
195  // find maxNl
196  for (auto it : thisCell.tracks()) {
197  auto nl = tracks_view[it].nLayers();
198  maxNl = std::max(nl, maxNl);
199  }
200 
201  // if (maxNl<4) continue;
202  // quad pass through (leave it here for tests)
203  // maxNl = std::min(4, maxNl);
204 
// Demote every candidate on this cell that is shorter than the longest one.
205  for (auto it : thisCell.tracks()) {
206  if (tracks_view[it].nLayers() < maxNl)
207  tracks_view[it].quality() = reject; //no race: simple assignment of the same constant
208  }
209  }
210  }
211 
// Pairwise duplicate removal among track candidates sharing a cell: two
// candidates are considered compatible (duplicates) if two of their fitted
// state parameters agree within nSigma2 times the summed variances; the
// lower-quality (or worse-tip) one is rejected. A single best candidate per
// cell is then kept at full quality; the others are demoted to loose.
// NOTE(review): original lines 219-220 are missing from this listing —
// presumably the definitions of `reject` (from dupPassThrough) and related
// quality constants; confirm against the original file.
212  // assume the above (so, short tracks already removed)
213  template <typename TrackerTraits>
214  __global__ void kernel_fastDuplicateRemover(GPUCACellT<TrackerTraits> const *__restrict__ cells,
215  uint32_t const *__restrict__ nCells,
216  TkSoAView<TrackerTraits> tracks_view,
217  bool dupPassThrough) {
218  // quality to mark rejected
221 
222  assert(nCells);
223 
224  auto first = threadIdx.x + blockIdx.x * blockDim.x;
225  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
226  auto const &thisCell = cells[idx];
227  if (thisCell.tracks().size() < 2)
228  continue;
229 
230  float mc = maxScore;
231  uint16_t im = tkNotFound;
232 
// Score = |transverse impact parameter|; smaller is better.
233  auto score = [&](auto it) { return std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)); };
234 
235  // full crazy combinatorics
236  // O(n^2) pairwise comparison of all candidates attached to this cell.
// state()(2)/state()(3) with covariance()(9)/covariance()(12) are the two
// fit parameters (and their variances) used for the nSigma2 compatibility
// test — exact parameter meaning follows the TrackSoA layout (not visible
// in this listing).
237  int ntr = thisCell.tracks().size();
238  for (int i = 0; i < ntr - 1; ++i) {
239  auto it = thisCell.tracks()[i];
240  auto qi = tracks_view[it].quality();
241  if (qi <= reject)
242  continue;
243  auto opi = tracks_view[it].state()(2);
244  auto e2opi = tracks_view[it].covariance()(9);
245  auto cti = tracks_view[it].state()(3);
246  auto e2cti = tracks_view[it].covariance()(12);
247  for (auto j = i + 1; j < ntr; ++j) {
248  auto jt = thisCell.tracks()[j];
249  auto qj = tracks_view[jt].quality();
250  if (qj <= reject)
251  continue;
252  auto opj = tracks_view[jt].state()(2);
253  auto ctj = tracks_view[jt].state()(3);
254  auto dct = nSigma2 * (tracks_view[jt].covariance()(12) + e2cti);
255  if ((cti - ctj) * (cti - ctj) > dct)
256  continue;
257  auto dop = nSigma2 * (tracks_view[jt].covariance()(9) + e2opi);
258  if ((opi - opj) * (opi - opj) > dop)
259  continue;
260  if ((qj < qi) || (qj == qi && score(it) < score(jt)))
261  tracks_view[jt].quality() = reject;
262  else {
263  tracks_view[it].quality() = reject;
264  break;
265  }
266  }
267  }
268 
269  // find maxQual
270  auto maxQual = reject; // no duplicate!
271  for (auto it : thisCell.tracks()) {
272  if (tracks_view[it].quality() > maxQual)
273  maxQual = tracks_view[it].quality();
274  }
275 
276  if (maxQual <= loose)
277  continue;
278 
279  // find min score
280  for (auto it : thisCell.tracks()) {
281  if (tracks_view[it].quality() == maxQual && score(it) < mc) {
282  mc = score(it);
283  im = it;
284  }
285  }
286 
287  if (tkNotFound == im)
288  continue;
289 
290  // mark all other duplicates (not yet, keep it loose)
291  for (auto it : thisCell.tracks()) {
292  if (tracks_view[it].quality() > loose && it != im)
293  tracks_view[it].quality() = loose; //no race: simple assignment of the same constant
294  }
295  }
296  }
297 
// Core CA step: for every cell (doublet), scan the cells whose OUTER hit is
// this cell's INNER hit; if the two doublets are aligned in r-z and pass the
// DCA/curvature cut, link them as neighbors and mark both cells used.
// 2D launch: y indexes the cells (grid-stride), x indexes the candidate
// neighbors of each cell (block-stride). Also zeroes apc1/apc2 for the next
// kernel.
// NOTE(review): original lines 301-302 and 304-307 (several parameters,
// including hh, cells, isOuterHitOfCell and params) are missing from this
// Doxygen listing — the signature below appears truncated.
298  template <typename TrackerTraits>
299  __global__ void kernel_connect(cms::cuda::AtomicPairCounter *apc1,
300  cms::cuda::AtomicPairCounter *apc2, // just to zero them,
303  uint32_t const *__restrict__ nCells,
308 
309  auto firstCellIndex = threadIdx.y + blockIdx.y * blockDim.y;
310  auto first = threadIdx.x;
311  auto stride = blockDim.x;
312 
313  if (0 == (firstCellIndex + first)) {
314  (*apc1) = 0;
315  (*apc2) = 0;
316  } // ready for next kernel
317 
318  constexpr uint32_t last_bpix1_detIndex = TrackerTraits::last_bpix1_detIndex;
319  constexpr uint32_t last_barrel_detIndex = TrackerTraits::last_barrel_detIndex;
320  for (int idx = firstCellIndex, nt = (*nCells); idx < nt; idx += gridDim.y * blockDim.y) {
321  auto cellIndex = idx;
322  auto &thisCell = cells[idx];
323  auto innerHitId = thisCell.inner_hit_id();
// Hits below the container offset have no outer-cell list to scan.
324  if (int(innerHitId) < isOuterHitOfCell.offset)
325  continue;
326  int numberOfPossibleNeighbors = isOuterHitOfCell[innerHitId].size();
327  auto vi = isOuterHitOfCell[innerHitId].data();
328 
329  auto ri = thisCell.inner_r(hh);
330  auto zi = thisCell.inner_z(hh);
331 
332  auto ro = thisCell.outer_r(hh);
333  auto zo = thisCell.outer_z(hh);
334  auto isBarrel = thisCell.inner_detIndex(hh) < last_barrel_detIndex;
335 
336  for (int j = first; j < numberOfPossibleNeighbors; j += stride) {
// __ldg: read-only cached load of the candidate cell index.
337  auto otherCell = __ldg(vi + j);
338  auto &oc = cells[otherCell];
339  auto r1 = oc.inner_r(hh);
340  auto z1 = oc.inner_z(hh);
341  bool aligned = Cell::areAlignedRZ(
342  r1,
343  z1,
344  ri,
345  zi,
346  ro,
347  zo,
348  params.ptmin_,
349  isBarrel ? params.CAThetaCutBarrel_ : params.CAThetaCutForward_); // 2.f*thetaCut); // FIXME tune cuts
350  if (aligned && thisCell.dcaCut(hh,
351  oc,
352  oc.inner_detIndex(hh) < last_bpix1_detIndex ? params.dcaCutInnerTriplet_
353  : params.dcaCutOuterTriplet_,
354  params.hardCurvCut_)) { // FIXME tune cuts
355  oc.addOuterNeighbor(cellIndex, *cellNeighbors);
356  thisCell.setStatusBits(Cell::StatusBit::kUsed);
357  oc.setStatusBits(Cell::StatusBit::kUsed);
358  }
359  } // loop on inner cells
360  } // loop on outer cells
361  }
362 
// Walk the CA graph: from each live cell on a configured starting layer pair,
// recursively collect connected cells into ntuplet candidates (via the cell's
// find_ntuplets), storing hit indices and provisional quality in the SoA.
// NOTE(review): original lines 369-370, 373 and 394 are missing from this
// listing — presumably the remaining parameters (apc, params) and the
// maxDepth constant used in the template call below.
363  template <typename TrackerTraits>
364  __global__ void kernel_find_ntuplets(HitsConstView<TrackerTraits> hh,
365  TkSoAView<TrackerTraits> tracks_view,
366  GPUCACellT<TrackerTraits> *__restrict__ cells,
367  uint32_t const *nCells,
368  CellTracksVector<TrackerTraits> *cellTracks,
371  // recursive: not obvious to widen
372 
374 
375  auto first = threadIdx.x + blockIdx.x * blockDim.x;
376 
377 #ifdef GPU_DEBUG
378  if (first == 0)
379  printf("starting producing ntuplets from %d cells \n", *nCells);
380 #endif
381  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
382  auto const &thisCell = cells[idx];
383 
384  if (thisCell.isKilled())
385  continue; // cut by earlyFishbone
386 
387  // we require at least three hits...
388  if (thisCell.outerNeighbors().empty())
389  continue;
390 
391  auto pid = thisCell.layerPairId();
392  bool doit = params.startingLayerPair(pid);
393 
395  if (doit) {
// Scratch stack for the depth-first walk; must be empty again on return.
396  typename Cell::TmpTuple stack;
397  stack.reset();
398 
399  bool bpix1Start = params.startAt0(pid);
400 
401  thisCell.template find_ntuplets<maxDepth>(hh,
402  cells,
403  *cellTracks,
404  tracks_view.hitIndices(),
405  *apc,
406  tracks_view.quality(),
407  stack,
408  params.minHitsPerNtuplet_,
409  bpix1Start);
410 
411  assert(stack.empty());
412  }
413  }
414  }
// Flag every cell that ended up in at least one track candidate with the
// kInTrack status bit (consumed by the late fishbone).
// NOTE(review): original line 418 is missing from this listing — likely a
// local alias (e.g. for GPUCACellT) used as `Cell` below.
415  template <typename TrackerTraits>
416  __global__ void kernel_mark_used(GPUCACellT<TrackerTraits> *__restrict__ cells, uint32_t const *nCells) {
417  auto first = threadIdx.x + blockIdx.x * blockDim.x;
419  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
420  auto &thisCell = cells[idx];
421  if (!thisCell.tracks().empty())
422  thisCell.setStatusBits(Cell::StatusBit::kInTrack);
423  }
424  }
425 
// First (count) pass of the tuple-multiplicity histogram: for every stored
// tuple with >= 3 hits, bump the bin for its hit count.
// NOTE(review): original lines 428, 434 and 436 are missing from this
// listing — the tupleMultiplicity parameter and an additional skip condition
// (likely a quality/duplicate check) are not visible.
426  template <typename TrackerTraits>
427  __global__ void kernel_countMultiplicity(TkSoAView<TrackerTraits> tracks_view,
429  auto first = blockIdx.x * blockDim.x + threadIdx.x;
430  for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) {
431  auto nhits = tracks_view.hitIndices().size(it);
432  if (nhits < 3)
433  continue;
435  continue;
437  if (nhits > TrackerTraits::maxHitsOnTrack) // current limit
438  printf("wrong mult %d %d\n", it, nhits);
439  assert(nhits <= TrackerTraits::maxHitsOnTrack);
440  tupleMultiplicity->count(nhits);
441  }
442  }
443 
// Second (fill) pass of the tuple-multiplicity histogram: same selection as
// kernel_countMultiplicity, storing each tuple index in its hit-count bin.
// NOTE(review): original lines 446, 452 and 454 are missing from this
// listing — the tupleMultiplicity parameter and an additional skip condition
// are not visible; the selection must match the count pass exactly.
444  template <typename TrackerTraits>
445  __global__ void kernel_fillMultiplicity(TkSoAView<TrackerTraits> tracks_view,
447  auto first = blockIdx.x * blockDim.x + threadIdx.x;
448  for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) {
449  auto nhits = tracks_view.hitIndices().size(it);
450  if (nhits < 3)
451  continue;
453  continue;
455  if (nhits > TrackerTraits::maxHitsOnTrack)
456  printf("wrong mult %d %d\n", it, nhits);
457  assert(nhits <= TrackerTraits::maxHitsOnTrack);
458  tupleMultiplicity->fill(nhits, it);
459  }
460  }
461 
// Track classification kernel: per tuple, skip duplicates and doublets,
// reject fits with NaN parameters, apply the strict quality cuts and finally
// promote candidates passing isHP.
// NOTE(review): the kernel's signature line (orig. 464) and several body
// lines (orig. 473, 476-477, 494-495, 499-500, 502, 504) are missing from
// this Doxygen listing — the name (presumably kernel_classifyTracks), the
// `cuts` parameter, the default-quality assignments and the closing brace
// must be confirmed against the original file.
463  template <typename TrackerTraits>
465  // Quality *__restrict__ quality) {
466  int first = blockDim.x * blockIdx.x + threadIdx.x;
467  for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) {
468  auto nhits = tracks_view.hitIndices().size(it);
469  if (nhits == 0)
470  break; // guard
471 
472  // if duplicate: not even fit
474  continue;
475 
477 
478  // mark doublets as bad
479  if (nhits < 3)
480  continue;
481 
482  // if the fit has any invalid parameters, mark it as bad
483  bool isNaN = false;
484  for (int i = 0; i < 5; ++i) {
485  isNaN |= std::isnan(tracks_view[it].state()(i));
486  }
487  if (isNaN) {
488 #ifdef NTUPLE_DEBUG
489  printf("NaN in fit %d size %d chi2 %f\n", it, tracks_view.hitIndices().size(it), tracks_view[it].chi2());
490 #endif
491  continue;
492  }
493 
495 
496  if (cuts.strictCut(tracks_view, it))
497  continue;
498 
500 
501  if (cuts.isHP(tracks_view, nhits, it))
503  }
505 
// Count loose and good tracks into the global Counters.
// NOTE(review): original lines 512 and 515 are missing from this listing —
// they are the quality conditions guarding the two `continue` statements
// below (presumably quality < loose / < good checks); confirm against the
// original file.
506  template <typename TrackerTraits>
507  __global__ void kernel_doStatsForTracks(TkSoAView<TrackerTraits> tracks_view, Counters *counters) {
508  int first = blockDim.x * blockIdx.x + threadIdx.x;
509  for (int idx = first, ntot = tracks_view.hitIndices().nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
510  if (tracks_view.hitIndices().size(idx) == 0)
511  break; //guard
513  continue;
514  atomicAdd(&(counters->nLooseTracks), 1);
516  continue;
517  atomicAdd(&(counters->nGoodTracks), 1);
518  }
519  }
520 
template <typename TrackerTraits>
__global__ void kernel_countHitInTracks(TkSoAView<TrackerTraits> tracks_view, HitToTuple<TrackerTraits> *hitToTuple) {
  // Count pass for the hit->tuple associative container: for every stored
  // tuple, bump the counter of each hit it references. A later fill pass
  // (kernel_fillHitInTracks) must mirror this selection exactly.
  int tid = blockDim.x * blockIdx.x + threadIdx.x;
  int nTuples = tracks_view.hitIndices().nOnes();
  for (int tuple = tid; tuple < nTuples; tuple += gridDim.x * blockDim.x) {
    if (0 == tracks_view.hitIndices().size(tuple))
      break;  // tuples are packed at the front: the first empty one ends the list
    auto const hitsEnd = tracks_view.hitIndices().end(tuple);
    for (auto hit = tracks_view.hitIndices().begin(tuple); hit != hitsEnd; ++hit)
      hitToTuple->count(*hit);
  }
}
531 
template <typename TrackerTraits>
__global__ void kernel_fillHitInTracks(TkSoAView<TrackerTraits> tracks_view, HitToTuple<TrackerTraits> *hitToTuple) {
  // Fill pass for the hit->tuple associative container: records, for every
  // hit of every stored tuple, the index of that tuple. Must visit exactly
  // the same (hit, tuple) pairs as kernel_countHitInTracks.
  int tid = blockDim.x * blockIdx.x + threadIdx.x;
  int nTuples = tracks_view.hitIndices().nOnes();
  for (int tuple = tid; tuple < nTuples; tuple += gridDim.x * blockDim.x) {
    if (0 == tracks_view.hitIndices().size(tuple))
      break;  // tuples are packed at the front: the first empty one ends the list
    auto const hitsEnd = tracks_view.hitIndices().end(tuple);
    for (auto hit = tracks_view.hitIndices().begin(tuple); hit != hitsEnd; ++hit)
      hitToTuple->fill(*hit, tuple);
  }
}
542 
// Build the per-track detector-index container parallel to the hit-index
// container: copy the offset table, then translate every stored hit index
// into its detectorIndex from the hit SoA.
// NOTE(review): the kernel's signature line (orig. 544) is missing from this
// Doxygen listing — the name (presumably kernel_fillHitDetIndices) and
// parameters (hh, tracks_view) must be confirmed against the original file.
543  template <typename TrackerTraits>
545  int first = blockDim.x * blockIdx.x + threadIdx.x;
546  // copy offsets
547  for (int idx = first, ntot = tracks_view.hitIndices().totOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
548  tracks_view.detIndices().off[idx] = tracks_view.hitIndices().off[idx];
549  }
550  // fill hit indices
551  auto nhits = hh.nHits();
552 
553  for (int idx = first, ntot = tracks_view.hitIndices().size(); idx < ntot; idx += gridDim.x * blockDim.x) {
554  assert(tracks_view.hitIndices().content[idx] < nhits);
555  tracks_view.detIndices().content[idx] = hh[tracks_view.hitIndices().content[idx]].detectorIndex();
556  }
557  }
558 
// Store the final number of tracks (clamped to the SoA capacity) and, per
// track, fill the nLayers field.
// NOTE(review): original lines 567 and 569 are missing from this listing —
// presumably the per-track nHits computation and the nLayers assignment
// (via computeNumberOfLayers); only the intermediate assert is visible.
559  template <typename TrackerTraits>
560  __global__ void kernel_fillNLayers(TkSoAView<TrackerTraits> tracks_view, cms::cuda::AtomicPairCounter *apc) {
561  auto first = blockIdx.x * blockDim.x + threadIdx.x;
562  // clamp the number of tracks to the capacity of the SoA
563  auto ntracks = std::min<int>(apc->get().m, tracks_view.metadata().size() - 1);
564  if (0 == first)
565  tracks_view.nTracks() = ntracks;
566  for (int idx = first, nt = ntracks; idx < nt; idx += gridDim.x * blockDim.x) {
568  assert(nHits >= 3);
570  }
571  }
572 
template <typename TrackerTraits>
__global__ void kernel_doStatsForHitInTracks(HitToTuple<TrackerTraits> const *__restrict__ hitToTuple,
                                             Counters *counters) {
  // Accumulate statistics on the hit->tuple map: how many hits belong to at
  // least one track (nUsedHits) and how many are shared by more than one
  // track (nDupHits).
  auto &stats = *counters;
  int tid = blockDim.x * blockIdx.x + threadIdx.x;
  int nBins = hitToTuple->nOnes();
  for (int hit = tid; hit < nBins; hit += gridDim.x * blockDim.x) {
    auto const tracksOnHit = hitToTuple->size(hit);
    if (0 == tracksOnHit)
      continue;  // unused hits can appear anywhere: SHALL NOT BE break
    atomicAdd(&stats.nUsedHits, 1);
    if (tracksOnHit > 1)
      atomicAdd(&stats.nDupHits, 1);
  }
}
586 
// For every hit shared by >= 2 "good" (quality >= loose) tracks, increment
// nshared for each SHORT track (<= 3 hits) using that hit; consumed by
// kernel_markSharedHit.
// NOTE(review): original lines 589 and 592 are missing from this listing —
// the `ptuples` parameter and (presumably) the local `loose` constant used
// below are not visible; confirm against the original file.
587  template <typename TrackerTraits>
588  __global__ void kernel_countSharedHit(int *__restrict__ nshared,
590  Quality const *__restrict__ quality,
591  HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
593 
594  auto &hitToTuple = *phitToTuple;
595  auto const &foundNtuplets = *ptuples;
596 
597  int first = blockDim.x * blockIdx.x + threadIdx.x;
598  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
599  if (hitToTuple.size(idx) < 2)
600  continue;
601 
602  int nt = 0;
603 
604  // count "good" tracks
605  for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
606  if (quality[*it] < loose)
607  continue;
608  ++nt;
609  }
610 
611  if (nt < 2)
612  continue;
613 
614  // now mark each track triplet as sharing a hit
615  for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
616  if (foundNtuplets.size(*it) > 3)
617  continue;
618  atomicAdd(&nshared[*it], 1);
619  }
620 
621  } // hit loop
622  }
623 
// Demote tuples that share more than 2 hits with good tracks (as counted by
// kernel_countSharedHit): their quality is set to `dup`, or only down to
// `loose` when dupPassThrough is set.
// NOTE(review): original lines 630-631 are missing from this listing —
// presumably the definitions/aliases of the `loose` and `dup` quality
// constants used below; confirm against the original file.
624  template <typename TrackerTraits>
625  __global__ void kernel_markSharedHit(int const *__restrict__ nshared,
626  HitContainer<TrackerTraits> const *__restrict__ tuples,
627  Quality *__restrict__ quality,
628  bool dupPassThrough) {
629  // constexpr auto bad = pixelTrack::Quality::bad;
632  // constexpr auto strict = pixelTrack::Quality::strict;
633 
634  // quality to mark rejected
635  auto const reject = dupPassThrough ? loose : dup;
636 
637  int first = blockDim.x * blockIdx.x + threadIdx.x;
638  for (int idx = first, ntot = tuples->nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
639  if (tuples->size(idx) == 0)
640  break; //guard
641  if (quality[idx] <= reject)
642  continue;
643  if (nshared[idx] > 2)
644  quality[idx] = reject;
645  }
646  }
647 
// Pairwise duplicate rejection among tracks sharing a HIT (rather than a
// cell, cf. kernel_fastDuplicateRemover): two tracks compatible within
// nSigma2 on two fit parameters are duplicates; keep the one with more
// layers, then better quality, then smaller |tip|.
// NOTE(review): original line 655 is missing from this listing — presumably
// the definition of `reject` from dupPassThrough; confirm against the
// original file. The `nl` parameter of the score lambda is unused.
648  // mostly for very forward triplets.....
649  template <typename TrackerTraits>
650  __global__ void kernel_rejectDuplicate(TkSoAView<TrackerTraits> tracks_view,
651  uint16_t nmin,
652  bool dupPassThrough,
653  HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
654  // quality to mark rejected
656 
657  auto &hitToTuple = *phitToTuple;
658 
659  int first = blockDim.x * blockIdx.x + threadIdx.x;
660  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
661  if (hitToTuple.size(idx) < 2)
662  continue;
663 
664  auto score = [&](auto it, auto nl) { return std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)); };
665 
666  // full combinatorics
667  for (auto ip = hitToTuple.begin(idx); ip < hitToTuple.end(idx) - 1; ++ip) {
668  auto const it = *ip;
669  auto qi = tracks_view[it].quality();
670  if (qi <= reject)
671  continue;
672  auto opi = tracks_view[it].state()(2);
673  auto e2opi = tracks_view[it].covariance()(9);
674  auto cti = tracks_view[it].state()(3);
675  auto e2cti = tracks_view[it].covariance()(12);
676  auto nli = tracks_view[it].nLayers();
677  for (auto jp = ip + 1; jp < hitToTuple.end(idx); ++jp) {
678  auto const jt = *jp;
679  auto qj = tracks_view[jt].quality();
680  if (qj <= reject)
681  continue;
682  auto opj = tracks_view[jt].state()(2);
683  auto ctj = tracks_view[jt].state()(3);
684  auto dct = nSigma2 * (tracks_view[jt].covariance()(12) + e2cti);
685  if ((cti - ctj) * (cti - ctj) > dct)
686  continue;
687  auto dop = nSigma2 * (tracks_view[jt].covariance()(9) + e2opi);
688  if ((opi - opj) * (opi - opj) > dop)
689  continue;
// Precedence: more layers wins, then higher quality, then smaller score.
690  auto nlj = tracks_view[jt].nLayers();
691  if (nlj < nli || (nlj == nli && (qj < qi || (qj == qi && score(it, nli) < score(jt, nlj)))))
692  tracks_view[jt].quality() = reject;
693  else {
694  tracks_view[it].quality() = reject;
695  break;
696  }
697  }
698  }
699  }
700  }
701 
// For each shared hit, find the longest (most layers) high-quality track
// using it; if that maximum is >= 4 layers, demote all shorter tracks on the
// hit — except short tracks whose shared hit lies on BPIX1 (idx < l1end).
// NOTE(review): original lines 709 and 711 are missing from this listing —
// presumably the definitions of `reject` and `longTqual`; confirm against
// the original file.
702  template <typename TrackerTraits>
703  __global__ void kernel_sharedHitCleaner(HitsConstView<TrackerTraits> hh,
704  TkSoAView<TrackerTraits> tracks_view,
705  int nmin,
706  bool dupPassThrough,
707  HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
708  // quality to mark rejected
710  // quality of longest track
712 
713  auto &hitToTuple = *phitToTuple;
714 
// First hit index beyond layer 0 (BPIX1): hits below this are on BPIX1.
715  int l1end = hh.hitsLayerStart()[1];
716 
717  int first = blockDim.x * blockIdx.x + threadIdx.x;
718  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
719  if (hitToTuple.size(idx) < 2)
720  continue;
721 
722  int8_t maxNl = 0;
723 
724  // find maxNl
725  for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
726  if (tracks_view[*it].quality() < longTqual)
727  continue;
728  // if (tracks_view[*it].nHits()==3) continue;
729  auto nl = tracks_view[*it].nLayers();
730  maxNl = std::max(nl, maxNl);
731  }
732 
733  if (maxNl < 4)
734  continue;
735 
736  // quad pass through (leave for tests)
737  // maxNl = std::min(4, maxNl);
738 
739  // kill all tracks shorter than maxNl (only triplets?)
740  for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
741  auto nl = tracks_view[*it].nLayers();
742 
743  //checking if shared hit is on bpix1 and if the tuple is short enough
744  if (idx < l1end and nl > nmin)
745  continue;
746 
747  if (nl < maxNl && tracks_view[*it].quality() > reject)
748  tracks_view[*it].quality() = reject;
749  }
750  }
751  }
752 
// Ambiguity resolution for hits used ONLY by triplets: keep the candidate
// with the best (smallest |tip|) score and demote all other tracks on the
// hit to loose.
// NOTE(review): original lines 760-761, 778 and 790-791 are missing from
// this listing — presumably the `good` constant, the triplet test that can
// clear `onlyTriplets`, and the score comparison that selects `im`; confirm
// against the original file.
753  template <typename TrackerTraits>
754  __global__ void kernel_tripletCleaner(TkSoAView<TrackerTraits> tracks_view,
755  uint16_t nmin,
756  bool dupPassThrough,
757  HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
758  // quality to mark rejected
759  auto const reject = pixelTrack::Quality::loose;
762 
763  auto &hitToTuple = *phitToTuple;
764 
765  int first = blockDim.x * blockIdx.x + threadIdx.x;
766  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
767  if (hitToTuple.size(idx) < 2)
768  continue;
769 
770  float mc = maxScore;
771  uint16_t im = tkNotFound;
772  bool onlyTriplets = true;
773 
774  // check if only triplets
775  for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
776  if (tracks_view[*it].quality() <= good)
777  continue;
779  if (!onlyTriplets)
780  break;
781  }
782 
783  // only triplets
784  if (!onlyTriplets)
785  continue;
786 
787  // for triplets choose best tip! (should we first find best quality???)
788  for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
789  auto const it = *ip;
792  im = it;
793  }
794  }
795 
796  if (tkNotFound == im)
797  continue;
798 
799  // mark worse ambiguities
800  for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
801  auto const it = *ip;
802  if (tracks_view[it].quality() > reject && it != im)
803  tracks_view[it].quality() = reject; //no race: simple assignment of the same constant
804  }
805 
806  } // loop over hits
807  }
808 
// Simplified variant of kernel_tripletCleaner: for every shared hit, pick
// the best-scoring (smallest |tip|) candidate among good tracks and demote
// the other candidates on the hit to loose (no only-triplets requirement).
// NOTE(review): original lines 816, 832-833 and 844-845 are missing from
// this listing — presumably a quality constant, the score comparison that
// selects `im`, and part of the demotion condition; confirm against the
// original file.
809  template <typename TrackerTraits>
810  __global__ void kernel_simpleTripletCleaner(TkSoAView<TrackerTraits> tracks_view,
811  uint16_t nmin,
812  bool dupPassThrough,
813  HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
814  // quality to mark rejected
815  auto const reject = pixelTrack::Quality::loose;
817  auto const good = pixelTrack::Quality::loose;
818 
819  auto &hitToTuple = *phitToTuple;
820 
821  int first = blockDim.x * blockIdx.x + threadIdx.x;
822  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
823  if (hitToTuple.size(idx) < 2)
824  continue;
825 
826  float mc = maxScore;
827  uint16_t im = tkNotFound;
828 
829  // choose best tip! (should we first find best quality???)
830  for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
831  auto const it = *ip;
834  im = it;
835  }
836  }
837 
838  if (tkNotFound == im)
839  continue;
840 
841  // mark worse ambiguities
842  for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
843  auto const it = *ip;
845  it != im)
846  tracks_view[it].quality() = reject; //no race: simple assignment of the same constant
847  }
848 
849  } // loop over hits
850  }
851 
// Debug printout of found ntuplets in the index window
// [firstPrint, lastPrint): one line per track of at least loose quality and
// >= 3 hits, with quality, hit/layer counts, fit quantities and the global z
// of the first hits; `iev` tags lines with the event number.
// NOTE(review): original lines 854, 859, 874 and 877-879 are missing from
// this listing — the tracks_view parameter and several printf arguments
// (between nLayers and pt, and between eta and chi2) are not visible.
852  template <typename TrackerTraits>
853  __global__ void kernel_print_found_ntuplets(HitsConstView<TrackerTraits> hh,
855  HitToTuple<TrackerTraits> const *__restrict__ phitToTuple,
856  int32_t firstPrint,
857  int32_t lastPrint,
858  int iev) {
860 
861  int first = firstPrint + blockDim.x * blockIdx.x + threadIdx.x;
862  for (int i = first, np = std::min(lastPrint, tracks_view.hitIndices().nOnes()); i < np;
863  i += blockDim.x * gridDim.x) {
864  auto nh = tracks_view.hitIndices().size(i);
865  if (nh < 3)
866  continue;
867  if (tracks_view[i].quality() < loose)
868  continue;
869  printf("TK: %d %d %d %d %f %f %f %f %f %f %f %.3f %.3f %.3f %.3f %.3f %.3f %.3f\n",
870  10000 * iev + i,
871  int(tracks_view[i].quality()),
872  nh,
873  tracks_view[i].nLayers(),
875  tracks_view[i].pt(),
876  tracks_view[i].eta(),
880  tracks_view[i].chi2(),
881  hh[*tracks_view.hitIndices().begin(i)].zGlobal(),
882  hh[*(tracks_view.hitIndices().begin(i) + 1)].zGlobal(),
883  hh[*(tracks_view.hitIndices().begin(i) + 2)].zGlobal(),
884  nh > 3 ? hh[int(*(tracks_view.hitIndices().begin(i) + 3))].zGlobal() : 0,
885  nh > 4 ? hh[int(*(tracks_view.hitIndices().begin(i) + 4))].zGlobal() : 0,
886  nh > 5 ? hh[int(*(tracks_view.hitIndices().begin(i) + 5))].zGlobal() : 0,
887  nh > 6 ? hh[int(*(tracks_view.hitIndices().begin(i) + nh - 1))].zGlobal() : 0);
888  }
889  }
890 
__global__ void kernel_printCounters(Counters const *counters) {
  // Debug dump of the accumulated job-level counters: a header row, the raw
  // totals, then the same quantities normalized per event (per cell for the
  // last four columns).
  // NOTE(review): no thread guard — presumably launched with a single
  // thread; every thread would print identical output. Confirm at call site.
  auto const &c = *counters;
  // Fixed: header column previously read "nFitTacks" (typo) for nFitTracks.
  // The "nUsedCells" column is fed by the field named nEmptyCells, which
  // kernel_checkOverflows increments for cells that are NOT unused.
  printf(
      "||Counters | nEvents | nHits | nCells | nTuples | nFitTracks | nLooseTracks | nGoodTracks | nUsedHits | "
      "nDupHits | "
      "nFishCells | "
      "nKilledCells | "
      "nUsedCells | nZeroTrackCells ||\n");
  printf("Counters Raw %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n",
         c.nEvents,
         c.nHits,
         c.nCells,
         c.nTuples,
         c.nFitTracks,
         c.nLooseTracks,
         c.nGoodTracks,
         c.nUsedHits,
         c.nDupHits,
         c.nFishCells,
         c.nKilledCells,
         c.nEmptyCells,
         c.nZeroTrackCells);
  printf(
      "Counters Norm %lld ||  %.1f|  %.1f|  %.1f|  %.1f|  %.1f|  %.1f|  %.1f|  %.1f|  %.3f|  %.3f|  %.3f|  %.3f||\n",
      c.nEvents,
      c.nHits / double(c.nEvents),
      c.nCells / double(c.nEvents),
      c.nTuples / double(c.nEvents),
      c.nFitTracks / double(c.nEvents),
      c.nLooseTracks / double(c.nEvents),
      c.nGoodTracks / double(c.nEvents),
      c.nUsedHits / double(c.nEvents),
      c.nDupHits / double(c.nEvents),
      c.nFishCells / double(c.nCells),
      c.nKilledCells / double(c.nCells),
      c.nEmptyCells / double(c.nCells),
      c.nZeroTrackCells / double(c.nCells));
}
929 
930 } // namespace caHitNtupletGeneratorKernels
const dim3 threadIdx
Definition: cudaCompat.h:29
HitContainer< TrackerTraits > const *__restrict__ tuples
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ uint32_t const *__restrict__ CellNeighborsVector< TrackerTraits > const CellTracksVector< TrackerTraits > const OuterHitOfCell< TrackerTraits > const isOuterHitOfCell
def isnan(num)
HitContainer< TrackerTraits > const *__restrict__ Quality const *__restrict__ quality
TrackingRecHitSoAConstView< TrackerTraits > HitsConstView
Definition: GPUCACell.h:34
const dim3 gridDim
Definition: cudaCompat.h:33
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ uint32_t const *__restrict__ CellNeighborsVector< TrackerTraits > const * cellNeighbors
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ uint32_t const *__restrict__ CellNeighborsVector< TrackerTraits > const CellTracksVector< TrackerTraits > const * cellTracks
uint32_t const *__restrict__ TkSoAView< TrackerTraits > tracks_view
#define __global__
Definition: cudaCompat.h:19
const dim3 blockDim
Definition: cudaCompat.h:30
caHitNtupletGenerator::Counters Counters
TkSoAView< TrackerTraits > HitToTuple< TrackerTraits > const *__restrict__ int32_t int32_t int iev
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ cells
TupleMultiplicity< TrackerTraits > const * tupleMultiplicity
typename GPUCACellT< TrackerTraits >::HitsConstView HitsConstView
uint32_t const *__restrict__ TkSoAView< TrackerTraits > bool dupPassThrough
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ uint32_t const *__restrict__ CellNeighborsVector< TrackerTraits > const CellTracksVector< TrackerTraits > const OuterHitOfCell< TrackerTraits > const int32_t uint32_t maxNumberOfDoublets
TkSoAView< TrackerTraits > GPUCACellT< TrackerTraits > *__restrict__ uint32_t const CellTracksVector< TrackerTraits > cms::cuda::AtomicPairCounter CAParams< TrackerTraits > params
constexpr uint32_t stride
Definition: HelixFit.h:22
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ uint32_t const *__restrict__ CellNeighborsVector< TrackerTraits > const CellTracksVector< TrackerTraits > const OuterHitOfCell< TrackerTraits > const int32_t uint32_t Counters * counters
int np
Definition: AMPTWrapper.h:43
static constexpr __host__ __device__ int computeNumberOfLayers(const TrackSoAConstView &tracks, int32_t i)
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ uint32_t const *__restrict__ CellNeighborsVector< TrackerTraits > const CellTracksVector< TrackerTraits > const OuterHitOfCell< TrackerTraits > const int32_t nHits
typename TrackSoA< TrackerTraits >::HitContainer HitContainer
TkSoAView< TrackerTraits > HitToTuple< TrackerTraits > const *__restrict__ int32_t firstPrint
__device__ __host__ Counters get() const
stack
Definition: svgfig.py:559
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const * hitToTuple
TrackSoAView< TrackerTraits > TkSoAView
uint32_t nh
const dim3 blockIdx
Definition: cudaCompat.h:32
assert(nCells)
cannot be loose
int nt
Definition: AMPTWrapper.h:42
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ uint32_t const *__restrict__ nCells
static constexpr __host__ __device__ int nHits(const TrackSoAConstView &tracks, int i)
T __ldg(T const *x)
Definition: cudaCompat.h:137
static constexpr __host__ __device__ bool isTriplet(const TrackSoAConstView &tracks, int i)
typename TrackSoA< TrackerTraits >::template TrackSoALayout<>::View TrackSoAView
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter * apc
TkSoAView< TrackerTraits > HitToTuple< TrackerTraits > const *__restrict__ int32_t int32_t lastPrint
The Signals That Services Can Subscribe To This is based on ActivityRegistry h
Helper function to determine trigger accepts.
Definition: Activities.doc:4
HitContainer< TrackerTraits > const *__restrict__ Quality const *__restrict__ HitToTuple< TrackerTraits > const *__restrict__ phitToTuple
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
HitContainer< TrackerTraits > const *__restrict__ ptuples