CMS 3D CMS Logo

CAHitNtupletGeneratorKernelsImpl.h
Go to the documentation of this file.
1 //
2 // Original Author: Felice Pantaleo, CERN
3 //
4 
5 //#define NTUPLE_DEBUG
6 //#define GPU_DEBUG
7 
8 #include <cmath>
9 #include <cstdint>
10 #include <limits>
11 
12 #include <cuda_runtime.h>
13 
19 
21 
23 #include "CAStructures.h"
24 #include "GPUCACell.h"
25 #include "gpuFishbone.h"
26 #include "gpuPixelDoublets.h"
27 
29 
32  constexpr float nSigma2 = 25.f;
33 
34  //all of these below are mostly to avoid bringing around the relative namespace
35 
36  template <typename TrackerTraits>
38 
39  template <typename TrackerTraits>
41 
42  template <typename TrackerTraits>
44 
45  template <typename TrackerTraits>
47 
48  template <typename TrackerTraits>
50 
52 
53  template <typename TrackerTraits>
55 
56  template <typename TrackerTraits>
58 
59  template <typename TrackerTraits>
61 
62  template <typename TrackerTraits>
64 
65  template <typename TrackerTraits>
67 
69 
70  template <typename TrackerTraits>
71  __global__ void kernel_checkOverflows(TkSoAView<TrackerTraits> tracks_view,
75  GPUCACellT<TrackerTraits> const *__restrict__ cells,
76  uint32_t const *__restrict__ nCells,
80  int32_t nHits,
81  uint32_t maxNumberOfDoublets,
83  auto first = threadIdx.x + blockIdx.x * blockDim.x;
84 
85  auto &c = *counters;
86  // counters once per event
87  if (0 == first) {
88  atomicAdd(&c.nEvents, 1);
89  atomicAdd(&c.nHits, nHits);
90  atomicAdd(&c.nCells, *nCells);
91  atomicAdd(&c.nTuples, apc->get().m);
92  atomicAdd(&c.nFitTracks, tupleMultiplicity->size());
93  }
94 
95 #ifdef NTUPLE_DEBUG
96  if (0 == first) {
97  printf("number of found cells %d \n found tuples %d with total hits %d out of %d %d\n",
98  *nCells,
99  apc->get().m,
100  apc->get().n,
101  nHits,
102  hitToTuple->totOnes());
103  if (apc->get().m < TrackerTraits::maxNumberOfQuadruplets) {
104  assert(tracks_view.hitIndices().size(apc->get().m) == 0);
105  assert(tracks_view.hitIndices().size() == apc->get().n);
106  }
107  }
108 
109  for (int idx = first, nt = tracks_view.hitIndices().nOnes(); idx < nt; idx += gridDim.x * blockDim.x) {
110  if (tracks_view.hitIndices().size(idx) > TrackerTraits::maxHitsOnTrack) // current real limit
111  printf("ERROR %d, %d\n", idx, tracks_view.hitIndices().size(idx));
112  assert(tracks_view.hitIndices().size(idx) <= TrackerTraits::maxHitsOnTrack);
113  for (auto ih = tracks_view.hitIndices().begin(idx); ih != tracks_view.hitIndices().end(idx); ++ih)
114  assert(int(*ih) < nHits);
115  }
116 #endif
117 
118  if (0 == first) {
119  if (apc->get().m >= TrackerTraits::maxNumberOfQuadruplets)
120  printf("Tuples overflow\n");
121  if (*nCells >= maxNumberOfDoublets)
122  printf("Cells overflow\n");
123  if (cellNeighbors && cellNeighbors->full())
124  printf("cellNeighbors overflow %d %d \n", cellNeighbors->capacity(), cellNeighbors->size());
125  if (cellTracks && cellTracks->full())
126  printf("cellTracks overflow\n");
127  if (int(hitToTuple->nOnes()) < nHits)
128  printf("ERROR hitToTuple overflow %d %d\n", hitToTuple->nOnes(), nHits);
129 #ifdef GPU_DEBUG
130  printf("size of cellNeighbors %d \n cellTracks %d \n hitToTuple %d \n",
131  cellNeighbors->size(),
132  cellTracks->size(),
133  hitToTuple->size());
134 
135 #endif
136  }
137 
138  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
139  auto const &thisCell = cells[idx];
140  if (thisCell.hasFishbone() && !thisCell.isKilled())
141  atomicAdd(&c.nFishCells, 1);
142  if (thisCell.outerNeighbors().full()) //++tooManyNeighbors[thisCell.theLayerPairId];
143  printf("OuterNeighbors overflow %d in %d\n", idx, thisCell.layerPairId());
144  if (thisCell.tracks().full()) //++tooManyTracks[thisCell.theLayerPairId];
145  printf("Tracks overflow %d in %d\n", idx, thisCell.layerPairId());
146  if (thisCell.isKilled())
147  atomicAdd(&c.nKilledCells, 1);
148  if (!thisCell.unused())
149  atomicAdd(&c.nEmptyCells, 1);
150  if ((0 == hitToTuple->size(thisCell.inner_hit_id())) && (0 == hitToTuple->size(thisCell.outer_hit_id())))
151  atomicAdd(&c.nZeroTrackCells, 1);
152  }
153 
154  for (int idx = first, nt = nHits - isOuterHitOfCell.offset; idx < nt; idx += gridDim.x * blockDim.x) {
155  if (isOuterHitOfCell.container[idx].full()) // ++tooManyOuterHitOfCell;
156  printf("OuterHitOfCell overflow %d\n", idx);
157  }
158  }
159 
160  template <typename TrackerTraits>
161  __global__ void kernel_fishboneCleaner(GPUCACellT<TrackerTraits> const *cells,
162  uint32_t const *__restrict__ nCells,
165 
166  auto first = threadIdx.x + blockIdx.x * blockDim.x;
167  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
168  auto const &thisCell = cells[idx];
169  if (!thisCell.isKilled())
170  continue;
171 
172  for (auto it : thisCell.tracks())
174  }
175  }
176 
177  // remove shorter tracks if sharing a cell
178  // It does not seem to affect efficiency in any way!
179  template <typename TrackerTraits>
180  __global__ void kernel_earlyDuplicateRemover(GPUCACellT<TrackerTraits> const *cells,
181  uint32_t const *__restrict__ nCells,
182  TkSoAView<TrackerTraits> tracks_view,
184  // quality to mark rejected
186 
187  assert(nCells);
188  auto first = threadIdx.x + blockIdx.x * blockDim.x;
189  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
190  auto const &thisCell = cells[idx];
191 
192  if (thisCell.tracks().size() < 2)
193  continue;
194 
195  int8_t maxNl = 0;
196 
197  // find maxNl
198  for (auto it : thisCell.tracks()) {
199  auto nl = tracks_view[it].nLayers();
200  maxNl = std::max(nl, maxNl);
201  }
202 
203  // if (maxNl<4) continue;
204  // quad pass through (leave it her for tests)
205  // maxNl = std::min(4, maxNl);
206 
207  for (auto it : thisCell.tracks()) {
208  if (tracks_view[it].nLayers() < maxNl)
209  tracks_view[it].quality() = reject; //no race: simple assignment of the same constant
210  }
211  }
212  }
213 
214  // assume the above (so, short tracks already removed)
215  template <typename TrackerTraits>
216  __global__ void kernel_fastDuplicateRemover(GPUCACellT<TrackerTraits> const *__restrict__ cells,
217  uint32_t const *__restrict__ nCells,
218  TkSoAView<TrackerTraits> tracks_view,
219  bool dupPassThrough) {
220  // quality to mark rejected
223 
224  assert(nCells);
225 
226  auto first = threadIdx.x + blockIdx.x * blockDim.x;
227  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
228  auto const &thisCell = cells[idx];
229  if (thisCell.tracks().size() < 2)
230  continue;
231 
232  float mc = maxScore;
233  uint16_t im = tkNotFound;
234 
235  auto score = [&](auto it) { return std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)); };
236 
237  // full crazy combinatorics
238  // full crazy combinatorics
239  int ntr = thisCell.tracks().size();
240  for (int i = 0; i < ntr - 1; ++i) {
241  auto it = thisCell.tracks()[i];
242  auto qi = tracks_view[it].quality();
243  if (qi <= reject)
244  continue;
245  auto opi = tracks_view[it].state()(2);
246  auto e2opi = tracks_view[it].covariance()(9);
247  auto cti = tracks_view[it].state()(3);
248  auto e2cti = tracks_view[it].covariance()(12);
249  for (auto j = i + 1; j < ntr; ++j) {
250  auto jt = thisCell.tracks()[j];
251  auto qj = tracks_view[jt].quality();
252  if (qj <= reject)
253  continue;
254  auto opj = tracks_view[jt].state()(2);
255  auto ctj = tracks_view[jt].state()(3);
256  auto dct = nSigma2 * (tracks_view[jt].covariance()(12) + e2cti);
257  if ((cti - ctj) * (cti - ctj) > dct)
258  continue;
259  auto dop = nSigma2 * (tracks_view[jt].covariance()(9) + e2opi);
260  if ((opi - opj) * (opi - opj) > dop)
261  continue;
262  if ((qj < qi) || (qj == qi && score(it) < score(jt)))
263  tracks_view[jt].quality() = reject;
264  else {
265  tracks_view[it].quality() = reject;
266  break;
267  }
268  }
269  }
270 
271  // find maxQual
272  auto maxQual = reject; // no duplicate!
273  for (auto it : thisCell.tracks()) {
274  if (tracks_view[it].quality() > maxQual)
275  maxQual = tracks_view[it].quality();
276  }
277 
278  if (maxQual <= loose)
279  continue;
280 
281  // find min score
282  for (auto it : thisCell.tracks()) {
283  if (tracks_view[it].quality() == maxQual && score(it) < mc) {
284  mc = score(it);
285  im = it;
286  }
287  }
288 
289  if (tkNotFound == im)
290  continue;
291 
292  // mark all other duplicates (not yet, keep it loose)
293  for (auto it : thisCell.tracks()) {
294  if (tracks_view[it].quality() > loose && it != im)
295  tracks_view[it].quality() = loose; //no race: simple assignment of the same constant
296  }
297  }
298  }
299 
300  template <typename TrackerTraits>
301  __global__ void kernel_connect(cms::cuda::AtomicPairCounter *apc1,
302  cms::cuda::AtomicPairCounter *apc2, // just to zero them,
305  uint32_t const *__restrict__ nCells,
310 
311  auto firstCellIndex = threadIdx.y + blockIdx.y * blockDim.y;
312  auto first = threadIdx.x;
313  auto stride = blockDim.x;
314 
315  if (0 == (firstCellIndex + first)) {
316  (*apc1) = 0;
317  (*apc2) = 0;
318  } // ready for next kernel
319 
320  constexpr uint32_t last_bpix1_detIndex = TrackerTraits::last_bpix1_detIndex;
321  constexpr uint32_t last_barrel_detIndex = TrackerTraits::last_barrel_detIndex;
322  for (int idx = firstCellIndex, nt = (*nCells); idx < nt; idx += gridDim.y * blockDim.y) {
323  auto cellIndex = idx;
324  auto &thisCell = cells[idx];
325  auto innerHitId = thisCell.inner_hit_id();
326  if (int(innerHitId) < isOuterHitOfCell.offset)
327  continue;
328  int numberOfPossibleNeighbors = isOuterHitOfCell[innerHitId].size();
329  auto vi = isOuterHitOfCell[innerHitId].data();
330 
331  auto ri = thisCell.inner_r(hh);
332  auto zi = thisCell.inner_z(hh);
333 
334  auto ro = thisCell.outer_r(hh);
335  auto zo = thisCell.outer_z(hh);
336  auto isBarrel = thisCell.inner_detIndex(hh) < last_barrel_detIndex;
337 
338  for (int j = first; j < numberOfPossibleNeighbors; j += stride) {
339  auto otherCell = __ldg(vi + j);
340  auto &oc = cells[otherCell];
341  auto r1 = oc.inner_r(hh);
342  auto z1 = oc.inner_z(hh);
343  bool aligned = Cell::areAlignedRZ(
344  r1,
345  z1,
346  ri,
347  zi,
348  ro,
349  zo,
350  params.ptmin_,
351  isBarrel ? params.CAThetaCutBarrel_ : params.CAThetaCutForward_); // 2.f*thetaCut); // FIXME tune cuts
352  if (aligned && thisCell.dcaCut(hh,
353  oc,
354  oc.inner_detIndex(hh) < last_bpix1_detIndex ? params.dcaCutInnerTriplet_
355  : params.dcaCutOuterTriplet_,
356  params.hardCurvCut_)) { // FIXME tune cuts
357  oc.addOuterNeighbor(cellIndex, *cellNeighbors);
358  thisCell.setStatusBits(Cell::StatusBit::kUsed);
359  oc.setStatusBits(Cell::StatusBit::kUsed);
360  }
361  } // loop on inner cells
362  } // loop on outer cells
363  }
364 
365  template <typename TrackerTraits>
366  __global__ void kernel_find_ntuplets(HitsConstView<TrackerTraits> hh,
367  TkSoAView<TrackerTraits> tracks_view,
368  GPUCACellT<TrackerTraits> *__restrict__ cells,
369  uint32_t const *nCells,
370  CellTracksVector<TrackerTraits> *cellTracks,
373  // recursive: not obvious to widen
374 
376 
377  auto first = threadIdx.x + blockIdx.x * blockDim.x;
378 
379 #ifdef GPU_DEBUG
380  if (first == 0)
381  printf("starting producing ntuplets from %d cells \n", *nCells);
382 #endif
383  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
384  auto const &thisCell = cells[idx];
385 
386  if (thisCell.isKilled())
387  continue; // cut by earlyFishbone
388 
389  // we require at least three hits...
390  if (thisCell.outerNeighbors().empty())
391  continue;
392 
393  auto pid = thisCell.layerPairId();
394  bool doit = params.startingLayerPair(pid);
395 
397  if (doit) {
398  typename Cell::TmpTuple stack;
399  stack.reset();
400 
401  bool bpix1Start = params.startAt0(pid);
402 
403  thisCell.template find_ntuplets<maxDepth>(hh,
404  cells,
405  *cellTracks,
406  tracks_view.hitIndices(),
407  *apc,
408  tracks_view.quality(),
409  stack,
410  params.minHitsPerNtuplet_,
411  bpix1Start);
412 
413  assert(stack.empty());
414  }
415  }
416  }
417  template <typename TrackerTraits>
418  __global__ void kernel_mark_used(GPUCACellT<TrackerTraits> *__restrict__ cells, uint32_t const *nCells) {
419  auto first = threadIdx.x + blockIdx.x * blockDim.x;
421  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
422  auto &thisCell = cells[idx];
423  if (!thisCell.tracks().empty())
424  thisCell.setStatusBits(Cell::StatusBit::kInTrack);
425  }
426  }
427 
428  template <typename TrackerTraits>
429  __global__ void kernel_countMultiplicity(TkSoAView<TrackerTraits> tracks_view,
431  auto first = blockIdx.x * blockDim.x + threadIdx.x;
432  for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) {
433  auto nhits = tracks_view.hitIndices().size(it);
434  if (nhits < 3)
435  continue;
437  continue;
439  if (nhits > TrackerTraits::maxHitsOnTrack) // current limit
440  printf("wrong mult %d %d\n", it, nhits);
441  assert(nhits <= TrackerTraits::maxHitsOnTrack);
442  tupleMultiplicity->count(nhits);
443  }
444  }
445 
446  template <typename TrackerTraits>
447  __global__ void kernel_fillMultiplicity(TkSoAView<TrackerTraits> tracks_view,
449  auto first = blockIdx.x * blockDim.x + threadIdx.x;
450  for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) {
451  auto nhits = tracks_view.hitIndices().size(it);
452  if (nhits < 3)
453  continue;
455  continue;
457  if (nhits > TrackerTraits::maxHitsOnTrack)
458  printf("wrong mult %d %d\n", it, nhits);
459  assert(nhits <= TrackerTraits::maxHitsOnTrack);
460  tupleMultiplicity->fill(nhits, it);
461  }
462  }
463 
465  template <typename TrackerTraits>
467  // Quality *__restrict__ quality) {
468  int first = blockDim.x * blockIdx.x + threadIdx.x;
469  for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) {
470  auto nhits = tracks_view.hitIndices().size(it);
471  if (nhits == 0)
472  break; // guard
473 
474  // if duplicate: not even fit
476  continue;
477 
479 
480  // mark doublets as bad
481  if (nhits < 3)
482  continue;
483 
484  // if the fit has any invalid parameters, mark it as bad
485  bool isNaN = false;
486  for (int i = 0; i < 5; ++i) {
487  isNaN |= edm::isNotFinite(tracks_view[it].state()(i));
488  }
489  if (isNaN) {
490 #ifdef NTUPLE_DEBUG
491  printf("NaN in fit %d size %d chi2 %f\n", it, tracks_view.hitIndices().size(it), tracks_view[it].chi2());
492 #endif
493  continue;
494  }
495 
497 
498  if (cuts.strictCut(tracks_view, it))
499  continue;
500 
502 
503  if (cuts.isHP(tracks_view, nhits, it))
505  }
506  }
507 
508  template <typename TrackerTraits>
509  __global__ void kernel_doStatsForTracks(TkSoAView<TrackerTraits> tracks_view, Counters *counters) {
510  int first = blockDim.x * blockIdx.x + threadIdx.x;
511  for (int idx = first, ntot = tracks_view.hitIndices().nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
512  if (tracks_view.hitIndices().size(idx) == 0)
513  break; //guard
515  continue;
516  atomicAdd(&(counters->nLooseTracks), 1);
518  continue;
519  atomicAdd(&(counters->nGoodTracks), 1);
520  }
521  }
522 
523  template <typename TrackerTraits>
524  __global__ void kernel_countHitInTracks(TkSoAView<TrackerTraits> tracks_view, HitToTuple<TrackerTraits> *hitToTuple) {
525  int first = blockDim.x * blockIdx.x + threadIdx.x;
526  for (int idx = first, ntot = tracks_view.hitIndices().nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
527  if (tracks_view.hitIndices().size(idx) == 0)
528  break; // guard
529  for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h)
530  hitToTuple->count(*h);
531  }
532  }
533 
534  template <typename TrackerTraits>
535  __global__ void kernel_fillHitInTracks(TkSoAView<TrackerTraits> tracks_view, HitToTuple<TrackerTraits> *hitToTuple) {
536  int first = blockDim.x * blockIdx.x + threadIdx.x;
537  for (int idx = first, ntot = tracks_view.hitIndices().nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
538  if (tracks_view.hitIndices().size(idx) == 0)
539  break; // guard
540  for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h)
541  hitToTuple->fill(*h, idx);
542  }
543  }
544 
545  template <typename TrackerTraits>
547  int first = blockDim.x * blockIdx.x + threadIdx.x;
548  // copy offsets
549  for (int idx = first, ntot = tracks_view.hitIndices().totOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
550  tracks_view.detIndices().off[idx] = tracks_view.hitIndices().off[idx];
551  }
552  // fill hit indices
553  auto nhits = hh.nHits();
554 
555  for (int idx = first, ntot = tracks_view.hitIndices().size(); idx < ntot; idx += gridDim.x * blockDim.x) {
556  assert(tracks_view.hitIndices().content[idx] < nhits);
557  tracks_view.detIndices().content[idx] = hh[tracks_view.hitIndices().content[idx]].detectorIndex();
558  }
559  }
560 
561  template <typename TrackerTraits>
562  __global__ void kernel_fillNLayers(TkSoAView<TrackerTraits> tracks_view, cms::cuda::AtomicPairCounter *apc) {
563  auto first = blockIdx.x * blockDim.x + threadIdx.x;
564  // clamp the number of tracks to the capacity of the SoA
565  auto ntracks = std::min<int>(apc->get().m, tracks_view.metadata().size() - 1);
566  if (0 == first)
567  tracks_view.nTracks() = ntracks;
568  for (int idx = first, nt = ntracks; idx < nt; idx += gridDim.x * blockDim.x) {
570  assert(nHits >= 3);
572  }
573  }
574 
575  template <typename TrackerTraits>
576  __global__ void kernel_doStatsForHitInTracks(HitToTuple<TrackerTraits> const *__restrict__ hitToTuple,
577  Counters *counters) {
578  auto &c = *counters;
579  int first = blockDim.x * blockIdx.x + threadIdx.x;
580  for (int idx = first, ntot = hitToTuple->nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
581  if (hitToTuple->size(idx) == 0)
582  continue; // SHALL NOT BE break
583  atomicAdd(&c.nUsedHits, 1);
584  if (hitToTuple->size(idx) > 1)
585  atomicAdd(&c.nDupHits, 1);
586  }
587  }
588 
589  template <typename TrackerTraits>
590  __global__ void kernel_countSharedHit(int *__restrict__ nshared,
592  Quality const *__restrict__ quality,
593  HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
595 
596  auto &hitToTuple = *phitToTuple;
597  auto const &foundNtuplets = *ptuples;
598 
599  int first = blockDim.x * blockIdx.x + threadIdx.x;
600  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
601  if (hitToTuple.size(idx) < 2)
602  continue;
603 
604  int nt = 0;
605 
606  // count "good" tracks
607  for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
608  if (quality[*it] < loose)
609  continue;
610  ++nt;
611  }
612 
613  if (nt < 2)
614  continue;
615 
616  // now mark each track triplet as sharing a hit
617  for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
618  if (foundNtuplets.size(*it) > 3)
619  continue;
620  atomicAdd(&nshared[*it], 1);
621  }
622 
623  } // hit loop
624  }
625 
626  template <typename TrackerTraits>
627  __global__ void kernel_markSharedHit(int const *__restrict__ nshared,
628  HitContainer<TrackerTraits> const *__restrict__ tuples,
629  Quality *__restrict__ quality,
630  bool dupPassThrough) {
631  // constexpr auto bad = pixelTrack::Quality::bad;
634  // constexpr auto strict = pixelTrack::Quality::strict;
635 
636  // quality to mark rejected
637  auto const reject = dupPassThrough ? loose : dup;
638 
639  int first = blockDim.x * blockIdx.x + threadIdx.x;
640  for (int idx = first, ntot = tuples->nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
641  if (tuples->size(idx) == 0)
642  break; //guard
643  if (quality[idx] <= reject)
644  continue;
645  if (nshared[idx] > 2)
646  quality[idx] = reject;
647  }
648  }
649 
650  // mostly for very forward triplets.....
651  template <typename TrackerTraits>
652  __global__ void kernel_rejectDuplicate(TkSoAView<TrackerTraits> tracks_view,
653  uint16_t nmin,
654  bool dupPassThrough,
655  HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
656  // quality to mark rejected
658 
659  auto &hitToTuple = *phitToTuple;
660 
661  int first = blockDim.x * blockIdx.x + threadIdx.x;
662  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
663  if (hitToTuple.size(idx) < 2)
664  continue;
665 
666  auto score = [&](auto it, auto nl) { return std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)); };
667 
668  // full combinatorics
669  for (auto ip = hitToTuple.begin(idx); ip < hitToTuple.end(idx) - 1; ++ip) {
670  auto const it = *ip;
671  auto qi = tracks_view[it].quality();
672  if (qi <= reject)
673  continue;
674  auto opi = tracks_view[it].state()(2);
675  auto e2opi = tracks_view[it].covariance()(9);
676  auto cti = tracks_view[it].state()(3);
677  auto e2cti = tracks_view[it].covariance()(12);
678  auto nli = tracks_view[it].nLayers();
679  for (auto jp = ip + 1; jp < hitToTuple.end(idx); ++jp) {
680  auto const jt = *jp;
681  auto qj = tracks_view[jt].quality();
682  if (qj <= reject)
683  continue;
684  auto opj = tracks_view[jt].state()(2);
685  auto ctj = tracks_view[jt].state()(3);
686  auto dct = nSigma2 * (tracks_view[jt].covariance()(12) + e2cti);
687  if ((cti - ctj) * (cti - ctj) > dct)
688  continue;
689  auto dop = nSigma2 * (tracks_view[jt].covariance()(9) + e2opi);
690  if ((opi - opj) * (opi - opj) > dop)
691  continue;
692  auto nlj = tracks_view[jt].nLayers();
693  if (nlj < nli || (nlj == nli && (qj < qi || (qj == qi && score(it, nli) < score(jt, nlj)))))
694  tracks_view[jt].quality() = reject;
695  else {
696  tracks_view[it].quality() = reject;
697  break;
698  }
699  }
700  }
701  }
702  }
703 
704  template <typename TrackerTraits>
705  __global__ void kernel_sharedHitCleaner(HitsConstView<TrackerTraits> hh,
706  TkSoAView<TrackerTraits> tracks_view,
707  int nmin,
708  bool dupPassThrough,
709  HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
710  // quality to mark rejected
712  // quality of longest track
714 
715  auto &hitToTuple = *phitToTuple;
716 
717  int l1end = hh.hitsLayerStart()[1];
718 
719  int first = blockDim.x * blockIdx.x + threadIdx.x;
720  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
721  if (hitToTuple.size(idx) < 2)
722  continue;
723 
724  int8_t maxNl = 0;
725 
726  // find maxNl
727  for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
728  if (tracks_view[*it].quality() < longTqual)
729  continue;
730  // if (tracks_view[*it].nHits()==3) continue;
731  auto nl = tracks_view[*it].nLayers();
732  maxNl = std::max(nl, maxNl);
733  }
734 
735  if (maxNl < 4)
736  continue;
737 
738  // quad pass through (leave for tests)
739  // maxNl = std::min(4, maxNl);
740 
741  // kill all tracks shorter than maxHl (only triplets???
742  for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
743  auto nl = tracks_view[*it].nLayers();
744 
745  //checking if shared hit is on bpix1 and if the tuple is short enough
746  if (idx < l1end and nl > nmin)
747  continue;
748 
749  if (nl < maxNl && tracks_view[*it].quality() > reject)
750  tracks_view[*it].quality() = reject;
751  }
752  }
753  }
754 
755  template <typename TrackerTraits>
756  __global__ void kernel_tripletCleaner(TkSoAView<TrackerTraits> tracks_view,
757  uint16_t nmin,
758  bool dupPassThrough,
759  HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
760  // quality to mark rejected
761  auto const reject = pixelTrack::Quality::loose;
764 
765  auto &hitToTuple = *phitToTuple;
766 
767  int first = blockDim.x * blockIdx.x + threadIdx.x;
768  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
769  if (hitToTuple.size(idx) < 2)
770  continue;
771 
772  float mc = maxScore;
773  uint16_t im = tkNotFound;
774  bool onlyTriplets = true;
775 
776  // check if only triplets
777  for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
778  if (tracks_view[*it].quality() <= good)
779  continue;
781  if (!onlyTriplets)
782  break;
783  }
784 
785  // only triplets
786  if (!onlyTriplets)
787  continue;
788 
789  // for triplets choose best tip! (should we first find best quality???)
790  for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
791  auto const it = *ip;
794  im = it;
795  }
796  }
797 
798  if (tkNotFound == im)
799  continue;
800 
801  // mark worse ambiguities
802  for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
803  auto const it = *ip;
804  if (tracks_view[it].quality() > reject && it != im)
805  tracks_view[it].quality() = reject; //no race: simple assignment of the same constant
806  }
807 
808  } // loop over hits
809  }
810 
811  template <typename TrackerTraits>
812  __global__ void kernel_simpleTripletCleaner(TkSoAView<TrackerTraits> tracks_view,
813  uint16_t nmin,
814  bool dupPassThrough,
815  HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
816  // quality to mark rejected
817  auto const reject = pixelTrack::Quality::loose;
819  auto const good = pixelTrack::Quality::loose;
820 
821  auto &hitToTuple = *phitToTuple;
822 
823  int first = blockDim.x * blockIdx.x + threadIdx.x;
824  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
825  if (hitToTuple.size(idx) < 2)
826  continue;
827 
828  float mc = maxScore;
829  uint16_t im = tkNotFound;
830 
831  // choose best tip! (should we first find best quality???)
832  for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
833  auto const it = *ip;
836  im = it;
837  }
838  }
839 
840  if (tkNotFound == im)
841  continue;
842 
843  // mark worse ambiguities
844  for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
845  auto const it = *ip;
847  it != im)
848  tracks_view[it].quality() = reject; //no race: simple assignment of the same constant
849  }
850 
851  } // loop over hits
852  }
853 
854  template <typename TrackerTraits>
855  __global__ void kernel_print_found_ntuplets(HitsConstView<TrackerTraits> hh,
857  HitToTuple<TrackerTraits> const *__restrict__ phitToTuple,
858  int32_t firstPrint,
859  int32_t lastPrint,
860  int iev) {
862 
863  int first = firstPrint + blockDim.x * blockIdx.x + threadIdx.x;
864  for (int i = first, np = std::min(lastPrint, tracks_view.hitIndices().nOnes()); i < np;
865  i += blockDim.x * gridDim.x) {
866  auto nh = tracks_view.hitIndices().size(i);
867  if (nh < 3)
868  continue;
869  if (tracks_view[i].quality() < loose)
870  continue;
871  printf("TK: %d %d %d %d %f %f %f %f %f %f %f %.3f %.3f %.3f %.3f %.3f %.3f %.3f\n",
872  10000 * iev + i,
873  int(tracks_view[i].quality()),
874  nh,
875  tracks_view[i].nLayers(),
877  tracks_view[i].pt(),
878  tracks_view[i].eta(),
882  tracks_view[i].chi2(),
883  hh[*tracks_view.hitIndices().begin(i)].zGlobal(),
884  hh[*(tracks_view.hitIndices().begin(i) + 1)].zGlobal(),
885  hh[*(tracks_view.hitIndices().begin(i) + 2)].zGlobal(),
886  nh > 3 ? hh[int(*(tracks_view.hitIndices().begin(i) + 3))].zGlobal() : 0,
887  nh > 4 ? hh[int(*(tracks_view.hitIndices().begin(i) + 4))].zGlobal() : 0,
888  nh > 5 ? hh[int(*(tracks_view.hitIndices().begin(i) + 5))].zGlobal() : 0,
889  nh > 6 ? hh[int(*(tracks_view.hitIndices().begin(i) + nh - 1))].zGlobal() : 0);
890  }
891  }
892 
893  __global__ void kernel_printCounters(Counters const *counters) {
894  auto const &c = *counters;
895  printf(
896  "||Counters | nEvents | nHits | nCells | nTuples | nFitTacks | nLooseTracks | nGoodTracks | nUsedHits | "
897  "nDupHits | "
898  "nFishCells | "
899  "nKilledCells | "
900  "nUsedCells | nZeroTrackCells ||\n");
901  printf("Counters Raw %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n",
902  c.nEvents,
903  c.nHits,
904  c.nCells,
905  c.nTuples,
906  c.nFitTracks,
907  c.nLooseTracks,
908  c.nGoodTracks,
909  c.nUsedHits,
910  c.nDupHits,
911  c.nFishCells,
912  c.nKilledCells,
913  c.nEmptyCells,
914  c.nZeroTrackCells);
915  printf(
916  "Counters Norm %lld || %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.3f| %.3f| %.3f| %.3f||\n",
917  c.nEvents,
918  c.nHits / double(c.nEvents),
919  c.nCells / double(c.nEvents),
920  c.nTuples / double(c.nEvents),
921  c.nFitTracks / double(c.nEvents),
922  c.nLooseTracks / double(c.nEvents),
923  c.nGoodTracks / double(c.nEvents),
924  c.nUsedHits / double(c.nEvents),
925  c.nDupHits / double(c.nEvents),
926  c.nFishCells / double(c.nCells),
927  c.nKilledCells / double(c.nCells),
928  c.nEmptyCells / double(c.nCells),
929  c.nZeroTrackCells / double(c.nCells));
930  }
931 
932 } // namespace caHitNtupletGeneratorKernels
const dim3 threadIdx
Definition: cudaCompat.h:29
HitContainer< TrackerTraits > const *__restrict__ tuples
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ uint32_t const *__restrict__ CellNeighborsVector< TrackerTraits > const CellTracksVector< TrackerTraits > const OuterHitOfCell< TrackerTraits > const isOuterHitOfCell
HitContainer< TrackerTraits > const *__restrict__ Quality const *__restrict__ quality
TrackingRecHitSoAConstView< TrackerTraits > HitsConstView
Definition: GPUCACell.h:34
const dim3 gridDim
Definition: cudaCompat.h:33
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ uint32_t const *__restrict__ CellNeighborsVector< TrackerTraits > const * cellNeighbors
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ uint32_t const *__restrict__ CellNeighborsVector< TrackerTraits > const CellTracksVector< TrackerTraits > const * cellTracks
uint32_t const *__restrict__ TkSoAView< TrackerTraits > tracks_view
constexpr bool isNotFinite(T x)
Definition: isFinite.h:9
#define __global__
Definition: cudaCompat.h:19
const dim3 blockDim
Definition: cudaCompat.h:30
caHitNtupletGenerator::Counters Counters
TkSoAView< TrackerTraits > HitToTuple< TrackerTraits > const *__restrict__ int32_t int32_t int iev
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ cells
TupleMultiplicity< TrackerTraits > const * tupleMultiplicity
typename GPUCACellT< TrackerTraits >::HitsConstView HitsConstView
uint32_t const *__restrict__ TkSoAView< TrackerTraits > bool dupPassThrough
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ uint32_t const *__restrict__ CellNeighborsVector< TrackerTraits > const CellTracksVector< TrackerTraits > const OuterHitOfCell< TrackerTraits > const int32_t uint32_t maxNumberOfDoublets
TkSoAView< TrackerTraits > GPUCACellT< TrackerTraits > *__restrict__ uint32_t const CellTracksVector< TrackerTraits > cms::cuda::AtomicPairCounter CAParams< TrackerTraits > params
constexpr uint32_t stride
Definition: HelixFit.h:22
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ uint32_t const *__restrict__ CellNeighborsVector< TrackerTraits > const CellTracksVector< TrackerTraits > const OuterHitOfCell< TrackerTraits > const int32_t uint32_t Counters * counters
int np
Definition: AMPTWrapper.h:43
static constexpr __host__ __device__ int computeNumberOfLayers(const TrackSoAConstView &tracks, int32_t i)
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ uint32_t const *__restrict__ CellNeighborsVector< TrackerTraits > const CellTracksVector< TrackerTraits > const OuterHitOfCell< TrackerTraits > const int32_t nHits
typename TrackSoA< TrackerTraits >::HitContainer HitContainer
TkSoAView< TrackerTraits > HitToTuple< TrackerTraits > const *__restrict__ int32_t firstPrint
__device__ __host__ Counters get() const
stack
Definition: svgfig.py:559
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const * hitToTuple
TrackSoAView< TrackerTraits > TkSoAView
uint32_t nh
const dim3 blockIdx
Definition: cudaCompat.h:32
assert(nCells)
cannot be loose
int nt
Definition: AMPTWrapper.h:42
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ uint32_t const *__restrict__ nCells
static constexpr __host__ __device__ int nHits(const TrackSoAConstView &tracks, int i)
T __ldg(T const *x)
Definition: cudaCompat.h:137
static constexpr __host__ __device__ bool isTriplet(const TrackSoAConstView &tracks, int i)
typename TrackSoA< TrackerTraits >::template TrackSoALayout<>::View TrackSoAView
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter * apc
TkSoAView< TrackerTraits > HitToTuple< TrackerTraits > const *__restrict__ int32_t int32_t lastPrint
The Signals That Services Can Subscribe To This is based on ActivityRegistry h
Helper function to determine trigger accepts.
Definition: Activities.doc:4
HitContainer< TrackerTraits > const *__restrict__ Quality const *__restrict__ HitToTuple< TrackerTraits > const *__restrict__ phitToTuple
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
HitContainer< TrackerTraits > const *__restrict__ ptuples