CMS 3D CMS Logo

CAHitNtupletGeneratorKernelsImpl.h
Go to the documentation of this file.
1 //
2 // Original Author: Felice Pantaleo, CERN
3 //
4 
5 // #define NTUPLE_DEBUG
6 // #define GPU_DEBUG
7 
8 #include <cmath>
9 #include <cstdint>
10 #include <limits>
11 
12 #include <cuda_runtime.h>
13 
17 
20 
21 #include "CAStructures.h"
23 #include "GPUCACell.h"
24 #include "gpuFishbone.h"
25 #include "gpuPixelDoublets.h"
26 
28 
 // Squared n-sigma window (5^2) used by the duplicate removers below:
 // a pair of tracks is "compatible" when the squared parameter difference
 // is below nSigma2 times the summed variances (chi2-like cut).
 31  constexpr float nSigma2 = 25.f;
32 
 33  //all of these below are mostly to avoid bringing around the relative namespace
34 
35  template <typename TrackerTraits>
37 
38  template <typename TrackerTraits>
40 
41  template <typename TrackerTraits>
43 
44  template <typename TrackerTraits>
46 
47  template <typename TrackerTraits>
49 
51 
52  template <typename TrackerTraits>
54 
55  template <typename TrackerTraits>
57 
58  template <typename TrackerTraits>
60 
61  template <typename TrackerTraits>
63 
64  template <typename TrackerTraits>
66 
68 
 // Monitoring/diagnostic kernel: accumulates per-event statistics into the
 // global Counters and printf-reports any container that overflowed
 // (tuple store, cell store, cell neighbors, cell tracks, hit->tuple map,
 // outer-hit-of-cell map). Grid-stride loops; any 1D launch config is valid.
 // NOTE(review): the extraction dropped several signature lines (original
 // lines 71-73, 76-78, 81): tupleMultiplicity, hitToTuple, apc, cellNeighbors,
 // cellTracks, isOuterHitOfCell and counters are used below but their
 // parameter declarations are not visible here — restore from upstream.
 69  template <typename TrackerTraits>
 70  __global__ void kernel_checkOverflows(TkSoAView<TrackerTraits> tracks_view,
 74  GPUCACellT<TrackerTraits> const *__restrict__ cells,
 75  uint32_t const *__restrict__ nCells,
 79  int32_t nHits,
 80  uint32_t maxNumberOfDoublets,
 82  auto first = threadIdx.x + blockIdx.x * blockDim.x;
 83 
 84  auto &c = *counters;
 85  // counters once per event
 86  if (0 == first) {
 87  atomicAdd(&c.nEvents, 1);
 88  atomicAdd(&c.nHits, nHits);
 89  atomicAdd(&c.nCells, *nCells);
 90  atomicAdd(&c.nTuples, apc->get().m);
 91  atomicAdd(&c.nFitTracks, tupleMultiplicity->size());
 92  }
 93 
 // Debug-only: dump global tallies and validate every stored tuple size.
 94 #ifdef NTUPLE_DEBUG
 95  if (0 == first) {
 96  printf("number of found cells %d \n found tuples %d with total hits %d out of %d %d\n",
 97  *nCells,
 98  apc->get().m,
 99  apc->get().n,
 100  nHits,
 101  hitToTuple->totOnes());
 102  if (apc->get().m < TrackerTraits::maxNumberOfQuadruplets) {
 103  assert(tracks_view.hitIndices().size(apc->get().m) == 0);
 104  assert(tracks_view.hitIndices().size() == apc->get().n);
 105  }
 106  }
 107 
 108  for (int idx = first, nt = tracks_view.hitIndices().nOnes(); idx < nt; idx += gridDim.x * blockDim.x) {
 109  if (tracks_view.hitIndices().size(idx) > TrackerTraits::maxHitsOnTrack) // current real limit
 110  printf("ERROR %d, %d\n", idx, tracks_view.hitIndices().size(idx));
 111  assert(tracks_view.hitIndices().size(idx) <= TrackerTraits::maxHitsOnTrack);
 112  for (auto ih = tracks_view.hitIndices().begin(idx); ih != tracks_view.hitIndices().end(idx); ++ih)
 113  assert(int(*ih) < nHits);
 114  }
 115 #endif
 116 
 // Single-thread overflow report for the event-wide containers.
 117  if (0 == first) {
 118  if (apc->get().m >= TrackerTraits::maxNumberOfQuadruplets)
 119  printf("Tuples overflow\n");
 120  if (*nCells >= maxNumberOfDoublets)
 121  printf("Cells overflow\n");
 122  if (cellNeighbors && cellNeighbors->full())
 123  printf("cellNeighbors overflow %d %d \n", cellNeighbors->capacity(), cellNeighbors->size());
 124  if (cellTracks && cellTracks->full())
 125  printf("cellTracks overflow\n");
 126  if (int(hitToTuple->nOnes()) < nHits)
 127  printf("ERROR hitToTuple overflow %d %d\n", hitToTuple->nOnes(), nHits);
 128 #ifdef GPU_DEBUG
 129  printf("size of cellNeighbors %d \n cellTracks %d \n hitToTuple %d \n",
 130  cellNeighbors->size(),
 131  cellTracks->size(),
 132  hitToTuple->size());
 133 
 134 #endif
 135  }
 136 
 // Per-cell statistics and per-cell overflow reports.
 137  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
 138  auto const &thisCell = cells[idx];
 139  if (thisCell.hasFishbone() && !thisCell.isKilled())
 140  atomicAdd(&c.nFishCells, 1);
 141  if (thisCell.outerNeighbors().full()) //++tooManyNeighbors[thisCell.theLayerPairId];
 142  printf("OuterNeighbors overflow %d in %d\n", idx, thisCell.layerPairId());
 143  if (thisCell.tracks().full()) //++tooManyTracks[thisCell.theLayerPairId];
 144  printf("Tracks overflow %d in %d\n", idx, thisCell.layerPairId());
 145  if (thisCell.isKilled())
 146  atomicAdd(&c.nKilledCells, 1);
 // NOTE(review): the condition !unused() increments nEmptyCells — the counter
 // name and the predicate look inverted relative to each other; the printed
 // column for this counter is labelled "nUsedCells" elsewhere. Confirm upstream.
 147  if (!thisCell.unused())
 148  atomicAdd(&c.nEmptyCells, 1);
 149  if ((0 == hitToTuple->size(thisCell.inner_hit_id())) && (0 == hitToTuple->size(thisCell.outer_hit_id())))
 150  atomicAdd(&c.nZeroTrackCells, 1);
 151  }
 152 
 // Overflow check for the outer-hit-of-cell association containers.
 153  for (int idx = first, nt = nHits - isOuterHitOfCell.offset; idx < nt; idx += gridDim.x * blockDim.x) {
 154  if (isOuterHitOfCell.container[idx].full()) // ++tooManyOuterHitOfCell;
 155  printf("OuterHitOfCell overflow %d\n", idx);
 156  }
 157  }
158 
 // Propagates the fishbone veto to tracks: every track that contains a cell
 // killed by the fishbone pass is demoted to Quality::dup.
 // NOTE(review): the extraction dropped original line 162 — presumably the
 // tracks_view parameter and the closing of the signature; restore upstream.
 159  template <typename TrackerTraits>
 160  __global__ void kernel_fishboneCleaner(GPUCACellT<TrackerTraits> const *cells,
 161  uint32_t const *__restrict__ nCells,
 163  constexpr auto reject = pixelTrack::Quality::dup;
 164 
 165  auto first = threadIdx.x + blockIdx.x * blockDim.x;
 // Grid-stride loop over all doublets (cells).
 166  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
 167  auto const &thisCell = cells[idx];
 168  if (!thisCell.isKilled())
 169  continue;
 170 
 // Demote every track candidate built on top of a killed cell.
 171  for (auto it : thisCell.tracks())
 172  tracks_view[it].quality() = reject;
 173  }
 174  }
175 
176  // remove shorter tracks if sharing a cell
177  // It does not seem to affect efficiency in any way!
 // remove shorter tracks if sharing a cell
 // It does not seem to affect efficiency in any way!
 // For every cell used by two or more track candidates, finds the maximum
 // layer count among them and demotes all shorter candidates to Quality::edup.
 // NOTE(review): the extraction dropped original line 182 — presumably a final
 // parameter (e.g. a pass-through flag) and the closing of the signature.
 178  template <typename TrackerTraits>
 179  __global__ void kernel_earlyDuplicateRemover(GPUCACellT<TrackerTraits> const *cells,
 180  uint32_t const *__restrict__ nCells,
 181  TkSoAView<TrackerTraits> tracks_view,
 183  // quality to mark rejected
 184  constexpr auto reject = pixelTrack::Quality::edup;
 185 
 186  assert(nCells);
 187  auto first = threadIdx.x + blockIdx.x * blockDim.x;
 188  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
 189  auto const &thisCell = cells[idx];
 190 
 // Only cells shared by at least two candidates can produce duplicates.
 191  if (thisCell.tracks().size() < 2)
 192  continue;
 193 
 194  int8_t maxNl = 0;
 195 
 196  // find maxNl
 197  for (auto it : thisCell.tracks()) {
 198  auto nl = tracks_view[it].nLayers();
 199  maxNl = std::max(nl, maxNl);
 200  }
 201 
 202  // if (maxNl<4) continue;
 203  // quad pass through (leave it here for tests)
 204  // maxNl = std::min(4, maxNl);
 205 
 // Keep only the longest candidate(s); demote the rest.
 206  for (auto it : thisCell.tracks()) {
 207  if (tracks_view[it].nLayers() < maxNl)
 208  tracks_view[it].quality() = reject; //no race: simple assignment of the same constant
 209  }
 210  }
 211  }
212 
213  // assume the above (so, short tracks already removed)
 // assume the above (so, short tracks already removed)
 // For every cell shared by >=2 candidates, runs a full pairwise comparison:
 // candidates whose state parameters (2) and (3) agree within nSigma2 times the
 // summed variances (covariance entries 9 and 12) are treated as duplicates and
 // the worse one (lower quality, or worse score at equal quality) is demoted.
 // Afterwards, among the surviving candidates at the best quality, only the one
 // with the smallest |tip| is kept above `loose`.
 // NOTE(review): the extraction dropped original lines 220-221 — the
 // definitions of `reject` and `loose` used below (presumably derived from
 // dupPassThrough); restore from upstream. `maxScore` and `tkNotFound` are
 // namespace constants not visible in this excerpt.
 215  __global__ void kernel_fastDuplicateRemover(GPUCACellT<TrackerTraits> const *__restrict__ cells,
 216  uint32_t const *__restrict__ nCells,
 217  TkSoAView<TrackerTraits> tracks_view,
 218  bool dupPassThrough) {
 219  // quality to mark rejected
 222 
 223  assert(nCells);
 224 
 225  auto first = threadIdx.x + blockIdx.x * blockDim.x;
 226  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
 227  auto const &thisCell = cells[idx];
 228  if (thisCell.tracks().size() < 2)
 229  continue;
 230 
 231  float mc = maxScore;
 232  uint16_t im = tkNotFound;
 233 
 // Score = |transverse impact parameter|: smaller is better.
 234  auto score = [&](auto it) { return std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)); };
 235 
 236  // full crazy combinatorics
 238  int ntr = thisCell.tracks().size();
 239  for (int i = 0; i < ntr - 1; ++i) {
 240  auto it = thisCell.tracks()[i];
 241  auto qi = tracks_view[it].quality();
 242  if (qi <= reject)
 243  continue;
 // State components 2/3 with their variances (cov entries 9/12);
 // presumably curvature-like and cot(theta)-like parameters — confirm
 // against the track SoA layout.
 244  auto opi = tracks_view[it].state()(2);
 245  auto e2opi = tracks_view[it].covariance()(9);
 246  auto cti = tracks_view[it].state()(3);
 247  auto e2cti = tracks_view[it].covariance()(12);
 248  for (auto j = i + 1; j < ntr; ++j) {
 249  auto jt = thisCell.tracks()[j];
 250  auto qj = tracks_view[jt].quality();
 251  if (qj <= reject)
 252  continue;
 253  auto opj = tracks_view[jt].state()(2);
 254  auto ctj = tracks_view[jt].state()(3);
 // chi2-like compatibility cuts in both parameters.
 255  auto dct = nSigma2 * (tracks_view[jt].covariance()(12) + e2cti);
 256  if ((cti - ctj) * (cti - ctj) > dct)
 257  continue;
 258  auto dop = nSigma2 * (tracks_view[jt].covariance()(9) + e2opi);
 259  if ((opi - opj) * (opi - opj) > dop)
 260  continue;
 // Duplicates: demote the worse one; if `it` loses, stop comparing it.
 261  if ((qj < qi) || (qj == qi && score(it) < score(jt)))
 262  tracks_view[jt].quality() = reject;
 263  else {
 264  tracks_view[it].quality() = reject;
 265  break;
 266  }
 267  }
 268  }
 269 
 270  // find maxQual
 271  auto maxQual = reject; // no duplicate!
 272  for (auto it : thisCell.tracks()) {
 273  if (tracks_view[it].quality() > maxQual)
 274  maxQual = tracks_view[it].quality();
 275  }
 276 
 277  if (maxQual <= loose)
 278  continue;
 279 
 280  // find min score
 281  for (auto it : thisCell.tracks()) {
 282  if (tracks_view[it].quality() == maxQual && score(it) < mc) {
 283  mc = score(it);
 284  im = it;
 285  }
 286  }
 287 
 288  if (tkNotFound == im)
 289  continue;
 290 
 291  // mark all other duplicates (not yet, keep it loose)
 292  for (auto it : thisCell.tracks()) {
 293  if (tracks_view[it].quality() > loose && it != im)
 294  tracks_view[it].quality() = loose; //no race: simple assignment of the same constant
 295  }
 296  }
 297  }
298 
 // Connects compatible doublets (cells) into the CA graph: for each cell, scans
 // all cells ending on its inner hit and, if the r-z alignment and DCA cuts
 // pass, registers the connection in the inner cell's outer-neighbor list.
 // 2D launch: y indexes the cell, x indexes its candidate neighbors.
 // NOTE(review): the extraction dropped original lines 302-303 and 305-308 —
 // presumably hh, cells, isOuterHitOfCell, cellNeighbors and the CAParams
 // argument (`hh`, `cells`, `isOuterHitOfCell`, `params` and the `Cell` alias
 // are used below but not declared in this excerpt); restore from upstream.
 299  template <typename TrackerTraits>
 300  __global__ void kernel_connect(cms::cuda::AtomicPairCounter *apc1,
 301  cms::cuda::AtomicPairCounter *apc2, // just to zero them,
 304  uint32_t const *__restrict__ nCells,
 309 
 310  auto firstCellIndex = threadIdx.y + blockIdx.y * blockDim.y;
 311  auto first = threadIdx.x;
 312  auto stride = blockDim.x;
 313 
 // One thread zeroes the two counters for the kernels that follow.
 314  if (0 == (firstCellIndex + first)) {
 315  (*apc1) = 0;
 316  (*apc2) = 0;
 317  } // ready for next kernel
 318 
 319  constexpr uint32_t last_bpix1_detIndex = TrackerTraits::last_bpix1_detIndex;
 320  constexpr uint32_t last_barrel_detIndex = TrackerTraits::last_barrel_detIndex;
 321  for (int idx = firstCellIndex, nt = (*nCells); idx < nt; idx += gridDim.y * blockDim.y) {
 322  auto cellIndex = idx;
 323  auto &thisCell = cells[idx];
 324  auto innerHitId = thisCell.inner_hit_id();
 // Hits below the offset have no outer-hit-of-cell entry.
 325  if (int(innerHitId) < isOuterHitOfCell.offset)
 326  continue;
 327  int numberOfPossibleNeighbors = isOuterHitOfCell[innerHitId].size();
 328  auto vi = isOuterHitOfCell[innerHitId].data();
 329 
 330  auto ri = thisCell.inner_r(hh);
 331  auto zi = thisCell.inner_z(hh);
 332 
 333  auto ro = thisCell.outer_r(hh);
 334  auto zo = thisCell.outer_z(hh);
 335  auto isBarrel = thisCell.inner_detIndex(hh) < last_barrel_detIndex;
 336 
 // x-threads cooperate over the candidate inner cells.
 337  for (int j = first; j < numberOfPossibleNeighbors; j += stride) {
 338  auto otherCell = __ldg(vi + j);
 339  auto &oc = cells[otherCell];
 340  auto r1 = oc.inner_r(hh);
 341  auto z1 = oc.inner_z(hh);
 342  bool aligned = Cell::areAlignedRZ(
 343  r1,
 344  z1,
 345  ri,
 346  zi,
 347  ro,
 348  zo,
 349  params.ptmin_,
 350  isBarrel ? params.CAThetaCutBarrel_ : params.CAThetaCutForward_); // 2.f*thetaCut); // FIXME tune cuts
 351  if (aligned && thisCell.dcaCut(hh,
 352  oc,
 353  oc.inner_detIndex(hh) < last_bpix1_detIndex ? params.dcaCutInnerTriplet_
 354  : params.dcaCutOuterTriplet_,
 355  params.hardCurvCut_)) { // FIXME tune cuts
 356  oc.addOuterNeighbor(cellIndex, *cellNeighbors);
 357  thisCell.setStatusBits(Cell::StatusBit::kUsed);
 358  oc.setStatusBits(Cell::StatusBit::kUsed);
 359  }
 360  } // loop on inner cells
 361  } // loop on outer cells
 362  }
363 
 // Walks the CA graph depth-first from each cell belonging to a configured
 // starting layer pair and emits the found n-tuplets (track candidates) into
 // the track SoA via the atomic pair counter.
 // NOTE(review): the extraction dropped original lines 370-371 and 374 —
 // presumably the `apc` and `params` parameters and the `Cell` alias used
 // below; restore from upstream.
 364  template <typename TrackerTraits>
 365  __global__ void kernel_find_ntuplets(HitsConstView<TrackerTraits> hh,
 366  TkSoAView<TrackerTraits> tracks_view,
 367  GPUCACellT<TrackerTraits> *__restrict__ cells,
 368  uint32_t const *nCells,
 369  CellTracksVector<TrackerTraits> *cellTracks,
 372  // recursive: not obvious to widen
 373 
 375 
 376  auto first = threadIdx.x + blockIdx.x * blockDim.x;
 377 
 378 #ifdef GPU_DEBUG
 379  if (first == 0)
 380  printf("starting producing ntuplets from %d cells \n", *nCells);
 381 #endif
 382  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
 383  auto const &thisCell = cells[idx];
 384 
 385  if (thisCell.isKilled())
 386  continue; // cut by earlyFishbone
 387 
 388  // we require at least three hits...
 389  if (thisCell.outerNeighbors().empty())
 390  continue;
 391 
 // Only start the DFS from configured starting layer pairs.
 392  auto pid = thisCell.layerPairId();
 393  bool doit = params.startingLayerPair(pid);
 394 
 395  constexpr uint32_t maxDepth = TrackerTraits::maxDepth;
 396  if (doit) {
 // Per-thread DFS stack, bounded by maxDepth.
 397  typename Cell::TmpTuple stack;
 398  stack.reset();
 399 
 400  bool bpix1Start = params.startAt0(pid);
 401 
 402  thisCell.template find_ntuplets<maxDepth>(hh,
 403  cells,
 404  *cellTracks,
 405  tracks_view.hitIndices(),
 406  *apc,
 407  tracks_view.quality(),
 408  stack,
 409  params.minHitsPerNtuplet_,
 410  bpix1Start);
 411 
 // The DFS must fully unwind before the thread moves on.
 412  assert(stack.empty());
 413  }
 414  }
 415  }
416  template <typename TrackerTraits>
417  __global__ void kernel_mark_used(GPUCACellT<TrackerTraits> *__restrict__ cells, uint32_t const *nCells) {
418  auto first = threadIdx.x + blockIdx.x * blockDim.x;
420  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
421  auto &thisCell = cells[idx];
422  if (!thisCell.tracks().empty())
423  thisCell.setStatusBits(Cell::StatusBit::kInTrack);
424  }
425  }
426 
 // First (count) pass of the tuple-multiplicity histogram fill: counts, per
 // hit multiplicity, how many track candidates have that many hits.
 // NOTE(review): the extraction dropped original line 429 (presumably the
 // tupleMultiplicity parameter closing the signature) and line 435 (presumably
 // a quality check whose `continue` is at line 436); restore from upstream.
 427  template <typename TrackerTraits>
 428  __global__ void kernel_countMultiplicity(TkSoAView<TrackerTraits> tracks_view,
 430  auto first = blockIdx.x * blockDim.x + threadIdx.x;
 431  for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) {
 432  auto nhits = tracks_view.hitIndices().size(it);
 433  if (nhits < 3)
 434  continue;
 436  continue;
 438  if (nhits > TrackerTraits::maxHitsOnTrack) // current limit
 439  printf("wrong mult %d %d\n", it, nhits);
 440  assert(nhits <= TrackerTraits::maxHitsOnTrack);
 441  tupleMultiplicity->count(nhits);
 442  }
 443  }
444 
 // Second (fill) pass of the tuple-multiplicity histogram: stores each track
 // candidate index under its hit-multiplicity bin. Must mirror the selection
 // logic of kernel_countMultiplicity exactly.
 // NOTE(review): the extraction dropped original line 447 (presumably the
 // tupleMultiplicity parameter closing the signature) and line 453 (presumably
 // a quality check whose `continue` is at line 454); restore from upstream.
 445  template <typename TrackerTraits>
 446  __global__ void kernel_fillMultiplicity(TkSoAView<TrackerTraits> tracks_view,
 448  auto first = blockIdx.x * blockDim.x + threadIdx.x;
 449  for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) {
 450  auto nhits = tracks_view.hitIndices().size(it);
 451  if (nhits < 3)
 452  continue;
 454  continue;
 456  if (nhits > TrackerTraits::maxHitsOnTrack)
 457  printf("wrong mult %d %d\n", it, nhits);
 458  assert(nhits <= TrackerTraits::maxHitsOnTrack);
 459  tupleMultiplicity->fill(nhits, it);
 460  }
 461  }
462 
 // Track classification kernel (presumably kernel_classifyTracks — the
 // signature line was dropped by the extraction, original line 465): applies
 // the quality cuts to each fitted candidate, promoting survivors to `tight`
 // (and, per line 502, presumably to high purity — the assignment at original
 // line 503 was also dropped).
 // NOTE(review): original lines 465, 474, 477, 495 and 503 are missing here
 // (signature, a duplicate-quality early-out, and parts of the cut logic,
 // including the `cuts` parameter declaration); restore from upstream.
 464  template <typename TrackerTraits>
 466  // Quality *__restrict__ quality) {
 467  int first = blockDim.x * blockIdx.x + threadIdx.x;
 468  for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) {
 469  auto nhits = tracks_view.hitIndices().size(it);
 470  if (nhits == 0)
 471  break; // guard
 472 
 473  // if duplicate: not even fit
 475  continue;
 476 
 478 
 479  // mark doublets as bad
 480  if (nhits < 3)
 481  continue;
 482 
 483  // if the fit has any invalid parameters, mark it as bad
 484  bool isNaN = false;
 485  for (int i = 0; i < 5; ++i) {
 486  isNaN |= std::isnan(tracks_view[it].state()(i));
 487  }
 488  if (isNaN) {
 489 #ifdef NTUPLE_DEBUG
 490  printf("NaN in fit %d size %d chi2 %f\n", it, tracks_view.hitIndices().size(it), tracks_view[it].chi2());
 491 #endif
 492  continue;
 493  }
 494 
 496 
 497  if (cuts.strictCut(tracks_view, it))
 498  continue;
 499 
 500  tracks_view[it].quality() = pixelTrack::Quality::tight;
 501 
 502  if (cuts.isHP(tracks_view, nhits, it))
 504  }
 505  }
506 
 // Accumulates per-event track statistics: counts loose and good tracks into
 // the global Counters.
 // NOTE(review): the extraction dropped original lines 513 and 516 — the two
 // quality conditions whose `continue`s are at lines 514 and 517 (presumably
 // "below loose" and "below the good threshold" checks); restore upstream.
 507  template <typename TrackerTraits>
 508  __global__ void kernel_doStatsForTracks(TkSoAView<TrackerTraits> tracks_view, Counters *counters) {
 509  int first = blockDim.x * blockIdx.x + threadIdx.x;
 510  for (int idx = first, ntot = tracks_view.hitIndices().nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
 511  if (tracks_view.hitIndices().size(idx) == 0)
 512  break; //guard
 514  continue;
 515  atomicAdd(&(counters->nLooseTracks), 1);
 517  continue;
 518  atomicAdd(&(counters->nGoodTracks), 1);
 519  }
 520  }
521 
522  template <typename TrackerTraits>
523  __global__ void kernel_countHitInTracks(TkSoAView<TrackerTraits> tracks_view, HitToTuple<TrackerTraits> *hitToTuple) {
524  int first = blockDim.x * blockIdx.x + threadIdx.x;
525  for (int idx = first, ntot = tracks_view.hitIndices().nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
526  if (tracks_view.hitIndices().size(idx) == 0)
527  break; // guard
528  for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h)
529  hitToTuple->count(*h);
530  }
531  }
532 
533  template <typename TrackerTraits>
534  __global__ void kernel_fillHitInTracks(TkSoAView<TrackerTraits> tracks_view, HitToTuple<TrackerTraits> *hitToTuple) {
535  int first = blockDim.x * blockIdx.x + threadIdx.x;
536  for (int idx = first, ntot = tracks_view.hitIndices().nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
537  if (tracks_view.hitIndices().size(idx) == 0)
538  break; // guard
539  for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h)
540  hitToTuple->fill(*h, idx);
541  }
542  }
543 
 // Builds the per-track detector-index container parallel to the hit-index
 // container: copies the offsets, then translates each hit index into the
 // detectorIndex of that hit.
 // NOTE(review): the extraction dropped original line 545 — the __global__
 // signature (presumably taking HitsConstView hh and the track SoA view);
 // `hh` and `tracks_view` are used below but not declared in this excerpt.
 544  template <typename TrackerTraits>
 546  int first = blockDim.x * blockIdx.x + threadIdx.x;
 547  // copy offsets
 548  for (int idx = first, ntot = tracks_view.hitIndices().totOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
 549  tracks_view.detIndices().off[idx] = tracks_view.hitIndices().off[idx];
 550  }
 551  // fill hit indices
 552  auto nhits = hh.nHits();
 553 
 554  for (int idx = first, ntot = tracks_view.hitIndices().size(); idx < ntot; idx += gridDim.x * blockDim.x) {
 555  assert(tracks_view.hitIndices().content[idx] < nhits);
 556  tracks_view.detIndices().content[idx] = hh[tracks_view.hitIndices().content[idx]].detectorIndex();
 557  }
 558  }
559 
 // Stores the final number of tracks (clamped to the SoA capacity) and fills
 // the per-track layer count.
 // NOTE(review): the extraction dropped original lines 568 and 570 —
 // presumably the computation of `nHits` (asserted below) and the assignment
 // of nLayers via computeNumberOfLayers; restore from upstream.
 560  template <typename TrackerTraits>
 561  __global__ void kernel_fillNLayers(TkSoAView<TrackerTraits> tracks_view, cms::cuda::AtomicPairCounter *apc) {
 562  auto first = blockIdx.x * blockDim.x + threadIdx.x;
 563  // clamp the number of tracks to the capacity of the SoA
 564  auto ntracks = std::min<int>(apc->get().m, tracks_view.metadata().size() - 1);
 565  if (0 == first)
 566  tracks_view.nTracks() = ntracks;
 567  for (int idx = first, nt = ntracks; idx < nt; idx += gridDim.x * blockDim.x) {
 569  assert(nHits >= 3);
 571  }
 572  }
573 
574  template <typename TrackerTraits>
575  __global__ void kernel_doStatsForHitInTracks(HitToTuple<TrackerTraits> const *__restrict__ hitToTuple,
576  Counters *counters) {
577  auto &c = *counters;
578  int first = blockDim.x * blockIdx.x + threadIdx.x;
579  for (int idx = first, ntot = hitToTuple->nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
580  if (hitToTuple->size(idx) == 0)
581  continue; // SHALL NOT BE break
582  atomicAdd(&c.nUsedHits, 1);
583  if (hitToTuple->size(idx) > 1)
584  atomicAdd(&c.nDupHits, 1);
585  }
586  }
587 
 // For every hit shared by at least two "good" (>= loose) tracks, increments
 // nshared for each short track (<= 3 hits, i.e. triplets) using that hit.
 // NOTE(review): the extraction dropped original line 590 — presumably the
 // `ptuples` parameter (HitContainer pointer) dereferenced below; restore
 // from upstream.
 588  template <typename TrackerTraits>
 589  __global__ void kernel_countSharedHit(int *__restrict__ nshared,
 591  Quality const *__restrict__ quality,
 592  HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
 593  constexpr auto loose = pixelTrack::Quality::loose;
 594 
 595  auto &hitToTuple = *phitToTuple;
 596  auto const &foundNtuplets = *ptuples;
 597 
 598  int first = blockDim.x * blockIdx.x + threadIdx.x;
 599  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
 600  if (hitToTuple.size(idx) < 2)
 601  continue;
 602 
 603  int nt = 0;
 604 
 605  // count "good" tracks
 606  for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
 607  if (quality[*it] < loose)
 608  continue;
 609  ++nt;
 610  }
 611 
 // A hit is "shared" only when two or more good tracks use it.
 612  if (nt < 2)
 613  continue;
 614 
 615  // now mark each track triplet as sharing a hit
 616  for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
 617  if (foundNtuplets.size(*it) > 3)
 618  continue;
 619  atomicAdd(&nshared[*it], 1);
 620  }
 621 
 622  } // hit loop
 623  }
624 
625  template <typename TrackerTraits>
626  __global__ void kernel_markSharedHit(int const *__restrict__ nshared,
627  HitContainer<TrackerTraits> const *__restrict__ tuples,
628  Quality *__restrict__ quality,
629  bool dupPassThrough) {
630  // constexpr auto bad = pixelTrack::Quality::bad;
631  constexpr auto dup = pixelTrack::Quality::dup;
632  constexpr auto loose = pixelTrack::Quality::loose;
633  // constexpr auto strict = pixelTrack::Quality::strict;
634 
635  // quality to mark rejected
636  auto const reject = dupPassThrough ? loose : dup;
637 
638  int first = blockDim.x * blockIdx.x + threadIdx.x;
639  for (int idx = first, ntot = tuples->nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
640  if (tuples->size(idx) == 0)
641  break; //guard
642  if (quality[idx] <= reject)
643  continue;
644  if (nshared[idx] > 2)
645  quality[idx] = reject;
646  }
647  }
648 
649  // mostly for very forward triplets.....
 // mostly for very forward triplets.....
 // Pairwise duplicate removal among tracks sharing a hit: tracks compatible in
 // state parameters (2) and (3) within nSigma2 times the summed variances are
 // duplicates; the one with fewer layers (or worse quality / worse tip at a
 // tie) is demoted.
 // NOTE(review): the extraction dropped original line 656 — the definition of
 // `reject` used below (presumably derived from dupPassThrough, cf.
 // kernel_markSharedHit); restore from upstream.
 650  template <typename TrackerTraits>
 651  __global__ void kernel_rejectDuplicate(TkSoAView<TrackerTraits> tracks_view,
 652  uint16_t nmin,
 653  bool dupPassThrough,
 654  HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
 655  // quality to mark rejected
 657 
 658  auto &hitToTuple = *phitToTuple;
 659 
 660  int first = blockDim.x * blockIdx.x + threadIdx.x;
 661  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
 662  if (hitToTuple.size(idx) < 2)
 663  continue;
 664 
 // Score = |transverse impact parameter|; the layer count argument is unused.
 665  auto score = [&](auto it, auto nl) { return std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)); };
 666 
 667  // full combinatorics
 668  for (auto ip = hitToTuple.begin(idx); ip < hitToTuple.end(idx) - 1; ++ip) {
 669  auto const it = *ip;
 670  auto qi = tracks_view[it].quality();
 671  if (qi <= reject)
 672  continue;
 // State components 2/3 with their variances (cov entries 9/12);
 // presumably curvature-like and cot(theta)-like parameters — confirm
 // against the track SoA layout.
 673  auto opi = tracks_view[it].state()(2);
 674  auto e2opi = tracks_view[it].covariance()(9);
 675  auto cti = tracks_view[it].state()(3);
 676  auto e2cti = tracks_view[it].covariance()(12);
 677  auto nli = tracks_view[it].nLayers();
 678  for (auto jp = ip + 1; jp < hitToTuple.end(idx); ++jp) {
 679  auto const jt = *jp;
 680  auto qj = tracks_view[jt].quality();
 681  if (qj <= reject)
 682  continue;
 683  auto opj = tracks_view[jt].state()(2);
 684  auto ctj = tracks_view[jt].state()(3);
 685  auto dct = nSigma2 * (tracks_view[jt].covariance()(12) + e2cti);
 686  if ((cti - ctj) * (cti - ctj) > dct)
 687  continue;
 688  auto dop = nSigma2 * (tracks_view[jt].covariance()(9) + e2opi);
 689  if ((opi - opj) * (opi - opj) > dop)
 690  continue;
 // Prefer more layers, then better quality, then smaller tip.
 691  auto nlj = tracks_view[jt].nLayers();
 692  if (nlj < nli || (nlj == nli && (qj < qi || (qj == qi && score(it, nli) < score(jt, nlj)))))
 693  tracks_view[jt].quality() = reject;
 694  else {
 695  tracks_view[it].quality() = reject;
 696  break;
 697  }
 698  }
 699  }
 700  }
 701  }
702 
 // For each shared hit, demotes tracks with fewer layers than the longest
 // long-quality track using that hit; hits on the first barrel layer give
 // short-enough tracks (nLayers > nmin) a pass.
 // NOTE(review): the extraction dropped original lines 710 and 712 — the
 // definitions of `reject` and `longTqual` used below; restore from upstream.
 703  template <typename TrackerTraits>
 704  __global__ void kernel_sharedHitCleaner(HitsConstView<TrackerTraits> hh,
 705  TkSoAView<TrackerTraits> tracks_view,
 706  int nmin,
 707  bool dupPassThrough,
 708  HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
 709  // quality to mark rejected
 711  // quality of longest track
 713 
 714  auto &hitToTuple = *phitToTuple;
 715 
 // Hits with index below l1end sit on the first barrel layer (bpix1).
 716  int l1end = hh.hitsLayerStart()[1];
 717 
 718  int first = blockDim.x * blockIdx.x + threadIdx.x;
 719  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
 720  if (hitToTuple.size(idx) < 2)
 721  continue;
 722 
 723  int8_t maxNl = 0;
 724 
 725  // find maxNl
 726  for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
 727  if (tracks_view[*it].quality() < longTqual)
 728  continue;
 729  // if (tracks_view[*it].nHits()==3) continue;
 730  auto nl = tracks_view[*it].nLayers();
 731  maxNl = std::max(nl, maxNl);
 732  }
 733 
 734  if (maxNl < 4)
 735  continue;
 736 
 737  // quad pass through (leave for tests)
 738  // maxNl = std::min(4, maxNl);
 739 
 740  // kill all tracks shorter than maxHl (only triplets???
 741  for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
 742  auto nl = tracks_view[*it].nLayers();
 743 
 744  //checking if shared hit is on bpix1 and if the tuple is short enough
 745  if (idx < l1end and nl > nmin)
 746  continue;
 747 
 748  if (nl < maxNl && tracks_view[*it].quality() > reject)
 749  tracks_view[*it].quality() = reject;
 750  }
 751  }
 752  }
753 
 // Among tracks sharing a hit, when ALL of them are (presumably) triplets,
 // keeps only the best one and demotes the rest to `loose`.
 // NOTE(review): the extraction dropped original lines 761-762 (presumably
 // the `good` quality threshold definition), 778-779 (the isTriplet check
 // updating onlyTriplets) and 790-792 (the quality/score comparison that sets
 // `mc`/`im`); restore from upstream.
 754  template <typename TrackerTraits>
 755  __global__ void kernel_tripletCleaner(TkSoAView<TrackerTraits> tracks_view,
 756  uint16_t nmin,
 757  bool dupPassThrough,
 758  HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
 759  // quality to mark rejected
 760  auto const reject = pixelTrack::Quality::loose;
 763 
 764  auto &hitToTuple = *phitToTuple;
 765 
 766  int first = blockDim.x * blockIdx.x + threadIdx.x;
 767  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
 768  if (hitToTuple.size(idx) < 2)
 769  continue;
 770 
 771  float mc = maxScore;
 772  uint16_t im = tkNotFound;
 773  bool onlyTriplets = true;
 774 
 775  // check if only triplets
 776  for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
 777  if (tracks_view[*it].quality() <= good)
 778  continue;
 780  if (!onlyTriplets)
 781  break;
 782  }
 783 
 784  // only triplets
 785  if (!onlyTriplets)
 786  continue;
 787 
 788  // for triplets choose best tip! (should we first find best quality???)
 789  for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
 790  auto const it = *ip;
 793  im = it;
 794  }
 795  }
 796 
 797  if (tkNotFound == im)
 798  continue;
 799 
 800  // mark worse ambiguities
 801  for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
 802  auto const it = *ip;
 803  if (tracks_view[it].quality() > reject && it != im)
 804  tracks_view[it].quality() = reject; //no race: simple assignment of the same constant
 805  }
 806 
 807  } // loop over hits
 808  }
809 
 // Simplified ambiguity cleaner: for each shared hit, keeps the best-tip track
 // and demotes the other (presumably triplet) candidates to `loose`.
 // NOTE(review): the extraction dropped original lines 817 (context between
 // `reject` and `good`), 833-834 (the quality/score comparison setting
 // `mc`/`im`) and 845 (the first part of the final demotion condition, whose
 // continuation `it != im)` is at line 846); restore from upstream.
 810  template <typename TrackerTraits>
 811  __global__ void kernel_simpleTripletCleaner(TkSoAView<TrackerTraits> tracks_view,
 812  uint16_t nmin,
 813  bool dupPassThrough,
 814  HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
 815  // quality to mark rejected
 816  auto const reject = pixelTrack::Quality::loose;
 818  auto const good = pixelTrack::Quality::loose;
 819 
 820  auto &hitToTuple = *phitToTuple;
 821 
 822  int first = blockDim.x * blockIdx.x + threadIdx.x;
 823  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
 824  if (hitToTuple.size(idx) < 2)
 825  continue;
 826 
 827  float mc = maxScore;
 828  uint16_t im = tkNotFound;
 829 
 830  // choose best tip! (should we first find best quality???)
 831  for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
 832  auto const it = *ip;
 835  im = it;
 836  }
 837  }
 838 
 839  if (tkNotFound == im)
 840  continue;
 841 
 842  // mark worse ambiguities
 843  for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
 844  auto const it = *ip;
 846  it != im)
 847  tracks_view[it].quality() = reject; //no race: simple assignment of the same constant
 848  }
 849 
 850  } // loop over hits
 851  }
852 
 // Debug dump: prints one line per found track candidate in the index range
 // [firstPrint, lastPrint), tagged with the event number.
 // NOTE(review): the extraction dropped original lines 855 (presumably the
 // tracks_view parameter), 875 and 878-880 (presumably further printed track
 // quantities — charge/phi/tip/zip — matching the %f fields of the format
 // string); restore from upstream.
 853  template <typename TrackerTraits>
 854  __global__ void kernel_print_found_ntuplets(HitsConstView<TrackerTraits> hh,
 856  HitToTuple<TrackerTraits> const *__restrict__ phitToTuple,
 857  int32_t firstPrint,
 858  int32_t lastPrint,
 859  int iev) {
 860  constexpr auto loose = pixelTrack::Quality::loose;
 861 
 862  int first = firstPrint + blockDim.x * blockIdx.x + threadIdx.x;
 863  for (int i = first, np = std::min(lastPrint, tracks_view.hitIndices().nOnes()); i < np;
 864  i += blockDim.x * gridDim.x) {
 865  auto nh = tracks_view.hitIndices().size(i);
 866  if (nh < 3)
 867  continue;
 868  if (tracks_view[i].quality() < loose)
 869  continue;
 870  printf("TK: %d %d %d %d %f %f %f %f %f %f %f %.3f %.3f %.3f %.3f %.3f %.3f %.3f\n",
 871  10000 * iev + i,
 872  int(tracks_view[i].quality()),
 873  nh,
 874  tracks_view[i].nLayers(),
 876  tracks_view[i].pt(),
 877  tracks_view[i].eta(),
 881  tracks_view[i].chi2(),
 882  hh[*tracks_view.hitIndices().begin(i)].zGlobal(),
 883  hh[*(tracks_view.hitIndices().begin(i) + 1)].zGlobal(),
 884  hh[*(tracks_view.hitIndices().begin(i) + 2)].zGlobal(),
 885  nh > 3 ? hh[int(*(tracks_view.hitIndices().begin(i) + 3))].zGlobal() : 0,
 886  nh > 4 ? hh[int(*(tracks_view.hitIndices().begin(i) + 4))].zGlobal() : 0,
 887  nh > 5 ? hh[int(*(tracks_view.hitIndices().begin(i) + 5))].zGlobal() : 0,
 888  nh > 6 ? hh[int(*(tracks_view.hitIndices().begin(i) + nh - 1))].zGlobal() : 0);
 889  }
 890  }
891 
892  __global__ void kernel_printCounters(Counters const *counters) {
893  auto const &c = *counters;
894  printf(
895  "||Counters | nEvents | nHits | nCells | nTuples | nFitTacks | nLooseTracks | nGoodTracks | nUsedHits | "
896  "nDupHits | "
897  "nFishCells | "
898  "nKilledCells | "
899  "nUsedCells | nZeroTrackCells ||\n");
900  printf("Counters Raw %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n",
901  c.nEvents,
902  c.nHits,
903  c.nCells,
904  c.nTuples,
905  c.nFitTracks,
906  c.nLooseTracks,
907  c.nGoodTracks,
908  c.nUsedHits,
909  c.nDupHits,
910  c.nFishCells,
911  c.nKilledCells,
912  c.nEmptyCells,
913  c.nZeroTrackCells);
914  printf(
915  "Counters Norm %lld || %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.3f| %.3f| %.3f| %.3f||\n",
916  c.nEvents,
917  c.nHits / double(c.nEvents),
918  c.nCells / double(c.nEvents),
919  c.nTuples / double(c.nEvents),
920  c.nFitTracks / double(c.nEvents),
921  c.nLooseTracks / double(c.nEvents),
922  c.nGoodTracks / double(c.nEvents),
923  c.nUsedHits / double(c.nEvents),
924  c.nDupHits / double(c.nEvents),
925  c.nFishCells / double(c.nCells),
926  c.nKilledCells / double(c.nCells),
927  c.nEmptyCells / double(c.nCells),
928  c.nZeroTrackCells / double(c.nCells));
929  }
930 
931 } // namespace caHitNtupletGeneratorKernels
const dim3 threadIdx
Definition: cudaCompat.h:29
HitContainer< TrackerTraits > const *__restrict__ tuples
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ uint32_t const *__restrict__ CellNeighborsVector< TrackerTraits > const CellTracksVector< TrackerTraits > const OuterHitOfCell< TrackerTraits > const isOuterHitOfCell
def isnan(num)
HitContainer< TrackerTraits > const *__restrict__ Quality const *__restrict__ quality
TrackingRecHitSoAConstView< TrackerTraits > HitsConstView
Definition: GPUCACell.h:34
const dim3 gridDim
Definition: cudaCompat.h:33
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ uint32_t const *__restrict__ CellNeighborsVector< TrackerTraits > const * cellNeighbors
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ uint32_t const *__restrict__ CellNeighborsVector< TrackerTraits > const CellTracksVector< TrackerTraits > const * cellTracks
uint32_t const *__restrict__ TkSoAView< TrackerTraits > tracks_view
#define __global__
Definition: cudaCompat.h:19
const dim3 blockDim
Definition: cudaCompat.h:30
caHitNtupletGenerator::Counters Counters
TkSoAView< TrackerTraits > HitToTuple< TrackerTraits > const *__restrict__ int32_t int32_t int iev
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ cells
TupleMultiplicity< TrackerTraits > const * tupleMultiplicity
typename GPUCACellT< TrackerTraits >::HitsConstView HitsConstView
uint32_t const *__restrict__ TkSoAView< TrackerTraits > bool dupPassThrough
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ uint32_t const *__restrict__ CellNeighborsVector< TrackerTraits > const CellTracksVector< TrackerTraits > const OuterHitOfCell< TrackerTraits > const int32_t uint32_t maxNumberOfDoublets
TkSoAView< TrackerTraits > GPUCACellT< TrackerTraits > *__restrict__ uint32_t const CellTracksVector< TrackerTraits > cms::cuda::AtomicPairCounter CAParams< TrackerTraits > params
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ uint32_t const *__restrict__ CellNeighborsVector< TrackerTraits > const CellTracksVector< TrackerTraits > const OuterHitOfCell< TrackerTraits > const int32_t uint32_t Counters * counters
int np
Definition: AMPTWrapper.h:43
static constexpr __host__ __device__ int computeNumberOfLayers(const TrackSoAConstView &tracks, int32_t i)
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ uint32_t const *__restrict__ CellNeighborsVector< TrackerTraits > const CellTracksVector< TrackerTraits > const OuterHitOfCell< TrackerTraits > const int32_t nHits
typename TrackSoA< TrackerTraits >::HitContainer HitContainer
TkSoAView< TrackerTraits > HitToTuple< TrackerTraits > const *__restrict__ int32_t firstPrint
__device__ __host__ Counters get() const
stack
Definition: svgfig.py:559
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const * hitToTuple
TrackSoAView< TrackerTraits > TkSoAView
uint32_t nh
const dim3 blockIdx
Definition: cudaCompat.h:32
assert(nCells)
cannot be loose
int nt
Definition: AMPTWrapper.h:42
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter GPUCACellT< TrackerTraits > const *__restrict__ uint32_t const *__restrict__ nCells
static constexpr __host__ __device__ int nHits(const TrackSoAConstView &tracks, int i)
T __ldg(T const *x)
Definition: cudaCompat.h:137
static constexpr __host__ __device__ bool isTriplet(const TrackSoAConstView &tracks, int i)
typename TrackSoA< TrackerTraits >::template TrackSoALayout<>::View TrackSoAView
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter * apc
TkSoAView< TrackerTraits > HitToTuple< TrackerTraits > const *__restrict__ int32_t int32_t lastPrint
The Signals That Services Can Subscribe To This is based on ActivityRegistry h
Helper function to determine trigger accepts.
Definition: Activities.doc:4
__shared__ uint32_t ntot
HitContainer< TrackerTraits > const *__restrict__ Quality const *__restrict__ HitToTuple< TrackerTraits > const *__restrict__ phitToTuple
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
HitContainer< TrackerTraits > const *__restrict__ ptuples