CAHitNtupletGeneratorKernelsImpl.h

//
// Original Author: Felice Pantaleo, CERN
//

// #define NTUPLE_DEBUG
// #define GPU_DEBUG

#include <cmath>
#include <cstdint>
#include <limits>

#include <cuda_runtime.h>

#include "CAStructures.h"
#include "GPUCACell.h"
#include "gpuFishbone.h"
#include "gpuPixelDoublets.h"

namespace caHitNtupletGeneratorKernels {

  constexpr uint32_t tkNotFound = std::numeric_limits<uint16_t>::max();
  constexpr float maxScore = std::numeric_limits<float>::max();
  constexpr float nSigma2 = 25.f;
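
  // nSigma2 = 25 corresponds to a 5 sigma compatibility window: two track parameters a and b with
  // variances e2a and e2b are treated as compatible when (a - b)^2 < nSigma2 * (e2a + e2b), as done
  // in the duplicate-removal kernels below.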

  // all of these below are mostly to avoid bringing around the relative namespace
  // (aliases for the caStructures / track-SoA helper types used throughout this file:
  //  TupleMultiplicity, HitToTuple, CellNeighborsVector, CellTracksVector, OuterHitOfCell,
  //  HitsConstView, TkSoAView, HitContainer, Quality, QualityCuts, CAParams, Counters, ...)

  template <typename TrackerTraits>
  __global__ void kernel_checkOverflows(TkSoAView<TrackerTraits> tracks_view,
                                        TupleMultiplicity<TrackerTraits> const *tupleMultiplicity,
                                        HitToTuple<TrackerTraits> const *hitToTuple,
                                        cms::cuda::AtomicPairCounter *apc,
                                        GPUCACellT<TrackerTraits> const *__restrict__ cells,
                                        uint32_t const *__restrict__ nCells,
                                        CellNeighborsVector<TrackerTraits> const *cellNeighbors,
                                        CellTracksVector<TrackerTraits> const *cellTracks,
                                        OuterHitOfCell<TrackerTraits> const isOuterHitOfCell,
                                        int32_t nHits,
                                        uint32_t maxNumberOfDoublets,
                                        Counters *counters) {
    auto first = threadIdx.x + blockIdx.x * blockDim.x;

    auto &c = *counters;
    // counters once per event
    if (0 == first) {
      atomicAdd(&c.nEvents, 1);
      atomicAdd(&c.nHits, nHits);
      atomicAdd(&c.nCells, *nCells);
      atomicAdd(&c.nTuples, apc->get().m);
      atomicAdd(&c.nFitTracks, tupleMultiplicity->size());
    }

#ifdef NTUPLE_DEBUG
    if (0 == first) {
      printf("number of found cells %d \n found tuples %d with total hits %d out of %d %d\n",
             *nCells,
             apc->get().m,
             apc->get().n,
             nHits,
             hitToTuple->totOnes());
      if (apc->get().m < TrackerTraits::maxNumberOfQuadruplets) {
        assert(tracks_view.hitIndices().size(apc->get().m) == 0);
        assert(tracks_view.hitIndices().size() == apc->get().n);
      }
    }

    for (int idx = first, nt = tracks_view.hitIndices().nOnes(); idx < nt; idx += gridDim.x * blockDim.x) {
      if (tracks_view.hitIndices().size(idx) > TrackerTraits::maxHitsOnTrack)  // current real limit
        printf("ERROR %d, %d\n", idx, tracks_view.hitIndices().size(idx));
      assert(tracks_view.hitIndices().size(idx) <= TrackerTraits::maxHitsOnTrack);
      for (auto ih = tracks_view.hitIndices().begin(idx); ih != tracks_view.hitIndices().end(idx); ++ih)
        assert(int(*ih) < nHits);
    }
#endif

    if (0 == first) {
      if (apc->get().m >= TrackerTraits::maxNumberOfQuadruplets)
        printf("Tuples overflow\n");
      if (*nCells >= maxNumberOfDoublets)
        printf("Cells overflow\n");
      if (cellNeighbors && cellNeighbors->full())
        printf("cellNeighbors overflow %d %d \n", cellNeighbors->capacity(), cellNeighbors->size());
      if (cellTracks && cellTracks->full())
        printf("cellTracks overflow\n");
      if (int(hitToTuple->nOnes()) < nHits)
        printf("ERROR hitToTuple overflow %d %d\n", hitToTuple->nOnes(), nHits);
#ifdef GPU_DEBUG
      printf("size of cellNeighbors %d \n cellTracks %d \n hitToTuple %d \n",
             cellNeighbors->size(),
             cellTracks->size(),
             hitToTuple->size());
//      printf("cellTracksSizes;");
//      for (int i = 0; i < cellTracks->size(); i++) {
//        printf("%d;", cellTracks[i].size());
//      }
//
//      printf("\n");
//      printf("cellNeighborsSizes;");
//      for (int i = 0; i < cellNeighbors->size(); i++) {
//        printf("%d;", cellNeighbors[i].size());
//      }
//      printf("\n");
#endif
    }

    for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
      auto const &thisCell = cells[idx];
      if (thisCell.hasFishbone() && !thisCell.isKilled())
        atomicAdd(&c.nFishCells, 1);
      if (thisCell.outerNeighbors().full())  //++tooManyNeighbors[thisCell.theLayerPairId];
        printf("OuterNeighbors overflow %d in %d\n", idx, thisCell.layerPairId());
      if (thisCell.tracks().full())  //++tooManyTracks[thisCell.theLayerPairId];
        printf("Tracks overflow %d in %d\n", idx, thisCell.layerPairId());
      if (thisCell.isKilled())
        atomicAdd(&c.nKilledCells, 1);
      if (!thisCell.unused())
        atomicAdd(&c.nEmptyCells, 1);
      if ((0 == hitToTuple->size(thisCell.inner_hit_id())) && (0 == hitToTuple->size(thisCell.outer_hit_id())))
        atomicAdd(&c.nZeroTrackCells, 1);
    }

    for (int idx = first, nt = nHits - isOuterHitOfCell.offset; idx < nt; idx += gridDim.x * blockDim.x) {
      if (isOuterHitOfCell.container[idx].full())  // ++tooManyOuterHitOfCell;
        printf("OuterHitOfCell overflow %d\n", idx);
    }
  }
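
  // All kernels in this file follow the same grid-stride pattern: each thread starts at its global
  // index and advances by the total number of threads in the grid, so any launch configuration
  // covers the whole index range. A minimal standalone sketch of the idiom (the kernel name and
  // its trivial body are illustrative only, not part of this file):
  //
  //   __global__ void gridStrideSketch(float *x, int n) {
  //     for (int i = threadIdx.x + blockIdx.x * blockDim.x; i < n; i += gridDim.x * blockDim.x)
  //       x[i] *= 2.f;
  //   }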

  template <typename TrackerTraits>
  __global__ void kernel_fishboneCleaner(GPUCACellT<TrackerTraits> const *cells,
                                         uint32_t const *__restrict__ nCells,
                                         TkSoAView<TrackerTraits> tracks_view) {
    constexpr auto reject = pixelTrack::Quality::dup;

    auto first = threadIdx.x + blockIdx.x * blockDim.x;
    for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
      auto const &thisCell = cells[idx];
      if (!thisCell.isKilled())
        continue;

      for (auto it : thisCell.tracks())
        tracks_view[it].quality() = reject;
    }
  }

  // remove shorter tracks if sharing a cell
  // It does not seem to affect efficiency in any way!
  template <typename TrackerTraits>
  __global__ void kernel_earlyDuplicateRemover(GPUCACellT<TrackerTraits> const *cells,
                                               uint32_t const *__restrict__ nCells,
                                               TkSoAView<TrackerTraits> tracks_view,
                                               bool dupPassThrough) {
    // quality to mark rejected
    constexpr auto reject = pixelTrack::Quality::edup;

    assert(nCells);
    auto first = threadIdx.x + blockIdx.x * blockDim.x;
    for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
      auto const &thisCell = cells[idx];

      if (thisCell.tracks().size() < 2)
        continue;

      int8_t maxNl = 0;

      // find maxNl
      for (auto it : thisCell.tracks()) {
        auto nl = tracks_view[it].nLayers();
        maxNl = std::max(nl, maxNl);
      }

      // if (maxNl < 4) continue;
      // quad pass through (leave it here for tests)
      // maxNl = std::min(4, maxNl);

      for (auto it : thisCell.tracks()) {
        if (tracks_view[it].nLayers() < maxNl)
          tracks_view[it].quality() = reject;  //no race: simple assignment of the same constant
      }
    }
  }

  // assume the above (so, short tracks already removed)
  template <typename TrackerTraits>
  __global__ void kernel_fastDuplicateRemover(GPUCACellT<TrackerTraits> const *__restrict__ cells,
                                              uint32_t const *__restrict__ nCells,
                                              TkSoAView<TrackerTraits> tracks_view,
                                              bool dupPassThrough) {
    // quality to mark rejected
    auto const reject = dupPassThrough ? pixelTrack::Quality::loose : pixelTrack::Quality::dup;
    constexpr auto loose = pixelTrack::Quality::loose;

    assert(nCells);

    auto first = threadIdx.x + blockIdx.x * blockDim.x;
    for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
      auto const &thisCell = cells[idx];
      if (thisCell.tracks().size() < 2)
        continue;

      float mc = maxScore;
      uint16_t im = tkNotFound;

      auto score = [&](auto it) { return std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)); };

      // full crazy combinatorics
      int ntr = thisCell.tracks().size();
      for (int i = 0; i < ntr - 1; ++i) {
        auto it = thisCell.tracks()[i];
        auto qi = tracks_view[it].quality();
        if (qi <= reject)
          continue;
        auto opi = tracks_view[it].state()(2);
        auto e2opi = tracks_view[it].covariance()(9);
        auto cti = tracks_view[it].state()(3);
        auto e2cti = tracks_view[it].covariance()(12);
        for (auto j = i + 1; j < ntr; ++j) {
          auto jt = thisCell.tracks()[j];
          auto qj = tracks_view[jt].quality();
          if (qj <= reject)
            continue;
          auto opj = tracks_view[jt].state()(2);
          auto ctj = tracks_view[jt].state()(3);
          auto dct = nSigma2 * (tracks_view[jt].covariance()(12) + e2cti);
          if ((cti - ctj) * (cti - ctj) > dct)
            continue;
          auto dop = nSigma2 * (tracks_view[jt].covariance()(9) + e2opi);
          if ((opi - opj) * (opi - opj) > dop)
            continue;
          if ((qj < qi) || (qj == qi && score(it) < score(jt)))
            tracks_view[jt].quality() = reject;
          else {
            tracks_view[it].quality() = reject;
            break;
          }
        }
      }

      // find maxQual
      auto maxQual = reject;  // no duplicate!
      for (auto it : thisCell.tracks()) {
        if (tracks_view[it].quality() > maxQual)
          maxQual = tracks_view[it].quality();
      }

      if (maxQual <= loose)
        continue;

      // find min score
      for (auto it : thisCell.tracks()) {
        if (tracks_view[it].quality() == maxQual && score(it) < mc) {
          mc = score(it);
          im = it;
        }
      }

      if (tkNotFound == im)
        continue;

      // mark all other duplicates (not yet, keep it loose)
      for (auto it : thisCell.tracks()) {
        if (tracks_view[it].quality() > loose && it != im)
          tracks_view[it].quality() = loose;  //no race: simple assignment of the same constant
      }
    }
  }
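
  // The pairwise test above compares the two tracks' one-over-pt (opi/opj) and cot(theta) (cti/ctj)
  // parameters, read from state()(2) and state()(3), within nSigma2 times the sum of their
  // variances from covariance()(9) and covariance()(12). A standalone sketch of that compatibility
  // test (the helper name `compatible` is illustrative, not part of this file):
  //
  //   __device__ constexpr bool compatible(float a, float b, float e2a, float e2b) {
  //     return (a - b) * (a - b) < nSigma2 * (e2a + e2b);
  //   }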

  template <typename TrackerTraits>
  __global__ void kernel_connect(cms::cuda::AtomicPairCounter *apc1,
                                 cms::cuda::AtomicPairCounter *apc2,  // just to zero them
                                 HitsConstView<TrackerTraits> hh,
                                 GPUCACellT<TrackerTraits> *cells,
                                 uint32_t const *__restrict__ nCells,
                                 CellNeighborsVector<TrackerTraits> *cellNeighbors,
                                 OuterHitOfCell<TrackerTraits> const isOuterHitOfCell,
                                 CAParams<TrackerTraits> params) {
    using Cell = GPUCACellT<TrackerTraits>;

    auto firstCellIndex = threadIdx.y + blockIdx.y * blockDim.y;
    auto first = threadIdx.x;
    auto stride = blockDim.x;

    if (0 == (firstCellIndex + first)) {
      (*apc1) = 0;
      (*apc2) = 0;
    }  // ready for next kernel

    constexpr uint32_t last_bpix1_detIndex = TrackerTraits::last_bpix1_detIndex;
    constexpr uint32_t last_barrel_detIndex = TrackerTraits::last_barrel_detIndex;
    for (int idx = firstCellIndex, nt = (*nCells); idx < nt; idx += gridDim.y * blockDim.y) {
      auto cellIndex = idx;
      auto &thisCell = cells[idx];
      auto innerHitId = thisCell.inner_hit_id();
      if (int(innerHitId) < isOuterHitOfCell.offset)
        continue;
      int numberOfPossibleNeighbors = isOuterHitOfCell[innerHitId].size();
      auto vi = isOuterHitOfCell[innerHitId].data();

      auto ri = thisCell.inner_r(hh);
      auto zi = thisCell.inner_z(hh);

      auto ro = thisCell.outer_r(hh);
      auto zo = thisCell.outer_z(hh);
      auto isBarrel = thisCell.inner_detIndex(hh) < last_barrel_detIndex;

      for (int j = first; j < numberOfPossibleNeighbors; j += stride) {
        auto otherCell = __ldg(vi + j);
        auto &oc = cells[otherCell];
        auto r1 = oc.inner_r(hh);
        auto z1 = oc.inner_z(hh);
        bool aligned = Cell::areAlignedRZ(
            r1,
            z1,
            ri,
            zi,
            ro,
            zo,
            params.ptmin_,
            isBarrel ? params.CAThetaCutBarrel_ : params.CAThetaCutForward_);  // 2.f*thetaCut); // FIXME tune cuts
        if (aligned && thisCell.dcaCut(hh,
                                       oc,
                                       oc.inner_detIndex(hh) < last_bpix1_detIndex ? params.dcaCutInnerTriplet_
                                                                                   : params.dcaCutOuterTriplet_,
                                       params.hardCurvCut_)) {  // FIXME tune cuts
          oc.addOuterNeighbor(cellIndex, *cellNeighbors);
          thisCell.setStatusBits(Cell::StatusBit::kUsed);
          oc.setStatusBits(Cell::StatusBit::kUsed);
        }
      }  // loop on inner cells
    }    // loop on outer cells
  }
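
  // kernel_connect uses a 2D thread layout: the y dimension strides over the doublets ("cells")
  // while the x dimension strides over the candidate inner neighbors of each doublet. A hedged
  // launch sketch (grid/block shapes and variable names are illustrative; the actual configuration
  // is chosen by the calling code, not in this file):
  //
  //   dim3 blks(1, nBlocksY, 1);
  //   dim3 thrs(strideX, blockSizeY, 1);
  //   kernel_connect<TrackerTraits><<<blks, thrs, 0, stream>>>(
  //       apc1, apc2, hh, cells, nCells, cellNeighbors, isOuterHitOfCell, params);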

  template <typename TrackerTraits>
  __global__ void kernel_find_ntuplets(HitsConstView<TrackerTraits> hh,
                                       TkSoAView<TrackerTraits> tracks_view,
                                       GPUCACellT<TrackerTraits> *__restrict__ cells,
                                       uint32_t const *nCells,
                                       CellTracksVector<TrackerTraits> *cellTracks,
                                       cms::cuda::AtomicPairCounter *apc,
                                       CAParams<TrackerTraits> params) {
    // recursive: not obvious to widen

    using Cell = GPUCACellT<TrackerTraits>;

    auto first = threadIdx.x + blockIdx.x * blockDim.x;

#ifdef GPU_DEBUG
    if (first == 0)
      printf("starting producing ntuplets from %d cells \n", *nCells);
#endif
    for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
      auto const &thisCell = cells[idx];

      if (thisCell.isKilled())
        continue;  // cut by earlyFishbone

      // we require at least three hits...
      if (thisCell.outerNeighbors().empty())
        continue;

      auto pid = thisCell.layerPairId();
      bool doit = params.startingLayerPair(pid);

      constexpr uint32_t maxDepth = TrackerTraits::maxDepth;
      if (doit) {
        typename Cell::TmpTuple stack;
        stack.reset();

        bool bpix1Start = params.startAt0(pid);

        thisCell.template find_ntuplets<maxDepth>(hh,
                                                  cells,
                                                  *cellTracks,
                                                  tracks_view.hitIndices(),
                                                  *apc,
                                                  tracks_view.quality(),
                                                  stack,
                                                  params.minHitsPerNtuplet_,
                                                  bpix1Start);

        assert(stack.empty());
      }
    }
  }
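
  // kernel_find_ntuplets (above) performs a depth-first walk from each "starting" doublet
  // (params.startingLayerPair) through the outer-neighbor links created by kernel_connect; the
  // per-thread TmpTuple acts as the explicit DFS stack (bounded by TrackerTraits::maxDepth), and
  // every completed chain is appended to tracks_view.hitIndices() through the AtomicPairCounter.
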
  template <typename TrackerTraits>
  __global__ void kernel_mark_used(GPUCACellT<TrackerTraits> *__restrict__ cells, uint32_t const *nCells) {
    auto first = threadIdx.x + blockIdx.x * blockDim.x;
    using Cell = GPUCACellT<TrackerTraits>;
    for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
      auto &thisCell = cells[idx];
      if (!thisCell.tracks().empty())
        thisCell.setStatusBits(Cell::StatusBit::kInTrack);
    }
  }

  template <typename TrackerTraits>
  __global__ void kernel_countMultiplicity(TkSoAView<TrackerTraits> tracks_view,
                                           TupleMultiplicity<TrackerTraits> *tupleMultiplicity) {
    auto first = blockIdx.x * blockDim.x + threadIdx.x;
    for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) {
      auto nhits = tracks_view.hitIndices().size(it);
      if (nhits < 3)
        continue;
      if (tracks_view[it].quality() == pixelTrack::Quality::edup)
        continue;
      assert(tracks_view[it].quality() == pixelTrack::Quality::bad);
      if (nhits > TrackerTraits::maxHitsOnTrack)  // current limit
        printf("wrong mult %d %d\n", it, nhits);
      assert(nhits <= TrackerTraits::maxHitsOnTrack);
      tupleMultiplicity->count(nhits);
    }
  }

  template <typename TrackerTraits>
  __global__ void kernel_fillMultiplicity(TkSoAView<TrackerTraits> tracks_view,
                                          TupleMultiplicity<TrackerTraits> *tupleMultiplicity) {
    auto first = blockIdx.x * blockDim.x + threadIdx.x;
    for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) {
      auto nhits = tracks_view.hitIndices().size(it);
      if (nhits < 3)
        continue;
      if (tracks_view[it].quality() == pixelTrack::Quality::edup)
        continue;
      assert(tracks_view[it].quality() == pixelTrack::Quality::bad);
      if (nhits > TrackerTraits::maxHitsOnTrack)
        printf("wrong mult %d %d\n", it, nhits);
      assert(nhits <= TrackerTraits::maxHitsOnTrack);
      tupleMultiplicity->fill(nhits, it);
    }
  }
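
  // kernel_countMultiplicity / kernel_fillMultiplicity are the usual two-pass filling of a
  // one-to-many associator: the first pass only counts how many tuples fall into each multiplicity
  // bin, the counts are turned into cumulative offsets outside this file (a prefix scan over the
  // bins), and the second pass writes each tuple index into its pre-sized bin. The same pattern is
  // used below by kernel_countHitInTracks / kernel_fillHitInTracks for the hit-to-tuple association.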

  template <typename TrackerTraits>
  __global__ void kernel_classifyTracks(TkSoAView<TrackerTraits> tracks_view, QualityCuts<TrackerTraits> cuts) {
    // Quality *__restrict__ quality) {
    int first = blockDim.x * blockIdx.x + threadIdx.x;
    for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) {
      auto nhits = tracks_view.hitIndices().size(it);
      if (nhits == 0)
        break;  // guard

      // if duplicate: not even fit
      if (tracks_view[it].quality() == pixelTrack::Quality::edup)
        continue;

      assert(tracks_view[it].quality() == pixelTrack::Quality::bad);

      // mark doublets as bad
      if (nhits < 3)
        continue;

      // if the fit has any invalid parameters, mark it as bad
      bool isNaN = false;
      for (int i = 0; i < 5; ++i) {
        isNaN |= std::isnan(tracks_view[it].state()(i));
      }
      if (isNaN) {
#ifdef NTUPLE_DEBUG
        printf("NaN in fit %d size %d chi2 %f\n", it, tracks_view.hitIndices().size(it), tracks_view[it].chi2());
#endif
        continue;
      }

      tracks_view[it].quality() = pixelTrack::Quality::strict;

      if (cuts.strictCut(tracks_view, it))
        continue;

      tracks_view[it].quality() = pixelTrack::Quality::tight;

      if (cuts.isHP(tracks_view, nhits, it))
        tracks_view[it].quality() = pixelTrack::Quality::highPurity;
    }
  }

  template <typename TrackerTraits>
  __global__ void kernel_doStatsForTracks(TkSoAView<TrackerTraits> tracks_view, Counters *counters) {
    int first = blockDim.x * blockIdx.x + threadIdx.x;
    for (int idx = first, ntot = tracks_view.hitIndices().nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
      if (tracks_view.hitIndices().size(idx) == 0)
        break;  //guard
      if (tracks_view[idx].quality() < pixelTrack::Quality::loose)
        continue;
      atomicAdd(&(counters->nLooseTracks), 1);
      if (tracks_view[idx].quality() < pixelTrack::Quality::strict)
        continue;
      atomicAdd(&(counters->nGoodTracks), 1);
    }
  }

  template <typename TrackerTraits>
  __global__ void kernel_countHitInTracks(TkSoAView<TrackerTraits> tracks_view, HitToTuple<TrackerTraits> *hitToTuple) {
    int first = blockDim.x * blockIdx.x + threadIdx.x;
    for (int idx = first, ntot = tracks_view.hitIndices().nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
      if (tracks_view.hitIndices().size(idx) == 0)
        break;  // guard
      for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h)
        hitToTuple->count(*h);
    }
  }

  template <typename TrackerTraits>
  __global__ void kernel_fillHitInTracks(TkSoAView<TrackerTraits> tracks_view, HitToTuple<TrackerTraits> *hitToTuple) {
    int first = blockDim.x * blockIdx.x + threadIdx.x;
    for (int idx = first, ntot = tracks_view.hitIndices().nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
      if (tracks_view.hitIndices().size(idx) == 0)
        break;  // guard
      for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h)
        hitToTuple->fill(*h, idx);
    }
  }

  template <typename TrackerTraits>
  __global__ void kernel_fillHitDetIndices(TkSoAView<TrackerTraits> tracks_view, HitsConstView<TrackerTraits> hh) {
    int first = blockDim.x * blockIdx.x + threadIdx.x;
    // copy offsets
    for (int idx = first, ntot = tracks_view.hitIndices().totOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
      tracks_view.detIndices().off[idx] = tracks_view.hitIndices().off[idx];
    }
    // fill hit indices
    auto nhits = hh.nHits();

    for (int idx = first, ntot = tracks_view.hitIndices().size(); idx < ntot; idx += gridDim.x * blockDim.x) {
      assert(tracks_view.hitIndices().content[idx] < nhits);
      tracks_view.detIndices().content[idx] = hh[tracks_view.hitIndices().content[idx]].detectorIndex();
    }
  }

  template <typename TrackerTraits>
  __global__ void kernel_fillNLayers(TkSoAView<TrackerTraits> tracks_view, cms::cuda::AtomicPairCounter *apc) {
    auto first = blockIdx.x * blockDim.x + threadIdx.x;
    // clamp the number of tracks to the capacity of the SoA
    auto ntracks = std::min<int>(apc->get().m, tracks_view.metadata().size() - 1);
    if (0 == first)
      tracks_view.nTracks() = ntracks;
    for (int idx = first, nt = ntracks; idx < nt; idx += gridDim.x * blockDim.x) {
      auto nHits = TracksUtilities<TrackerTraits>::nHits(tracks_view, idx);
      assert(nHits >= 3);
      tracks_view[idx].nLayers() = TracksUtilities<TrackerTraits>::computeNumberOfLayers(tracks_view, idx);
    }
  }

  template <typename TrackerTraits>
  __global__ void kernel_doStatsForHitInTracks(HitToTuple<TrackerTraits> const *__restrict__ hitToTuple,
                                               Counters *counters) {
    auto &c = *counters;
    int first = blockDim.x * blockIdx.x + threadIdx.x;
    for (int idx = first, ntot = hitToTuple->nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
      if (hitToTuple->size(idx) == 0)
        continue;  // SHALL NOT BE break
      atomicAdd(&c.nUsedHits, 1);
      if (hitToTuple->size(idx) > 1)
        atomicAdd(&c.nDupHits, 1);
    }
  }

  template <typename TrackerTraits>
  __global__ void kernel_countSharedHit(int *__restrict__ nshared,
                                        HitContainer<TrackerTraits> const *__restrict__ ptuples,
                                        Quality const *__restrict__ quality,
                                        HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
    constexpr auto loose = pixelTrack::Quality::loose;

    auto &hitToTuple = *phitToTuple;
    auto const &foundNtuplets = *ptuples;

    int first = blockDim.x * blockIdx.x + threadIdx.x;
    for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
      if (hitToTuple.size(idx) < 2)
        continue;

      int nt = 0;

      // count "good" tracks
      for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
        if (quality[*it] < loose)
          continue;
        ++nt;
      }

      if (nt < 2)
        continue;

      // now mark each track triplet as sharing a hit
      for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
        if (foundNtuplets.size(*it) > 3)
          continue;
        atomicAdd(&nshared[*it], 1);
      }

    }  // hit loop
  }

  template <typename TrackerTraits>
  __global__ void kernel_markSharedHit(int const *__restrict__ nshared,
                                       HitContainer<TrackerTraits> const *__restrict__ tuples,
                                       Quality *__restrict__ quality,
                                       bool dupPassThrough) {
    // constexpr auto bad = pixelTrack::Quality::bad;
    constexpr auto dup = pixelTrack::Quality::dup;
    constexpr auto loose = pixelTrack::Quality::loose;
    // constexpr auto strict = pixelTrack::Quality::strict;

    // quality to mark rejected
    auto const reject = dupPassThrough ? loose : dup;

    int first = blockDim.x * blockIdx.x + threadIdx.x;
    for (int idx = first, ntot = tuples->nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
      if (tuples->size(idx) == 0)
        break;  //guard
      if (quality[idx] <= reject)
        continue;
      if (nshared[idx] > 2)
        quality[idx] = reject;
    }
  }
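
  // kernel_countSharedHit and kernel_markSharedHit act as a pair: the first increments nshared[t]
  // for every short track t (at most 3 hits) sharing a hit with at least one other loose-or-better
  // track, and the second demotes tracks with more than two such shared hits to `dup`
  // (or only to `loose` when dupPassThrough is set).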

  // mostly for very forward triplets.....
  template <typename TrackerTraits>
  __global__ void kernel_rejectDuplicate(TkSoAView<TrackerTraits> tracks_view,
                                         uint16_t nmin,
                                         bool dupPassThrough,
                                         HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
    // quality to mark rejected
    auto const reject = dupPassThrough ? pixelTrack::Quality::loose : pixelTrack::Quality::dup;

    auto &hitToTuple = *phitToTuple;

    int first = blockDim.x * blockIdx.x + threadIdx.x;
    for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
      if (hitToTuple.size(idx) < 2)
        continue;

      auto score = [&](auto it, auto nl) { return std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)); };

      // full combinatorics
      for (auto ip = hitToTuple.begin(idx); ip < hitToTuple.end(idx) - 1; ++ip) {
        auto const it = *ip;
        auto qi = tracks_view[it].quality();
        if (qi <= reject)
          continue;
        auto opi = tracks_view[it].state()(2);
        auto e2opi = tracks_view[it].covariance()(9);
        auto cti = tracks_view[it].state()(3);
        auto e2cti = tracks_view[it].covariance()(12);
        auto nli = tracks_view[it].nLayers();
        for (auto jp = ip + 1; jp < hitToTuple.end(idx); ++jp) {
          auto const jt = *jp;
          auto qj = tracks_view[jt].quality();
          if (qj <= reject)
            continue;
          auto opj = tracks_view[jt].state()(2);
          auto ctj = tracks_view[jt].state()(3);
          auto dct = nSigma2 * (tracks_view[jt].covariance()(12) + e2cti);
          if ((cti - ctj) * (cti - ctj) > dct)
            continue;
          auto dop = nSigma2 * (tracks_view[jt].covariance()(9) + e2opi);
          if ((opi - opj) * (opi - opj) > dop)
            continue;
          auto nlj = tracks_view[jt].nLayers();
          if (nlj < nli || (nlj == nli && (qj < qi || (qj == qi && score(it, nli) < score(jt, nlj)))))
            tracks_view[jt].quality() = reject;
          else {
            tracks_view[it].quality() = reject;
            break;
          }
        }
      }
    }
  }

  template <typename TrackerTraits>
  __global__ void kernel_sharedHitCleaner(HitsConstView<TrackerTraits> hh,
                                          TkSoAView<TrackerTraits> tracks_view,
                                          int nmin,
                                          bool dupPassThrough,
                                          HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
    // quality to mark rejected
    auto const reject = dupPassThrough ? pixelTrack::Quality::loose : pixelTrack::Quality::dup;
    // quality of longest track
    auto const longTqual = pixelTrack::Quality::highPurity;

    auto &hitToTuple = *phitToTuple;

    int l1end = hh.hitsLayerStart()[1];

    int first = blockDim.x * blockIdx.x + threadIdx.x;
    for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
      if (hitToTuple.size(idx) < 2)
        continue;

      int8_t maxNl = 0;

      // find maxNl
      for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
        if (tracks_view[*it].quality() < longTqual)
          continue;
        // if (tracks_view[*it].nHits()==3) continue;
        auto nl = tracks_view[*it].nLayers();
        maxNl = std::max(nl, maxNl);
      }

      if (maxNl < 4)
        continue;

      // quad pass through (leave for tests)
      // maxNl = std::min(4, maxNl);

      // kill all tracks shorter than maxNl (only triplets???)
      for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
        auto nl = tracks_view[*it].nLayers();

        // checking if shared hit is on bpix1 and if the tuple is short enough
        if (idx < l1end and nl > nmin)
          continue;

        if (nl < maxNl && tracks_view[*it].quality() > reject)
          tracks_view[*it].quality() = reject;
      }
    }
  }

  template <typename TrackerTraits>
  __global__ void kernel_tripletCleaner(TkSoAView<TrackerTraits> tracks_view,
                                        uint16_t nmin,
                                        bool dupPassThrough,
                                        HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
    // quality to mark rejected
    auto const reject = pixelTrack::Quality::loose;
    // min quality of "good" tracks
    auto const good = pixelTrack::Quality::strict;

    auto &hitToTuple = *phitToTuple;

    int first = blockDim.x * blockIdx.x + threadIdx.x;
    for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
      if (hitToTuple.size(idx) < 2)
        continue;

      float mc = maxScore;
      uint16_t im = tkNotFound;
      bool onlyTriplets = true;

      // check if only triplets
      for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
        if (tracks_view[*it].quality() <= good)
          continue;
        onlyTriplets &= TracksUtilities<TrackerTraits>::isTriplet(tracks_view, *it);
        if (!onlyTriplets)
          break;
      }

      // only triplets
      if (!onlyTriplets)
        continue;

      // for triplets choose best tip! (should we first find best quality???)
      for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
        auto const it = *ip;
        if (tracks_view[it].quality() >= good &&
            std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)) < mc) {
          mc = std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it));
          im = it;
        }
      }

      if (tkNotFound == im)
        continue;

      // mark worse ambiguities
      for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
        auto const it = *ip;
        if (tracks_view[it].quality() > reject && it != im)
          tracks_view[it].quality() = reject;  //no race: simple assignment of the same constant
      }

    }  // loop over hits
  }

  template <typename TrackerTraits>
  __global__ void kernel_simpleTripletCleaner(TkSoAView<TrackerTraits> tracks_view,
                                              uint16_t nmin,
                                              bool dupPassThrough,
                                              HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
    // quality to mark rejected
    auto const reject = pixelTrack::Quality::loose;
    // min quality of "good" tracks
    auto const good = pixelTrack::Quality::loose;

    auto &hitToTuple = *phitToTuple;

    int first = blockDim.x * blockIdx.x + threadIdx.x;
    for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
      if (hitToTuple.size(idx) < 2)
        continue;

      float mc = maxScore;
      uint16_t im = tkNotFound;

      // choose best tip! (should we first find best quality???)
      for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
        auto const it = *ip;
        if (tracks_view[it].quality() >= good &&
            std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)) < mc) {
          mc = std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it));
          im = it;
        }
      }

      if (tkNotFound == im)
        continue;

      // mark worse ambiguities
      for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
        auto const it = *ip;
        if (tracks_view[it].quality() > reject && TracksUtilities<TrackerTraits>::isTriplet(tracks_view, it) &&
            it != im)
          tracks_view[it].quality() = reject;  //no race: simple assignment of the same constant
      }

    }  // loop over hits
  }

  template <typename TrackerTraits>
  __global__ void kernel_print_found_ntuplets(HitsConstView<TrackerTraits> hh,
                                              TkSoAView<TrackerTraits> tracks_view,
                                              HitToTuple<TrackerTraits> const *__restrict__ phitToTuple,
                                              int32_t firstPrint,
                                              int32_t lastPrint,
                                              int iev) {
    constexpr auto loose = pixelTrack::Quality::loose;

    int first = firstPrint + blockDim.x * blockIdx.x + threadIdx.x;
    for (int i = first, np = std::min(lastPrint, tracks_view.hitIndices().nOnes()); i < np;
         i += blockDim.x * gridDim.x) {
      auto nh = tracks_view.hitIndices().size(i);
      if (nh < 3)
        continue;
      if (tracks_view[i].quality() < loose)
        continue;
      printf("TK: %d %d %d %d %f %f %f %f %f %f %f %.3f %.3f %.3f %.3f %.3f %.3f %.3f\n",
             10000 * iev + i,
             int(tracks_view[i].quality()),
             nh,
             tracks_view[i].nLayers(),
             TracksUtilities<TrackerTraits>::charge(tracks_view, i),
             tracks_view[i].pt(),
             tracks_view[i].eta(),
             TracksUtilities<TrackerTraits>::phi(tracks_view, i),
             TracksUtilities<TrackerTraits>::tip(tracks_view, i),
             TracksUtilities<TrackerTraits>::zip(tracks_view, i),
             tracks_view[i].chi2(),
             hh[*tracks_view.hitIndices().begin(i)].zGlobal(),
             hh[*(tracks_view.hitIndices().begin(i) + 1)].zGlobal(),
             hh[*(tracks_view.hitIndices().begin(i) + 2)].zGlobal(),
             nh > 3 ? hh[int(*(tracks_view.hitIndices().begin(i) + 3))].zGlobal() : 0,
             nh > 4 ? hh[int(*(tracks_view.hitIndices().begin(i) + 4))].zGlobal() : 0,
             nh > 5 ? hh[int(*(tracks_view.hitIndices().begin(i) + 5))].zGlobal() : 0,
             nh > 6 ? hh[int(*(tracks_view.hitIndices().begin(i) + nh - 1))].zGlobal() : 0);
    }
  }

  __global__ void kernel_printCounters(Counters const *counters) {
    auto const &c = *counters;
    printf(
        "||Counters | nEvents | nHits | nCells | nTuples | nFitTracks | nLooseTracks | nGoodTracks | nUsedHits | "
        "nDupHits | "
        "nFishCells | "
        "nKilledCells | "
        "nUsedCells | nZeroTrackCells ||\n");
    printf("Counters Raw %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n",
           c.nEvents,
           c.nHits,
           c.nCells,
           c.nTuples,
           c.nFitTracks,
           c.nLooseTracks,
           c.nGoodTracks,
           c.nUsedHits,
           c.nDupHits,
           c.nFishCells,
           c.nKilledCells,
           c.nEmptyCells,
           c.nZeroTrackCells);
    printf(
        "Counters Norm %lld || %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.3f| %.3f| %.3f| %.3f||\n",
        c.nEvents,
        c.nHits / double(c.nEvents),
        c.nCells / double(c.nEvents),
        c.nTuples / double(c.nEvents),
        c.nFitTracks / double(c.nEvents),
        c.nLooseTracks / double(c.nEvents),
        c.nGoodTracks / double(c.nEvents),
        c.nUsedHits / double(c.nEvents),
        c.nDupHits / double(c.nEvents),
        c.nFishCells / double(c.nCells),
        c.nKilledCells / double(c.nCells),
        c.nEmptyCells / double(c.nCells),
        c.nZeroTrackCells / double(c.nCells));
  }

}  // namespace caHitNtupletGeneratorKernels