#include <cuda_runtime.h>

// ...

constexpr float nSigma2 = 25.f;
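// (nSigma2 = 5^2 is the squared significance window used by the duplicate
// removers below: two tracks are treated as the same candidate when their
// 1/pt and cot(theta) agree within 5 sigma of the combined uncertainties.)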
__global__ void kernel_checkOverflows(HitContainer const *foundNtuplets,
                                      caConstants::TupleMultiplicity const *tupleMultiplicity,
                                      CAHitNtupletGeneratorKernelsGPU::HitToTuple const *hitToTuple,
                                      cms::cuda::AtomicPairCounter *apc,
                                      GPUCACell const *__restrict__ cells,
                                      uint32_t const *__restrict__ nCells,
                                      gpuPixelDoublets::CellNeighborsVector const *cellNeighbors,
                                      gpuPixelDoublets::CellTracksVector const *cellTracks,
                                      GPUCACell::OuterHitOfCell const isOuterHitOfCell,
                                      int32_t nHits,
                                      uint32_t maxNumberOfDoublets,
                                      CAHitNtupletGeneratorKernelsGPU::Counters *counters) {
  auto first = threadIdx.x + blockIdx.x * blockDim.x;
  auto &c = *counters;

  // accumulate the per-event statistics once per grid
  if (0 == first) {
    atomicAdd(&c.nEvents, 1);
    atomicAdd(&c.nHits, nHits);
    atomicAdd(&c.nCells, *nCells);
    atomicAdd(&c.nTuples, apc->get().m);
    atomicAdd(&c.nFitTracks, tupleMultiplicity->size());
  }

#ifdef NTUPLE_DEBUG
  if (0 == first) {
    printf("number of found cells %d, found tuples %d with total hits %d out of %d %d\n",
           /* ... */);
    assert(foundNtuplets->size(apc->get().m) == 0);
  }

  for (int idx = first, nt = foundNtuplets->nOnes(); idx < nt; idx += gridDim.x * blockDim.x) {
    if (foundNtuplets->size(idx) > 7)  // current hard limit on hits per tuple
      printf("ERROR %d, %d\n", idx, foundNtuplets->size(idx));
    for (auto ih = foundNtuplets->begin(idx); ih != foundNtuplets->end(idx); ++ih)
      assert(int(*ih) < nHits);
  }
#endif

  // report any fixed-capacity container that saturated
  if (0 == first) {
    if (apc->get().m >= caConstants::maxNumberOfQuadruplets)
      printf("Tuples overflow\n");
    if (*nCells >= maxNumberOfDoublets)
      printf("Cells overflow\n");
    if (cellNeighbors && cellNeighbors->full())
      printf("cellNeighbors overflow\n");
    if (cellTracks && cellTracks->full())
      printf("cellTracks overflow\n");
    if (int(hitToTuple->nOnes()) < nHits) {
      // ... report hitToTuple overflow
    }
  }
  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
    auto const &thisCell = cells[idx];
    if (thisCell.outerNeighbors().full())
      printf("OuterNeighbors overflow %d in %d\n", idx, thisCell.layerPairId());
    if (thisCell.tracks().full())
      printf("Tracks overflow %d in %d\n", idx, thisCell.layerPairId());
    if (thisCell.isKilled())
      atomicAdd(&c.nKilledCells, 1);
    if (!thisCell.unused())
      atomicAdd(&c.nEmptyCells, 1);
    if ((0 == hitToTuple->size(thisCell.inner_hit_id())) && (0 == hitToTuple->size(thisCell.outer_hit_id())))
      atomicAdd(&c.nZeroTrackCells, 1);
  }

  for (int idx = first, nt = nHits - isOuterHitOfCell.offset; idx < nt; idx += gridDim.x * blockDim.x) {
    if (isOuterHitOfCell.container[idx].full())
      printf("OuterHitOfCell overflow %d\n", idx);
  }
}
// kernel_fishboneCleaner: reject all tracks attached to a cell killed by the
// fishbone.
__global__ void kernel_fishboneCleaner(GPUCACell const *cells, uint32_t const *__restrict__ nCells, Quality *quality) {
  constexpr auto reject = pixelTrack::Quality::dup;

  auto first = threadIdx.x + blockIdx.x * blockDim.x;
  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
    auto const &thisCell = cells[idx];
    if (!thisCell.isKilled())
      continue;

    for (auto it : thisCell.tracks())
      quality[it] = reject;
  }
}
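// Tracks through killed cells are demoted rather than deleted: the container
// layout stays intact and downstream kernels simply skip low-quality entries.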
// kernel_earlyDuplicateRemover: among the tracks sharing a cell, keep only
// those spanning the maximum number of layers.
__global__ void kernel_earlyDuplicateRemover(GPUCACell const *cells,
                                             uint32_t const *__restrict__ nCells,
                                             TkSoA const *__restrict__ ptracks,
                                             Quality *quality,
                                             bool dupPassThrough) {
  // quality to assign to the rejected tracks; cannot be loose
  constexpr auto reject = pixelTrack::Quality::edup;

  auto const &tracks = *ptracks;
  auto first = threadIdx.x + blockIdx.x * blockDim.x;
  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
    auto const &thisCell = cells[idx];
    if (thisCell.tracks().size() < 2)
      continue;

    int8_t maxNl = 0;
    for (auto it : thisCell.tracks()) {
      auto nl = tracks.nLayers(it);
      maxNl = std::max(nl, maxNl);
    }

    for (auto it : thisCell.tracks()) {
      if (tracks.nLayers(it) < maxNl)
        quality[it] = reject;  // no race: simple assignment of the same constant
    }
  }
}
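// Example: if a doublet is shared by one track with hits on four layers and
// two tracks with hits on three, the two shorter tracks are demoted and only
// the four-layer candidate survives into the fit.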
// kernel_fastDuplicateRemover: full pairwise comparison of the tracks sharing
// a cell; of each compatible pair, the worse track is rejected.
__global__ void kernel_fastDuplicateRemover(GPUCACell const *__restrict__ cells,
                                            uint32_t const *__restrict__ nCells,
                                            TkSoA *__restrict__ tracks,
                                            bool dupPassThrough) {
  // quality to assign to the rejected tracks
  auto const reject = dupPassThrough ? pixelTrack::Quality::loose : pixelTrack::Quality::dup;
  constexpr auto loose = pixelTrack::Quality::loose;

  auto first = threadIdx.x + blockIdx.x * blockDim.x;
  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
    auto const &thisCell = cells[idx];
    if (thisCell.tracks().size() < 2)
      continue;

    float mc = maxFloat;
    uint16_t im = tkNotFound;

    // the score is the transverse impact parameter (the smaller, the better)
    auto score = [&](auto it) { return std::abs(tracks->tip(it)); };

    // full combinatorics among the tracks of this cell
    int ntr = thisCell.tracks().size();
    for (int i = 0; i < ntr; ++i) {
      auto it = thisCell.tracks()[i];
      auto qi = tracks->quality(it);
      if (qi <= reject)
        continue;
      auto opi = tracks->stateAtBS.state(it)(2);         // 1/pt at the beam spot
      auto e2opi = tracks->stateAtBS.covariance(it)(9);  // its variance
      auto cti = tracks->stateAtBS.state(it)(3);         // cot(theta)
      auto e2cti = tracks->stateAtBS.covariance(it)(12);
      for (auto j = i + 1; j < ntr; ++j) {
        auto jt = thisCell.tracks()[j];
        auto qj = tracks->quality(jt);
        if (qj <= reject)
          continue;
        if (foundNtuplets->size(it) != foundNtuplets->size(jt)) {
          // ... (cross-check on tuple sizes)
        }
        auto opj = tracks->stateAtBS.state(jt)(2);
        auto ctj = tracks->stateAtBS.state(jt)(3);
        // compatible in cot(theta) and 1/pt within nSigma2 x combined variance?
        auto dct = nSigma2 * (tracks->stateAtBS.covariance(jt)(12) + e2cti);
        if ((cti - ctj) * (cti - ctj) > dct)
          continue;
        auto dop = nSigma2 * (tracks->stateAtBS.covariance(jt)(9) + e2opi);
        if ((opi - opj) * (opi - opj) > dop)
          continue;
        // reject the lower quality track; on equal quality, the worse score
        if ((qj < qi) || (qj == qi && score(it) < score(jt)))
          tracks->quality(jt) = reject;
        else {
          tracks->quality(it) = reject;
          break;
        }
      }
    }

    // find the best surviving quality
    auto maxQual = reject;
    for (auto it : thisCell.tracks()) {
      if (tracks->quality(it) > maxQual)
        maxQual = tracks->quality(it);
    }

    if (maxQual <= loose)
      continue;

    // find the track with the smallest score at that quality
    for (auto it : thisCell.tracks()) {
      if (tracks->quality(it) == maxQual && score(it) < mc) {
        mc = score(it);
        im = it;
      }
    }

    if (tkNotFound == im)
      continue;

    // demote everything else to loose
    for (auto it : thisCell.tracks()) {
      if (tracks->quality(it) > loose && it != im)
        tracks->quality(it) = loose;  // no race: simple assignment of the same constant
    }
  }
}
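// Two tracks are considered the same candidate when both their 1/pt and their
// cot(theta) differ by less than 5 sigma (nSigma2 = 25 multiplies the summed
// variances). Quality decides first; the transverse impact parameter breaks
// ties.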
// kernel_connect: for every cell (doublet), scan the cells ending on its
// inner hit and connect the pairs that are aligned in r-z and pass the DCA
// cut; the connected cells form the graph walked by kernel_find_ntuplets.
__global__ void kernel_connect(cms::cuda::AtomicPairCounter *apc1,
                               cms::cuda::AtomicPairCounter *apc2,  // just to zero them
                               GPUCACell::Hits const *__restrict__ hhp,
                               GPUCACell *cells,
                               uint32_t const *__restrict__ nCells,
                               gpuPixelDoublets::CellNeighborsVector *cellNeighbors,
                               GPUCACell::OuterHitOfCell const isOuterHitOfCell,
                               float hardCurvCut,
                               float ptmin,
                               float CAThetaCutBarrel,
                               float CAThetaCutForward,
                               float dcaCutInnerTriplet,
                               float dcaCutOuterTriplet) {
  auto const &hh = *hhp;

  auto firstCellIndex = threadIdx.y + blockIdx.y * blockDim.y;
  auto first = threadIdx.x;
  auto stride = blockDim.x;

  if (0 == (firstCellIndex + first)) {
    (*apc1) = 0;
    (*apc2) = 0;
  }  // ready for the next kernel

  for (int idx = firstCellIndex, nt = (*nCells); idx < nt; idx += gridDim.y * blockDim.y) {
    auto cellIndex = idx;
    auto &thisCell = cells[idx];
    auto innerHitId = thisCell.inner_hit_id();
    if (int(innerHitId) < isOuterHitOfCell.offset)
      continue;
    int numberOfPossibleNeighbors = isOuterHitOfCell[innerHitId].size();
    auto vi = isOuterHitOfCell[innerHitId].data();

    auto ri = thisCell.inner_r(hh);
    auto zi = thisCell.inner_z(hh);
    auto ro = thisCell.outer_r(hh);
    auto zo = thisCell.outer_z(hh);
    auto isBarrel = thisCell.inner_detIndex(hh) < caConstants::last_barrel_detIndex;

    for (int j = first; j < numberOfPossibleNeighbors; j += stride) {
      auto otherCell = __ldg(vi + j);
      auto &oc = cells[otherCell];
      auto r1 = oc.inner_r(hh);
      auto z1 = oc.inner_z(hh);
      bool aligned = GPUCACell::areAlignedRZ(
          r1, z1, ri, zi, ro, zo, ptmin, isBarrel ? CAThetaCutBarrel : CAThetaCutForward);
      if (aligned && thisCell.dcaCut(hh,
                                     oc,
                                     oc.inner_detIndex(hh) < caConstants::last_bpix1_detIndex
                                         ? dcaCutInnerTriplet
                                         : dcaCutOuterTriplet,
                                     hardCurvCut)) {
        oc.addOuterNeighbor(cellIndex, *cellNeighbors);
        // ...
      }
    }  // loop on inner cells
  }  // loop on outer cells
}
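// Only geometric compatibility is tested here: alignment of the two doublets
// in the r-z plane (theta cut) and the distance of closest approach of the
// resulting triplet to the beam line, with a tighter DCA cut when the inner
// hit sits on the first barrel layer.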
// kernel_find_ntuplets: depth-first walk along the neighbor links to build
// the ntuplets, starting only from the allowed layer pairs.
__global__ void kernel_find_ntuplets(GPUCACell::Hits const *__restrict__ hhp,
                                     GPUCACell *__restrict__ cells,
                                     uint32_t const *nCells,
                                     /* ... */
                                     unsigned int minHitsPerNtuplet) {
  auto const &hh = *hhp;

  auto first = threadIdx.x + blockIdx.x * blockDim.x;
  for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
    auto const &thisCell = cells[idx];
    if (thisCell.isKilled())
      continue;  // cut by the (early) fishbone
    if (thisCell.outerNeighbors().empty())
      continue;  // a track needs at least three hits
    auto pid = thisCell.layerPairId();
    // the quadruplet search starts only from the three innermost layer pairs;
    // the triplet search skips layer pairs 8-12
    auto doit = minHitsPerNtuplet > 3 ? pid < 3 : pid < 8 || pid > 12;
    if (doit)
      thisCell.find_ntuplets<6>(/* ... */);
  }
}
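// find_ntuplets recurses along the outer neighbors of each starting cell; the
// template argument (6) bounds the recursion depth at compile time, and the
// AtomicPairCounter hands out one output slot per completed candidate.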
// kernel_mark_used: flag every cell that ended up in at least one track.
// ...
    auto &thisCell = cells[idx];
    if (!thisCell.tracks().empty())
      thisCell.setStatusBits(GPUCACell::StatusBit::kInTrack);
// kernel_countMultiplicity / kernel_fillMultiplicity: two-pass fill of the
// hit-multiplicity histogram used to batch the fits by tuple size.
__global__ void kernel_countMultiplicity(HitContainer const *__restrict__ foundNtuplets,
                                         Quality const *__restrict__ quality,
                                         caConstants::TupleMultiplicity *tupleMultiplicity) {
  // ... for every tuple `it` passing the quality checks:
  auto nhits = foundNtuplets->size(it);
  // ...
  tupleMultiplicity->count(nhits);
}

__global__ void kernel_fillMultiplicity(HitContainer const *__restrict__ foundNtuplets,
                                        Quality const *__restrict__ quality,
                                        caConstants::TupleMultiplicity *tupleMultiplicity) {
  // ...
  auto nhits = foundNtuplets->size(it);
  // ...
  tupleMultiplicity->fill(nhits, it);
}
// kernel_classifyTracks: assign a quality to every fitted tuple.
__global__ void kernel_classifyTracks(HitContainer const *__restrict__ tuples,
                                      TkSoA const *__restrict__ tracks,
                                      CAHitNtupletGeneratorKernelsGPU::QualityCuts cuts,
                                      Quality *__restrict__ quality) {
  int first = blockDim.x * blockIdx.x + threadIdx.x;
  for (int it = first, nt = tuples->nOnes(); it < nt; it += gridDim.x * blockDim.x) {
    auto nhits = tuples->size(it);
    // ...

    // if the fit produced any non-finite parameter, mark the track as bad
    bool isNaN = false;
    for (int i = 0; i < 5; ++i) {
      isNaN |= std::isnan(tracks->stateAtBS.state(it)(i));
    }
    if (isNaN) {
      printf("NaN in fit %d size %d chi2 %f\n", it, tuples->size(it), tracks->chi2(it));
      // ...
    }
    // fast integer approximation of log2(x)
    auto roughLog = [](float x) {
      union {
        uint32_t i;
        float f;
      } z;
      z.f = x;
      uint32_t lsb = 1 < 21;  // evaluates to 1, so the add below is negligible;
                              // the frac[] bin values assume plain truncation
      z.i += lsb;
      z.i >>= 21;  // keep the sign, the exponent and the top 2 mantissa bits
      auto f = z.i & 3;              // 2-bit mantissa bin
      int ex = int(z.i >> 2) - 127;  // unbiased exponent

      // average of log2(1 + m) over each mantissa bin
      const float frac[4] = {0.160497f, 0.452172f, 0.694562f, 0.901964f};
      return float(ex) + frac[f];
    };

    // pT-dependent chi2 cut (pt is clamped so the cut saturates at chi2MaxPt)
    float pt = std::min<float>(tracks->pt(it), cuts.chi2MaxPt);
    float chi2Cut = cuts.chi2Scale * (cuts.chi2Coeff[0] + roughLog(pt) * cuts.chi2Coeff[1]);
    if (tracks->chi2(it) >= chi2Cut) {
#ifdef NTUPLE_FIT_DEBUG
      printf("Bad chi2 %d size %d pt %f eta %f chi2 %f\n",
             it,
             tuples->size(it),
             tracks->pt(it),
             tracks->eta(it),
             tracks->chi2(it));
#endif
      continue;
    }
// kernel_doStatsForTracks(tuples, quality, counters): update the
// nLooseTracks / nGoodTracks event counters.
// ...
    if (tuples->size(idx) == 0)
      break;  // guard: the tuples are stored contiguously
// kernel_countHitInTracks: first pass of the hit -> tuple association.
__global__ void kernel_countHitInTracks(HitContainer const *__restrict__ tuples,
                                        Quality const *__restrict__ quality,
                                        CAHitNtupletGeneratorKernelsGPU::HitToTuple *hitToTuple) {
  // ...
  if (tuples->size(idx) == 0)
    break;  // guard
  // ...
  for (auto h = tuples->begin(idx); h != tuples->end(idx); ++h)
    hitToTuple->count(*h);
}
// kernel_fillHitInTracks: second pass, storing the tuple index of every hit.
__global__ void kernel_fillHitInTracks(HitContainer const *__restrict__ tuples,
                                       Quality const *__restrict__ quality,
                                       CAHitNtupletGeneratorKernelsGPU::HitToTuple *hitToTuple) {
  // ...
  if (tuples->size(idx) == 0)
    break;  // guard
  // ...
  for (auto h = tuples->begin(idx); h != tuples->end(idx); ++h)
    hitToTuple->fill(*h, idx);
}
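// Both associations above (tupleMultiplicity and hitToTuple) follow the same
// two-pass OneToManyAssoc protocol: a `count` kernel sizes every bin, a
// finalize/prefix-scan step (launched from the host in between, not shown)
// turns the counts into offsets, and a `fill` kernel stores the payload.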
// kernel_fillHitDetIndices: mirror the tuple container, replacing each hit
// index with the detector index of the module that produced the hit.
__global__ void kernel_fillHitDetIndices(HitContainer const *__restrict__ tuples,
                                         TrackingRecHit2DSOAView const *__restrict__ hhp,
                                         HitContainer *__restrict__ hitDetIndices) {
  int first = blockDim.x * blockIdx.x + threadIdx.x;
  // copy the offsets
  for (int idx = first, ntot = tuples->totOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
    hitDetIndices->off[idx] = tuples->off[idx];
  }
  // translate the contents
  auto const &hh = *hhp;
  for (int idx = first, ntot = tuples->size(); idx < ntot; idx += gridDim.x * blockDim.x) {
    hitDetIndices->content[idx] = hh.detectorIndex(tuples->content[idx]);
  }
}
// kernel_fillNLayers: cache each track's layer count in the SoA.
// ...
    auto nHits = tracks.nHits(idx);
    // ...
    tracks.nLayers(idx) = tracks.computeNumberOfLayers(idx);
// kernel_doStatsForHitInTracks: count used and shared (duplicated) hits.
// ...
  for (int idx = first, ntot = hitToTuple->nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
    if (hitToTuple->size(idx) == 0)
      continue;  // hit not used by any track
    atomicAdd(&c.nUsedHits, 1);
    if (hitToTuple->size(idx) > 1)
      atomicAdd(&c.nDupHits, 1);
  }
// kernel_countSharedHit: for every hit shared by at least two decent tracks,
// count for each triplet how many of its hits are shared.
__global__ void kernel_countSharedHit(int *__restrict__ nshared,
                                      HitContainer const *__restrict__ ptuples,
                                      Quality const *__restrict__ quality,
                                      CAHitNtupletGeneratorKernelsGPU::HitToTuple const *__restrict__ phitToTuple) {
  constexpr auto loose = pixelTrack::Quality::loose;

  auto &hitToTuple = *phitToTuple;
  auto const &foundNtuplets = *ptuples;

  int first = blockDim.x * blockIdx.x + threadIdx.x;
  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
    if (hitToTuple.size(idx) < 2)
      continue;

    // count the tracks of at least loose quality sharing this hit
    int nt = 0;
    for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
      if (quality[*it] < loose)
        continue;
      ++nt;
    }
    if (nt < 2)
      continue;

    // bump the shared-hit count of the triplets only
    for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
      if (foundNtuplets.size(*it) > 3)
        continue;
      atomicAdd(&nshared[*it], 1);
    }
  }
}
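// nshared feeds kernel_markSharedHit below: a triplet sharing more than two of
// its three hits with other loose-or-better tracks carries almost no unique
// information and gets demoted.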
// kernel_markSharedHit: demote the tracks found above to share too many hits.
__global__ void kernel_markSharedHit(int const *__restrict__ nshared,
                                     HitContainer const *__restrict__ tuples,
                                     Quality *__restrict__ quality,
                                     bool dupPassThrough) {
  // ...
  // quality to assign to the rejected tracks
  auto const reject = dupPassThrough ? loose : dup;
  // ...
    if (tuples->size(idx) == 0)
      break;  // guard
    if (quality[idx] <= reject)
      continue;
    if (nshared[idx] > 2)
      quality[idx] = reject;
// kernel_rejectDuplicate: among the tracks sharing a hit, reject of every
// compatible pair the one with fewer layers (then lower quality, then score).
__global__ void kernel_rejectDuplicate(TkSoA const *__restrict__ ptracks,
                                       Quality *__restrict__ quality,
                                       uint16_t nmin,
                                       bool dupPassThrough,
                                       CAHitNtupletGeneratorKernelsGPU::HitToTuple const *__restrict__ phitToTuple) {
  // quality to assign to the rejected tracks
  auto const reject = dupPassThrough ? pixelTrack::Quality::loose : pixelTrack::Quality::dup;

  auto &hitToTuple = *phitToTuple;
  auto const &tracks = *ptracks;

  int first = blockDim.x * blockIdx.x + threadIdx.x;
  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
    if (hitToTuple.size(idx) < 2)
      continue;

    // the score is the transverse impact parameter (nl is unused here)
    auto score = [&](auto it, auto nl) { return std::abs(tracks.tip(it)); };

    // full combinatorics over the tracks sharing this hit
    for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
      auto const it = *ip;
      auto qi = quality[it];
      if (qi <= reject)
        continue;
      auto opi = tracks.stateAtBS.state(it)(2);         // 1/pt at the beam spot
      auto e2opi = tracks.stateAtBS.covariance(it)(9);  // its variance
      auto cti = tracks.stateAtBS.state(it)(3);         // cot(theta)
      auto e2cti = tracks.stateAtBS.covariance(it)(12);
      auto nli = tracks.nLayers(it);
      for (auto jp = ip + 1; jp != hitToTuple.end(idx); ++jp) {
        auto const jt = *jp;
        auto qj = quality[jt];
        if (qj <= reject)
          continue;
        auto opj = tracks.stateAtBS.state(jt)(2);
        auto ctj = tracks.stateAtBS.state(jt)(3);
        auto dct = nSigma2 * (tracks.stateAtBS.covariance(jt)(12) + e2cti);
        if ((cti - ctj) * (cti - ctj) > dct)
          continue;
        auto dop = nSigma2 * (tracks.stateAtBS.covariance(jt)(9) + e2opi);
        if ((opi - opj) * (opi - opj) > dop)
          continue;
        auto nlj = tracks.nLayers(jt);
        if (nlj < nli || (nlj == nli && (qj < qi || (qj == qi && score(it, nli) < score(jt, nlj)))))
          quality[jt] = reject;
        else {
          quality[it] = reject;
          break;
        }
      }
    }
  }
}
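// Unlike kernel_fastDuplicateRemover (which walks cells), this pass walks the
// hit -> tuple map, so it also catches duplicates that share hits but no cell;
// note that the layer count takes precedence over quality in the comparison.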
// kernel_sharedHitCleaner: among the tracks sharing a hit, reject those
// spanning fewer layers than the longest high-quality track on that hit.
__global__ void kernel_sharedHitCleaner(TrackingRecHit2DSOAView const *__restrict__ hhp,
                                        TkSoA const *__restrict__ ptracks,
                                        Quality *__restrict__ quality,
                                        uint16_t nmin,
                                        bool dupPassThrough,
                                        CAHitNtupletGeneratorKernelsGPU::HitToTuple const *__restrict__ phitToTuple) {
  // quality to assign to the rejected tracks
  auto const reject = dupPassThrough ? pixelTrack::Quality::loose : pixelTrack::Quality::dup;

  auto &hitToTuple = *phitToTuple;
  auto const &tracks = *ptracks;

  auto const &hh = *hhp;
  int l1end = hh.hitsLayerStart()[1];  // first hit index past barrel layer 1

  int first = blockDim.x * blockIdx.x + threadIdx.x;
  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
    if (hitToTuple.size(idx) < 2)
      continue;

    int8_t maxNl = 0;
    // find the maximum layer count among the tracks sharing this hit
    for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
      // ...
      auto nl = tracks.nLayers(*it);
      maxNl = std::max(nl, maxNl);
    }
    // ...
    for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
      auto nl = tracks.nLayers(*it);
      // spare tracks longer than nmin when the shared hit sits on bpix1
      if (idx < l1end and nl > nmin)
        continue;
      if (nl < maxNl && quality[*it] > reject)
        quality[*it] = reject;
    }
  }
}
// kernel_tripletCleaner: when all the good tracks sharing a hit are triplets,
// keep only the one with the best transverse impact parameter.
__global__ void kernel_tripletCleaner(TkSoA const *__restrict__ ptracks,
                                      Quality *__restrict__ quality,
                                      uint16_t nmin,
                                      bool dupPassThrough,
                                      CAHitNtupletGeneratorKernelsGPU::HitToTuple const *__restrict__ phitToTuple) {
  // quality to assign to the rejected tracks
  auto const reject = pixelTrack::Quality::loose;
  // min quality of "good" tracks
  auto const good = pixelTrack::Quality::strict;

  auto &hitToTuple = *phitToTuple;
  auto const &tracks = *ptracks;

  int first = blockDim.x * blockIdx.x + threadIdx.x;
  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
    if (hitToTuple.size(idx) < 2)
      continue;

    float mc = maxFloat;
    uint16_t im = tkNotFound;
    bool onlyTriplets = true;

    // check whether all the good tracks on this hit are triplets
    for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
      if (quality[*it] <= good)
        continue;
      onlyTriplets &= tracks.isTriplet(*it);
      if (!onlyTriplets)
        break;
    }

    if (!onlyTriplets)
      continue;

    // choose the triplet with the best (smallest) tip
    for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
      auto const it = *ip;
      if (quality[it] >= good && std::abs(tracks.tip(it)) < mc) {
        mc = std::abs(tracks.tip(it));
        im = it;
      }
    }

    if (tkNotFound == im)
      continue;

    // mark the worse ambiguities
    for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
      auto const it = *ip;
      if (quality[it] > reject && it != im)
        quality[it] = reject;  // no race: simple assignment of the same constant
    }
  }
}
// kernel_simpleTripletCleaner: keep the best track on the hit and reject only
// the triplets among its competitors.
__global__ void kernel_simpleTripletCleaner(TkSoA const *__restrict__ ptracks,
                                            Quality *__restrict__ quality,
                                            uint16_t nmin,
                                            bool dupPassThrough,
                                            CAHitNtupletGeneratorKernelsGPU::HitToTuple const *__restrict__ phitToTuple) {
  // ...
  for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
    if (hitToTuple.size(idx) < 2)
      continue;

    float mc = maxFloat;
    uint16_t im = tkNotFound;

    // choose the track with the best (smallest) tip among the good ones
    for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
      auto const it = *ip;
      if (quality[it] >= good && std::abs(tracks.tip(it)) < mc) {
        mc = std::abs(tracks.tip(it));
        im = it;
      }
    }

    if (tkNotFound == im)
      continue;

    // mark the worse ambiguities: only triplets are rejected here
    for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
      auto const it = *ip;
      if (quality[it] > reject && tracks.isTriplet(it) && it != im)
        quality[it] = reject;  // no race: simple assignment of the same constant
    }
  }
}
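// The two triplet cleaners differ in scope: kernel_tripletCleaner acts only
// when every good track on the hit is a triplet, while the "simple" variant
// always keeps one best track and rejects the competing triplets.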
// kernel_print_found_ntuplets: debug dump of the reconstructed tuples.
__global__ void kernel_print_found_ntuplets(/* ... */
                                            HitContainer const *__restrict__ ptuples,
                                            TkSoA const *__restrict__ ptracks,
                                            Quality const *__restrict__ quality,
                                            /* ... */
                                            int32_t maxPrint,
                                            int iev) {
  auto const &foundNtuplets = *ptuples;
  // ...
  auto nh = foundNtuplets.size(i);
  // ...
  printf("TK: %d %d %d %f %f %f %f %f %f %f %d %d %d %d %d\n",
         /* ... quality, hit count and fitted parameters ... */
         *foundNtuplets.begin(i),
         *(foundNtuplets.begin(i) + 1),
         *(foundNtuplets.begin(i) + 2),
         nh > 3 ? int(*(foundNtuplets.begin(i) + 3)) : -1,
         nh > 4 ? int(*(foundNtuplets.begin(i) + 4)) : -1);
}
895 "||Counters | nEvents | nHits | nCells | nTuples | nFitTacks | nLooseTracks | nGoodTracks | nUsedHits | "
898 "nUsedCells | nZeroTrackCells ||\n");
899 printf(
"Counters Raw %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n",
912 printf(
"Counters Norm %lld || %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.3f| %.3f| %.3f||\n",
914 c.nHits /
double(
c.nEvents),
915 c.nCells /
double(
c.nEvents),
916 c.nTuples /
double(
c.nEvents),
917 c.nFitTracks /
double(
c.nEvents),
918 c.nLooseTracks /
double(
c.nEvents),
919 c.nGoodTracks /
double(
c.nEvents),
920 c.nUsedHits /
double(
c.nEvents),
921 c.nDupHits /
double(
c.nEvents),
922 c.nKilledCells /
double(
c.nCells),
923 c.nEmptyCells /
double(
c.nCells),
924 c.nZeroTrackCells /
double(
c.nCells));