12 #include <cuda_runtime.h>
38 constexpr
float nSigma2 = 25.f;
47 uint32_t
const *__restrict__
nCells,
63 atomicAdd(&
c.nFitTracks, tupleMultiplicity->size());
68 printf(
"number of found cells %d, found tuples %d with total hits %d out of %d %d\n",
75 assert(foundNtuplets->size(apc->
get().
m) == 0);
81 if (foundNtuplets->size(idx) > 7)
82 printf(
"ERROR %d, %d\n", idx, foundNtuplets->size(idx));
84 for (
auto ih = foundNtuplets->begin(idx); ih != foundNtuplets->end(idx); ++ih)
91 printf(
"Tuples overflow\n");
92 if (*nCells >= maxNumberOfDoublets)
93 printf(
"Cells overflow\n");
94 if (cellNeighbors && cellNeighbors->
full())
95 printf(
"cellNeighbors overflow\n");
96 if (cellTracks && cellTracks->
full())
97 printf(
"cellTracks overflow\n");
98 if (
int(hitToTuple->
nOnes()) < nHits)
103 auto const &thisCell = cells[idx];
104 if (thisCell.hasFishbone() && !thisCell.isKilled())
106 if (thisCell.outerNeighbors().full())
107 printf(
"OuterNeighbors overflow %d in %d\n", idx, thisCell.layerPairId());
108 if (thisCell.tracks().full())
109 printf(
"Tracks overflow %d in %d\n", idx, thisCell.layerPairId());
110 if (thisCell.isKilled())
112 if (!thisCell.unused())
114 if ((0 == hitToTuple->size(thisCell.inner_hit_id())) && (0 == hitToTuple->size(thisCell.outer_hit_id())))
120 printf(
"OuterHitOfCell overflow %d\n", idx);
129 auto const &thisCell = cells[idx];
130 if (!thisCell.isKilled())
133 for (
auto it : thisCell.tracks())
134 quality[it] = reject;
141 uint32_t
const *__restrict__ nCells,
153 auto const &thisCell = cells[idx];
155 if (thisCell.tracks().size() < 2)
161 for (
auto it : thisCell.tracks()) {
162 auto nl =
tracks.nLayers(it);
170 for (
auto it : thisCell.tracks()) {
171 if (
tracks.nLayers(it) < maxNl)
172 quality[it] = reject;
179 uint32_t
const *__restrict__ nCells,
181 bool dupPassThrough) {
190 auto const &thisCell = cells[idx];
191 if (thisCell.tracks().size() < 2)
195 uint16_t im = tkNotFound;
205 auto score = [&](
auto it) {
return std::abs(tracks->tip(it)); };
208 int ntr = thisCell.tracks().size();
209 for (
int i = 0;
i < ntr - 1; ++
i) {
210 auto it = thisCell.tracks()[
i];
211 auto qi = tracks->quality(it);
214 auto opi = tracks->stateAtBS.state(it)(2);
215 auto e2opi = tracks->stateAtBS.covariance(it)(9);
216 auto cti = tracks->stateAtBS.state(it)(3);
217 auto e2cti = tracks->stateAtBS.covariance(it)(12);
218 for (
auto j =
i + 1;
j < ntr; ++
j) {
219 auto jt = thisCell.tracks()[
j];
220 auto qj = tracks->quality(jt);
224 if (foundNtuplets->size(it) != foundNtuplets->size(jt))
227 auto opj = tracks->stateAtBS.state(jt)(2);
228 auto ctj = tracks->stateAtBS.state(jt)(3);
229 auto dct = nSigma2 * (tracks->stateAtBS.covariance(jt)(12) + e2cti);
230 if ((cti - ctj) * (cti - ctj) > dct)
232 auto dop = nSigma2 * (tracks->stateAtBS.covariance(jt)(9) + e2opi);
233 if ((opi - opj) * (opi - opj) >
dop)
235 if ((qj < qi) || (qj == qi && score(it) < score(jt)))
236 tracks->quality(jt) =
reject;
238 tracks->quality(it) =
reject;
246 for (
auto it : thisCell.tracks()) {
247 if (tracks->quality(it) > maxQual)
248 maxQual = tracks->quality(it);
251 if (maxQual <= loose)
255 for (
auto it : thisCell.tracks()) {
256 if (tracks->quality(it) == maxQual && score(it) < mc) {
262 if (tkNotFound == im)
266 for (
auto it : thisCell.tracks()) {
267 if (tracks->quality(it) > loose && it != im)
268 tracks->quality(it) =
loose;
277 uint32_t
const *__restrict__ nCells,
286 auto const &
hh = *
hhp;
292 if (0 == (firstCellIndex +
first)) {
297 for (
int idx = firstCellIndex, nt = (*nCells); idx <
nt; idx +=
gridDim.y *
blockDim.y) {
298 auto cellIndex = idx;
299 auto &thisCell = cells[idx];
301 if (
int(innerHitId) < isOuterHitOfCell.
offset)
303 int numberOfPossibleNeighbors = isOuterHitOfCell[innerHitId].size();
304 auto vi = isOuterHitOfCell[innerHitId].data();
306 auto ri = thisCell.inner_r(
hh);
307 auto zi = thisCell.inner_z(
hh);
309 auto ro = thisCell.outer_r(
hh);
310 auto zo = thisCell.outer_z(
hh);
314 auto otherCell =
__ldg(vi +
j);
315 auto &oc = cells[otherCell];
316 auto r1 = oc.inner_r(
hh);
317 auto z1 = oc.inner_z(
hh);
318 bool aligned = GPUCACell::areAlignedRZ(
326 isBarrel ? CAThetaCutBarrel : CAThetaCutForward);
327 if (aligned && thisCell.dcaCut(
hh,
332 oc.addOuterNeighbor(cellIndex, *cellNeighbors);
342 uint32_t
const *nCells,
349 auto const &
hh = *
hhp;
353 auto const &thisCell = cells[idx];
354 if (thisCell.isKilled())
357 if (thisCell.outerNeighbors().empty())
359 auto pid = thisCell.layerPairId();
360 auto doit = minHitsPerNtuplet > 3 ? pid < 3 : pid < 8 || pid > 12;
364 thisCell.find_ntuplets<6>(
375 auto &thisCell = cells[idx];
376 if (!thisCell.tracks().empty())
382 Quality const *__restrict__ quality,
386 auto nhits = foundNtuplets->size(it);
395 tupleMultiplicity->count(
nhits);
400 Quality const *__restrict__ quality,
404 auto nhits = foundNtuplets->size(it);
413 tupleMultiplicity->fill(
nhits, it);
420 Quality *__restrict__ quality) {
422 for (
int it = first, nt = tuples->nOnes(); it <
nt; it +=
gridDim.x *
blockDim.x) {
423 auto nhits = tuples->size(it);
439 for (
int i = 0;
i < 5; ++
i) {
440 isNaN |=
std::isnan(tracks->stateAtBS.state(it)(
i));
444 printf(
"NaN in fit %d size %d chi2 %f\n", it, tuples->size(it), tracks->chi2(it));
453 auto roughLog = [](
float x) {
462 uint32_t lsb = 1 < 21;
466 int ex = int(z.i >> 2) - 127;
470 const float frac[4] = {0.160497f, 0.452172f, 0.694562f, 0.901964f};
471 return float(ex) + frac[
f];
475 float pt = std::min<float>(tracks->pt(it), cuts.
chi2MaxPt);
477 if (tracks->chi2(it) >= chi2Cut) {
478 #ifdef NTUPLE_FIT_DEBUG
479 printf(
"Bad chi2 %d size %d pt %f eta %f chi2 %f\n",
506 Quality const *__restrict__ quality,
510 if (tuples->size(idx) == 0)
522 Quality const *__restrict__ quality,
526 if (tuples->size(idx) == 0)
528 for (
auto h = tuples->begin(idx);
h != tuples->end(idx); ++
h)
529 hitToTuple->count(*
h);
534 Quality const *__restrict__ quality,
538 if (tuples->size(idx) == 0)
540 for (
auto h = tuples->begin(idx);
h != tuples->end(idx); ++
h)
541 hitToTuple->fill(*
h, idx);
550 for (
int idx = first, ntot = tuples->totOnes(); idx <
ntot; idx +=
gridDim.x *
blockDim.x) {
551 hitDetIndices->off[idx] = tuples->off[idx];
558 hitDetIndices->content[idx] =
hh.detectorIndex(tuples->content[idx]);
569 auto nHits = tracks.nHits(idx);
571 tracks.nLayers(idx) = tracks.computeNumberOfLayers(idx);
579 for (
int idx = first, ntot = hitToTuple->nOnes(); idx <
ntot; idx +=
gridDim.x *
blockDim.x) {
580 if (hitToTuple->size(idx) == 0)
583 if (hitToTuple->size(idx) > 1)
588 __global__ void kernel_countSharedHit(
int *__restrict__ nshared,
590 Quality const *__restrict__ quality,
598 for (
int idx = first, ntot = hitToTuple.nOnes(); idx <
ntot; idx +=
gridDim.x *
blockDim.x) {
599 if (hitToTuple.size(idx) < 2)
605 for (
auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
606 if (quality[*it] < loose)
615 for (
auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
616 if (foundNtuplets.size(*it) > 3)
624 __global__ void kernel_markSharedHit(
int const *__restrict__ nshared,
627 bool dupPassThrough) {
634 auto const reject = dupPassThrough ? loose : dup;
638 if (tuples->size(idx) == 0)
640 if (quality[idx] <= reject)
642 if (nshared[idx] > 2)
648 __global__ void kernel_rejectDuplicate(
TkSoA const *__restrict__ ptracks,
660 for (
int idx = first, ntot = hitToTuple.nOnes(); idx <
ntot; idx +=
gridDim.x *
blockDim.x) {
661 if (hitToTuple.size(idx) < 2)
670 auto score = [&](
auto it,
auto nl) {
return std::abs(tracks.tip(it)); };
673 for (
auto ip = hitToTuple.begin(idx); ip < hitToTuple.end(idx) - 1; ++ip) {
675 auto qi = quality[it];
678 auto opi = tracks.stateAtBS.state(it)(2);
679 auto e2opi = tracks.stateAtBS.covariance(it)(9);
680 auto cti = tracks.stateAtBS.state(it)(3);
681 auto e2cti = tracks.stateAtBS.covariance(it)(12);
682 auto nli = tracks.nLayers(it);
683 for (
auto jp = ip + 1; jp < hitToTuple.end(idx); ++jp) {
685 auto qj = quality[jt];
688 auto opj = tracks.stateAtBS.state(jt)(2);
689 auto ctj = tracks.stateAtBS.state(jt)(3);
690 auto dct = nSigma2 * (tracks.stateAtBS.covariance(jt)(12) + e2cti);
691 if ((cti - ctj) * (cti - ctj) > dct)
693 auto dop = nSigma2 * (tracks.stateAtBS.covariance(jt)(9) + e2opi);
694 if ((opi - opj) * (opi - opj) >
dop)
696 auto nlj = tracks.nLayers(jt);
697 if (nlj < nli || (nlj == nli && (qj < qi || (qj == qi && score(it, nli) < score(jt, nlj)))))
709 TkSoA const *__restrict__ ptracks,
722 auto const &
hh = *
hhp;
726 for (
int idx = first, ntot = hitToTuple.nOnes(); idx <
ntot; idx +=
gridDim.x *
blockDim.x) {
727 if (hitToTuple.size(idx) < 2)
733 for (
auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
737 auto nl = tracks.nLayers(*it);
748 for (
auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
749 auto nl = tracks.nLayers(*it);
752 if (idx < l1end and nl > nmin)
755 if (nl < maxNl && quality[*it] > reject)
761 __global__ void kernel_tripletCleaner(
TkSoA const *__restrict__ ptracks,
775 for (
int idx = first, ntot = hitToTuple.nOnes(); idx <
ntot; idx +=
gridDim.x *
blockDim.x) {
776 if (hitToTuple.size(idx) < 2)
780 uint16_t im = tkNotFound;
781 bool onlyTriplets =
true;
784 for (
auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
785 if (quality[*it] <=
good)
787 onlyTriplets &= tracks.isTriplet(*it);
797 for (
auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
799 if (quality[it] >=
good &&
std::abs(tracks.tip(it)) < mc) {
805 if (tkNotFound == im)
809 for (
auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
811 if (quality[it] > reject && it != im)
819 TkSoA const *__restrict__ ptracks,
833 for (
int idx = first, ntot = hitToTuple.nOnes(); idx <
ntot; idx +=
gridDim.x *
blockDim.x) {
834 if (hitToTuple.size(idx) < 2)
838 uint16_t im = tkNotFound;
841 for (
auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
843 if (quality[it] >=
good &&
std::abs(tracks.tip(it)) < mc) {
849 if (tkNotFound == im)
853 for (
auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
855 if (quality[it] > reject && tracks.isTriplet(it) && it != im)
856 quality[it] = reject;
864 TkSoA const *__restrict__ ptracks,
865 Quality const *__restrict__ quality,
871 auto const &
hh = *
hhp;
872 auto const &foundNtuplets = *
ptuples;
876 auto nh = foundNtuplets.size(
i);
879 if (quality[
i] < loose)
881 printf(
"TK: %d %d %d %d %f %f %f %f %f %f %f %.3f %.3f %.3f %.3f %.3f %.3f %.3f\n",
894 hh.zGlobal(*foundNtuplets.begin(i)),
895 hh.zGlobal(*(foundNtuplets.begin(i) + 1)),
896 hh.zGlobal(*(foundNtuplets.begin(i) + 2)),
897 nh > 3 ?
hh.zGlobal(
int(*(foundNtuplets.begin(i) + 3))) : 0,
898 nh > 4 ?
hh.zGlobal(
int(*(foundNtuplets.begin(i) + 4))) : 0,
899 nh > 5 ?
hh.zGlobal(
int(*(foundNtuplets.begin(i) + 5))) : 0,
900 nh > 6 ?
hh.zGlobal(
int(*(foundNtuplets.begin(i) +
nh - 1))) : 0);
907 "||Counters | nEvents | nHits | nCells | nTuples | nFitTacks | nLooseTracks | nGoodTracks | nUsedHits | "
911 "nUsedCells | nZeroTrackCells ||\n");
912 printf(
"Counters Raw %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n",
926 printf(
"Counters Norm %lld || %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.3f| %.3f| %.3f| %.3f||\n",
928 c.nHits /
double(
c.nEvents),
929 c.nCells /
double(
c.nEvents),
930 c.nTuples /
double(
c.nEvents),
931 c.nFitTracks /
double(
c.nEvents),
932 c.nLooseTracks /
double(
c.nEvents),
933 c.nGoodTracks /
double(
c.nEvents),
934 c.nUsedHits /
double(
c.nEvents),
935 c.nDupHits /
double(
c.nEvents),
936 c.nFishCells /
double(
c.nCells),
937 c.nKilledCells /
double(
c.nCells),
938 c.nEmptyCells /
double(
c.nCells),
939 c.nZeroTrackCells /
double(
c.nCells));
constexpr int32_t maxHitsOnTrack
const edm::EventSetup & c
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const * cellNeighbors
HitContainer const *__restrict__ ptuples
HitContainer const *__restrict__ Quality const *__restrict__ CAHitNtupletGeneratorKernelsGPU::HitToTuple const *__restrict__ phitToTuple
unsigned long long nLooseTracks
constexpr uint32_t maxNumberOfQuadruplets
uint32_t const *__restrict__ TkSoA const *__restrict__ ptracks
TrackingRecHit2DHeterogeneous< cms::cudacompat::GPUTraits > TrackingRecHit2DGPU
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const * hitToTuple
auto const & tracks
cannot be loose
constexpr auto nOnes() const
constexpr bool full() const
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell const int32_t uint32_t CAHitNtupletGeneratorKernelsGPU::Counters * counters
TkSoA const *__restrict__ CAHitNtupletGeneratorKernelsGPU::QualityCuts cuts
auto const & foundNtuplets
TrackingRecHit2DSOAView const *__restrict__ HitContainer *__restrict__ hitDetIndices
HitContainer const *__restrict__ tuples
caConstants::TupleMultiplicity const *__restrict__ tupleMultiplicity
printf("params %d %f %f %f\n", minT, eps, errmax, chi2max)
Abs< T >::type abs(const T &t)
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell const isOuterHitOfCell
TrackSoA::HitContainer HitContainer
Quality *__restrict__ uint16_t nmin
TrackSoAHeterogeneousT< maxNumber()> TrackSoA
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell const int32_t uint32_t maxNumberOfDoublets
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ cells
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter * apc
auto const good
min quality of good
HitContainer const *__restrict__ TkSoA const *__restrict__ Quality const *__restrict__ CAHitNtupletGeneratorKernelsGPU::HitToTuple const *__restrict__ int32_t firstPrint
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const * cellTracks
HitContainer const *__restrict__ TkSoA const *__restrict__ Quality const *__restrict__ CAHitNtupletGeneratorKernelsGPU::HitToTuple const *__restrict__ int32_t int32_t int iev
caConstants::TupleMultiplicity const *__restrict__ HitsOnGPU const *__restrict__ hhp
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell const int32_t nHits
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ nCells
uint32_t const *__restrict__ TkSoA const *__restrict__ Quality bool dupPassThrough
constexpr bool full() const
__device__ __host__ Counters get() const
constexpr unsigned int inner_hit_id() const
constexpr auto totOnes() const
cms::cuda::OneToManyAssoc< tindex_type,-1, 4 *maxTuples > HitToTuple
HitContainer const *__restrict__ TkSoA const *__restrict__ Quality const *__restrict__ CAHitNtupletGeneratorKernelsGPU::HitToTuple const *__restrict__ int32_t int32_t lastPrint
unsigned long long nGoodTracks
The Signals That Services Can Subscribe To This is based on ActivityRegistry h
Helper function to determine trigger accepts.
constexpr uint32_t last_bpix1_detIndex
constexpr uint32_t last_barrel_detIndex
cms::cuda::OneToManyAssoc< tindex_type, maxHitsOnTrack+1, maxTuples > TupleMultiplicity
T1 atomicAdd(T1 *a, T2 b)
OuterHitOfCellContainer * container