1 #include <alpaka/alpaka.hpp> 16 using namespace reco::pfClustering;
32 float mag1 = sqrtf(pos1.
x * pos1.
x + pos1.
y * pos1.
y + pos1.
z * pos1.
z);
33 float cosTheta1 = mag1 > 0.0 ? pos1.
z / mag1 : 1.0;
34 float eta1 = 0.5f * logf((1.0
f + cosTheta1) / (1.0
f - cosTheta1));
35 float phi1 = atan2f(pos1.
y, pos1.
x);
37 float mag2 = sqrtf(pos2.
x * pos2.
x + pos2.
y * pos2.
y + pos2.
z * pos2.
z);
38 float cosTheta2 =
mag2 > 0.0 ? pos2.
z /
mag2 : 1.0;
39 float eta2 = 0.5f * logf((1.0
f + cosTheta2) / (1.0
f - cosTheta2));
40 float phi2 = atan2f(pos2.
y, pos2.
x);
45 return (deta * deta + dphi * dphi);
57 int seedIdx = pfClusteringVars[topoSeedBegin + seedNum].topoSeedList();
58 return fracView[pfClusteringVars[seedIdx].seedFracOffsets() + rhNum].frac();
62 template <
bool debug = false>
63 ALPAKA_FN_ACC
static void updateClusterPos(reco::PFClusterParamsDeviceCollection::ConstView pfClusParams,
67 reco::PFRecHitDeviceCollection::ConstView
pfRecHits,
71 const auto norm = (
frac < pfClusParams.minFracInCalc() ? 0.0f :
std::max(0.0
f, logf(rh_energy * rhENormInv)));
73 printf(
"\t\t\trechit %d: norm = %f\tfrac = %f\trh_energy = %f\tpos = (%f, %f, %f)\n",
82 pos4.
x += rechitPos.
x * norm;
83 pos4.
y += rechitPos.
y * norm;
84 pos4.
z += rechitPos.
z * norm;
90 template <
bool debug = false,
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
93 reco::PFClusterParamsDeviceCollection::ConstView pfClusParams,
94 const reco::PFRecHitHCALTopologyDeviceCollection::ConstView
topology,
97 reco::PFRecHitDeviceCollection::ConstView
pfRecHits,
101 int tid = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u];
103 int&
i = alpaka::declareSharedVar<int, __COUNTER__>(acc);
104 int& nRHOther = alpaka::declareSharedVar<int, __COUNTER__>(acc);
105 unsigned int& iter = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
106 float& tol = alpaka::declareSharedVar<float, __COUNTER__>(acc);
107 float& clusterEnergy = alpaka::declareSharedVar<float, __COUNTER__>(acc);
108 float& rhENormInv = alpaka::declareSharedVar<float, __COUNTER__>(acc);
109 float& seedEnergy = alpaka::declareSharedVar<float, __COUNTER__>(acc);
110 Position4& clusterPos = alpaka::declareSharedVar<Position4, __COUNTER__>(acc);
111 Position4& prevClusterPos = alpaka::declareSharedVar<Position4, __COUNTER__>(acc);
112 Position4& seedPos = alpaka::declareSharedVar<Position4, __COUNTER__>(acc);
113 bool& notDone = alpaka::declareSharedVar<bool, __COUNTER__>(acc);
115 i = pfClusteringVars[pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList();
116 nRHOther = nRHTopo - 1;
118 clusterPos = seedPos;
119 prevClusterPos = seedPos;
121 clusterEnergy = seedEnergy;
122 tol = pfClusParams.stoppingTolerance();
128 rhENormInv = pfClusParams.recHitEnergyNormInvHB_vec()[
pfRecHits[
i].depth() - 1];
130 rhENormInv = pfClusParams.recHitEnergyNormInvHE_vec()[
pfRecHits[
i].depth() - 1];
133 printf(
"Rechit %d has invalid layer %d!\n",
i,
pfRecHits[
i].layer());
140 alpaka::syncBlockThreads(acc);
143 int rhFracOffset = -1;
145 float rhEnergy = -1., rhPosNorm = -1.;
147 if (tid < nRHOther) {
149 pfClusteringVars[
i].seedFracOffsets() + tid + 1;
150 j = fracView[rhFracOffset].pfrhIdx();
153 rhPosNorm = fmaxf(0., logf(rhEnergy * rhENormInv));
155 alpaka::syncBlockThreads(acc);
160 printf(
"\n--- Now on iter %d for topoId %d ---\n", iter, topoId);
162 float dist2 = -1., d2 = -1.,
fraction = -1.;
163 if (tid < nRHOther) {
165 dist2 = (clusterPos.x - rhPos.
x) * (clusterPos.x - rhPos.
x) +
166 (clusterPos.y - rhPos.
y) * (clusterPos.y - rhPos.
y) +
167 (clusterPos.z - rhPos.
z) * (clusterPos.z - rhPos.
z);
169 d2 = dist2 / pfClusParams.showerSigma2();
170 fraction = clusterEnergy * rhENormInv * expf(-0.5
f * d2);
177 fracView[rhFracOffset].frac() =
fraction;
179 alpaka::syncBlockThreads(acc);
183 printf(
"Computing cluster position for topoId %d\n", topoId);
188 clusterPos = seedPos;
189 clusterEnergy = seedEnergy;
191 alpaka::syncBlockThreads(acc);
196 alpaka::atomicAdd(acc, &clusterPos.x, rhPos.
x * rhPosNorm, alpaka::hierarchy::Threads{});
197 alpaka::atomicAdd(acc, &clusterPos.y, rhPos.
y * rhPosNorm, alpaka::hierarchy::Threads{});
198 alpaka::atomicAdd(acc, &clusterPos.z, rhPos.
z * rhPosNorm, alpaka::hierarchy::Threads{});
201 alpaka::syncBlockThreads(acc);
205 if (clusterPos.w >= pfClusParams.minAllowedNormalization()) {
207 clusterPos.x /= clusterPos.w;
208 clusterPos.y /= clusterPos.w;
209 clusterPos.z /= clusterPos.w;
212 printf(
"\tPF cluster (seed %d) energy = %f\tposition = (%f, %f, %f)\n",
220 printf(
"\tPF cluster (seed %d) position norm (%f) less than minimum (%f)\n",
223 pfClusParams.minAllowedNormalization());
228 float diff2 =
dR2(prevClusterPos, clusterPos);
230 printf(
"\tPF cluster (seed %d) has diff2 = %f\n",
i, diff2);
231 prevClusterPos = clusterPos;
233 float tol2 = tol * tol;
235 notDone = (diff2 > tol2) && (iter < pfClusParams.maxIterations());
238 printf(
"\tTopoId %d has diff2 = %f greater than squared tolerance %f (continuing)\n", topoId, diff2, tol2);
240 printf(
"\tTopoId %d has diff2 = %f LESS than squared tolerance %f (terminating!)\n", topoId, diff2, tol2);
243 alpaka::syncBlockThreads(acc);
247 pfClusteringVars[pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList();
248 int seedIdx = pfClusteringVars[rhIdx].rhIdxToSeedIdx();
249 clusterView[seedIdx].energy() = clusterEnergy;
250 clusterView[seedIdx].x() = clusterPos.x;
251 clusterView[seedIdx].y() = clusterPos.y;
252 clusterView[seedIdx].z() = clusterPos.z;
258 template <
bool debug = false,
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
261 reco::PFClusterParamsDeviceCollection::ConstView pfClusParams,
262 const reco::PFRecHitHCALTopologyDeviceCollection::ConstView
topology,
266 reco::PFRecHitDeviceCollection::ConstView
pfRecHits,
270 int tid = alpaka::getIdx<alpaka::Block,
274 int& nRHNotSeed = alpaka::declareSharedVar<int, __COUNTER__>(acc);
275 int& topoSeedBegin = alpaka::declareSharedVar<int, __COUNTER__>(acc);
276 int&
stride = alpaka::declareSharedVar<int, __COUNTER__>(acc);
277 int& iter = alpaka::declareSharedVar<int, __COUNTER__>(acc);
278 float& tol = alpaka::declareSharedVar<float, __COUNTER__>(acc);
279 float& diff2 = alpaka::declareSharedVar<float, __COUNTER__>(acc);
280 float& rhENormInv = alpaka::declareSharedVar<float, __COUNTER__>(acc);
281 bool& notDone = alpaka::declareSharedVar<bool, __COUNTER__>(acc);
282 auto& clusterPos = alpaka::declareSharedVar<Position4[100], __COUNTER__>(acc);
283 auto& prevClusterPos = alpaka::declareSharedVar<Position4[100], __COUNTER__>(acc);
284 auto& clusterEnergy = alpaka::declareSharedVar<float[100], __COUNTER__>(acc);
285 auto& rhFracSum = alpaka::declareSharedVar<float[threadsPerBlockForClustering], __COUNTER__>(acc);
286 auto&
seeds = alpaka::declareSharedVar<int[100], __COUNTER__>(acc);
287 auto& rechits = alpaka::declareSharedVar<int[threadsPerBlockForClustering], __COUNTER__>(acc);
290 nRHNotSeed = nRHTopo - nSeeds + 1;
291 topoSeedBegin = pfClusteringVars[topoId].topoSeedOffsets();
292 tol = pfClusParams.stoppingTolerance() *
293 powf(fmaxf(1.0, nSeeds - 1), 2.0);
294 stride = alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u];
298 int i = pfClusteringVars[topoSeedBegin].topoSeedList();
304 rhENormInv = pfClusParams.recHitEnergyNormInvHB_vec()[
pfRecHits[
i].depth() - 1];
306 rhENormInv = pfClusParams.recHitEnergyNormInvHE_vec()[
pfRecHits[
i].depth() - 1];
309 printf(
"Rechit %d has invalid layer %d!\n",
i,
pfRecHits[
i].layer());
313 alpaka::syncBlockThreads(acc);
316 seeds[tid] = pfClusteringVars[topoSeedBegin + tid].topoSeedList();
317 if (tid < nRHNotSeed - 1)
319 fracView[pfClusteringVars[pfClusteringVars[topoSeedBegin].topoSeedList()].seedFracOffsets() + tid + 1]
322 alpaka::syncBlockThreads(acc);
326 printf(
"\n===========================================================================================\n");
327 printf(
"Processing topo cluster %d with nSeeds = %d nRHTopo = %d and seeds (", topoId, nSeeds, nRHTopo);
328 for (
int s = 0;
s < nSeeds;
s++) {
333 if (nRHTopo == nSeeds) {
336 printf(
") and other rechits (");
337 for (
int r = 1; r < nRHNotSeed; r++) {
341 printf(
"Invalid rhNum (%d) for get RhFracIdx!\n", r);
343 printf(
"%d", rechits[r - 1]);
348 alpaka::syncBlockThreads(acc);
355 prevClusterPos[tid] = clusterPos[tid];
357 for (
int r = 0; r < (nRHNotSeed - 1); r++) {
358 fracView[pfClusteringVars[
i].seedFracOffsets() + r + 1].pfrhIdx() = rechits[r];
359 fracView[pfClusteringVars[
i].seedFracOffsets() + r + 1].frac() = -1.;
362 alpaka::syncBlockThreads(acc);
364 int rhThreadIdx = -1;
366 if (tid < (nRHNotSeed - 1)) {
367 rhThreadIdx = rechits[tid];
372 int seedThreadIdx = -1;
374 float seedEnergy = -1.;
378 printf(
"tid: %d\n", tid);
381 pfRecHits[seedThreadIdx].neighbours()(1),
382 pfRecHits[seedThreadIdx].neighbours()(2),
383 pfRecHits[seedThreadIdx].neighbours()(3)};
384 seedEnergy =
pfRecHits[seedThreadIdx].energy();
393 printf(
"\n--- Now on iter %d for topoId %d ---\n", iter, topoId);
401 if (tid < (nRHNotSeed - 1)) {
402 for (
int s = 0;
s < nSeeds;
s++) {
403 float dist2 = (clusterPos[
s].x - rhThreadPos.
x) * (clusterPos[
s].
x - rhThreadPos.
x) +
404 (clusterPos[
s].y - rhThreadPos.
y) * (clusterPos[
s].y - rhThreadPos.
y) +
405 (clusterPos[
s].z - rhThreadPos.
z) * (clusterPos[
s].z - rhThreadPos.
z);
407 float d2 = dist2 / pfClusParams.showerSigma2();
408 float fraction = clusterEnergy[
s] * rhENormInv * expf(-0.5
f * d2);
413 alpaka::syncBlockThreads(acc);
415 if (tid < (nRHNotSeed - 1)) {
416 for (
int s = 0;
s < nSeeds;
s++) {
418 float dist2 = (clusterPos[
s].x - rhThreadPos.
x) * (clusterPos[
s].
x - rhThreadPos.
x) +
419 (clusterPos[
s].y - rhThreadPos.
y) * (clusterPos[
s].y - rhThreadPos.
y) +
420 (clusterPos[
s].z - rhThreadPos.
z) * (clusterPos[
s].z - rhThreadPos.
z);
422 float d2 = dist2 / pfClusParams.showerSigma2();
423 float fraction = clusterEnergy[
s] * rhENormInv * expf(-0.5
f * d2);
425 if (rhFracSum[tid] > pfClusParams.minFracTot()) {
426 float fracpct =
fraction / rhFracSum[tid];
427 if (fracpct >
cutoffFraction || (d2 < cutoffDistance && fracpct > pfClusParams.minFracToKeep())) {
428 fracView[pfClusteringVars[
i].seedFracOffsets() + tid + 1].frac() = fracpct;
430 fracView[pfClusteringVars[
i].seedFracOffsets() + tid + 1].frac() = -1;
433 fracView[pfClusteringVars[
i].seedFracOffsets() + tid + 1].frac() = -1;
437 alpaka::syncBlockThreads(acc);
441 printf(
"Computing cluster position for topoId %d\n", topoId);
446 clusterPos[tid] = seedInitClusterPos;
447 clusterEnergy[tid] = seedEnergy;
449 printf(
"Cluster %d (seed %d) has energy %f\tpos = (%f, %f, %f, %f)\n",
459 alpaka::syncBlockThreads(acc);
463 for (
int r = 0; r < nRHNotSeed - 1; r++) {
465 float frac =
getRhFrac(pfClusteringVars, topoSeedBegin, fracView, tid, r + 1);
470 if (nSeeds == 1 ||
j == seedNeighbors.
x ||
j == seedNeighbors.
y ||
j == seedNeighbors.
z ||
471 j == seedNeighbors.
w)
476 alpaka::syncBlockThreads(acc);
480 if (clusterPos[tid].
w >= pfClusParams.minAllowedNormalization()) {
482 clusterPos[tid].x /= clusterPos[tid].w;
483 clusterPos[tid].y /= clusterPos[tid].w;
484 clusterPos[tid].z /= clusterPos[tid].w;
487 printf(
"\tCluster %d (seed %d) energy = %f\tposition = (%f, %f, %f)\n",
496 printf(
"\tCluster %d (seed %d) position norm (%f) less than minimum (%f)\n",
500 pfClusParams.minAllowedNormalization());
501 clusterPos[tid].x = 0.0;
502 clusterPos[tid].y = 0.0;
503 clusterPos[tid].z = 0.0;
506 alpaka::syncBlockThreads(acc);
509 float delta2 =
dR2(prevClusterPos[tid], clusterPos[tid]);
511 printf(
"\tCluster %d (seed %d) has delta2 = %f\n", tid,
seeds[tid], delta2);
513 prevClusterPos[tid] = clusterPos[tid];
515 alpaka::syncBlockThreads(acc);
518 float tol2 = tol * tol;
520 notDone = (diff2 > tol2) && ((
unsigned int)iter < pfClusParams.maxIterations());
523 printf(
"\tTopoId %d has diff2 = %f greater than squared tolerance %f (continuing)\n", topoId, diff2, tol2);
525 printf(
"\tTopoId %d has diff2 = %f LESS than squared tolerance %f (terminating!)\n", topoId, diff2, tol2);
528 alpaka::syncBlockThreads(acc);
533 int rhIdx = pfClusteringVars[tid + pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList();
534 int seedIdx = pfClusteringVars[rhIdx].rhIdxToSeedIdx();
535 clusterView[seedIdx].energy() = clusterEnergy[tid];
536 clusterView[seedIdx].x() = clusterPos[tid].x;
537 clusterView[seedIdx].y() = clusterPos[tid].y;
538 clusterView[seedIdx].z() = clusterPos[tid].z;
545 template <
bool debug = false,
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
547 reco::PFClusterParamsDeviceCollection::ConstView pfClusParams,
548 const reco::PFRecHitHCALTopologyDeviceCollection::ConstView
topology,
552 reco::PFRecHitDeviceCollection::ConstView
pfRecHits,
556 Position4* __restrict__ globalClusterPos,
557 Position4* __restrict__ globalPrevClusterPos,
558 float* __restrict__ globalClusterEnergy,
559 float* __restrict__ globalRhFracSum,
560 int* __restrict__ globalSeeds,
561 int* __restrict__ globalRechits) {
562 int& nRHNotSeed = alpaka::declareSharedVar<int, __COUNTER__>(acc);
563 int&
blockIdx = alpaka::declareSharedVar<int, __COUNTER__>(acc);
564 int& topoSeedBegin = alpaka::declareSharedVar<int, __COUNTER__>(acc);
565 int&
stride = alpaka::declareSharedVar<int, __COUNTER__>(acc);
566 int& iter = alpaka::declareSharedVar<int, __COUNTER__>(acc);
567 float& tol = alpaka::declareSharedVar<float, __COUNTER__>(acc);
568 float& diff2 = alpaka::declareSharedVar<float, __COUNTER__>(acc);
569 float& rhENormInv = alpaka::declareSharedVar<float, __COUNTER__>(acc);
570 bool& notDone = alpaka::declareSharedVar<bool, __COUNTER__>(acc);
575 float* clusterEnergy = globalClusterEnergy +
blockIdx;
576 float* rhFracSum = globalRhFracSum +
blockIdx;
578 int* rechits = globalRechits +
blockIdx;
581 nRHNotSeed = nRHTopo - nSeeds + 1;
582 topoSeedBegin = pfClusteringVars[topoId].topoSeedOffsets();
583 tol = pfClusParams.stoppingTolerance() *
584 powf(fmaxf(1.0, nSeeds - 1), 2.0);
585 stride = alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u];
589 int i = pfClusteringVars[topoSeedBegin].topoSeedList();
595 rhENormInv = pfClusParams.recHitEnergyNormInvHB_vec()[
pfRecHits[
i].depth() - 1];
597 rhENormInv = pfClusParams.recHitEnergyNormInvHE_vec()[
pfRecHits[
i].depth() - 1];
600 printf(
"Rechit %d has invalid layer %d!\n",
i,
pfRecHits[
i].layer());
604 alpaka::syncBlockThreads(acc);
606 for (
int n = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u];
n < nRHTopo;
n +=
stride) {
608 seeds[
n] = pfClusteringVars[topoSeedBegin +
n].topoSeedList();
609 if (
n < nRHNotSeed - 1)
611 fracView[pfClusteringVars[pfClusteringVars[topoSeedBegin].topoSeedList()].seedFracOffsets() +
n + 1]
614 alpaka::syncBlockThreads(acc);
618 printf(
"\n===========================================================================================\n");
619 printf(
"Processing topo cluster %d with nSeeds = %d nRHTopo = %d and seeds (", topoId, nSeeds, nRHTopo);
620 for (
int s = 0;
s < nSeeds;
s++) {
625 if (nRHTopo == nSeeds) {
628 printf(
") and other rechits (");
629 for (
int r = 1; r < nRHNotSeed; r++) {
633 printf(
"Invalid rhNum (%d) for get RhFracIdx!\n", r);
635 printf(
"%d", rechits[r - 1]);
640 alpaka::syncBlockThreads(acc);
644 for (
int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u];
s < nSeeds;
s +=
stride) {
647 prevClusterPos[
s] = clusterPos[
s];
649 for (
int r = 0; r < (nRHNotSeed - 1); r++) {
650 fracView[pfClusteringVars[
i].seedFracOffsets() + r + 1].pfrhIdx() = rechits[r];
651 fracView[pfClusteringVars[
i].seedFracOffsets() + r + 1].frac() = -1.;
654 alpaka::syncBlockThreads(acc);
659 printf(
"\n--- Now on iter %d for topoId %d ---\n", iter, topoId);
665 for (
int tid = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; tid < nRHNotSeed - 1; tid +=
stride) {
667 int rhThreadIdx = rechits[tid];
670 for (
int s = 0;
s < nSeeds;
s++) {
671 float dist2 = (clusterPos[
s].x - rhThreadPos.
x) * (clusterPos[
s].
x - rhThreadPos.
x) +
672 (clusterPos[
s].y - rhThreadPos.
y) * (clusterPos[
s].y - rhThreadPos.
y) +
673 (clusterPos[
s].z - rhThreadPos.
z) * (clusterPos[
s].z - rhThreadPos.
z);
675 float d2 = dist2 / pfClusParams.showerSigma2();
676 float fraction = clusterEnergy[
s] * rhENormInv * expf(-0.5
f * d2);
681 alpaka::syncBlockThreads(acc);
683 for (
int tid = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; tid < nRHNotSeed - 1; tid +=
stride) {
684 int rhThreadIdx = rechits[tid];
687 for (
int s = 0;
s < nSeeds;
s++) {
689 float dist2 = (clusterPos[
s].x - rhThreadPos.
x) * (clusterPos[
s].
x - rhThreadPos.
x) +
690 (clusterPos[
s].y - rhThreadPos.
y) * (clusterPos[
s].y - rhThreadPos.
y) +
691 (clusterPos[
s].z - rhThreadPos.
z) * (clusterPos[
s].z - rhThreadPos.
z);
693 float d2 = dist2 / pfClusParams.showerSigma2();
694 float fraction = clusterEnergy[
s] * rhENormInv * expf(-0.5
f * d2);
696 if (rhFracSum[tid] > pfClusParams.minFracTot()) {
697 float fracpct =
fraction / rhFracSum[tid];
698 if (fracpct >
cutoffFraction || (d2 < cutoffDistance && fracpct > pfClusParams.minFracToKeep())) {
699 fracView[pfClusteringVars[
i].seedFracOffsets() + tid + 1].frac() = fracpct;
701 fracView[pfClusteringVars[
i].seedFracOffsets() + tid + 1].frac() = -1;
704 fracView[pfClusteringVars[
i].seedFracOffsets() + tid + 1].frac() = -1;
708 alpaka::syncBlockThreads(acc);
712 printf(
"Computing cluster position for topoId %d\n", topoId);
716 for (
int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u];
s < nSeeds;
s +=
stride) {
721 clusterEnergy[
s] =
pfRecHits[seedRhIdx].energy();
723 printf(
"Cluster %d (seed %d) has energy %f\tpos = (%f, %f, %f, %f)\n",
733 alpaka::syncBlockThreads(acc);
736 for (
int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u];
s < nSeeds;
s +=
stride) {
738 for (
int r = 0; r < nRHNotSeed - 1; r++) {
740 float frac =
getRhFrac(pfClusteringVars, topoSeedBegin, fracView,
s, r + 1);
745 if (nSeeds == 1 ||
j ==
pfRecHits[seedRhIdx].neighbours()(0) ||
j ==
pfRecHits[seedRhIdx].neighbours()(1) ||
751 alpaka::syncBlockThreads(acc);
754 for (
int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u];
s < nSeeds;
s +=
stride) {
755 if (clusterPos[
s].
w >= pfClusParams.minAllowedNormalization()) {
757 clusterPos[
s].x /= clusterPos[
s].w;
758 clusterPos[
s].y /= clusterPos[
s].w;
759 clusterPos[
s].z /= clusterPos[
s].w;
762 printf(
"\tCluster %d (seed %d) energy = %f\tposition = (%f, %f, %f)\n",
771 printf(
"\tCluster %d (seed %d) position norm (%f) less than minimum (%f)\n",
775 pfClusParams.minAllowedNormalization());
776 clusterPos[
s].x = 0.0;
777 clusterPos[
s].y = 0.0;
778 clusterPos[
s].z = 0.0;
781 alpaka::syncBlockThreads(acc);
783 for (
int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u];
s < nSeeds;
s +=
stride) {
784 float delta2 =
dR2(prevClusterPos[
s], clusterPos[
s]);
786 printf(
"\tCluster %d (seed %d) has delta2 = %f\n",
s,
seeds[
s], delta2);
788 prevClusterPos[
s] = clusterPos[
s];
790 alpaka::syncBlockThreads(acc);
793 float tol2 = tol * tol;
795 notDone = (diff2 > tol2) && ((
unsigned int)iter < pfClusParams.maxIterations());
798 printf(
"\tTopoId %d has diff2 = %f greater than squared tolerance %f (continuing)\n", topoId, diff2, tol2);
800 printf(
"\tTopoId %d has diff2 = %f LESS than squared tolerance %f (terminating!)\n", topoId, diff2, tol2);
803 alpaka::syncBlockThreads(acc);
806 for (
int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u];
s < nSeeds;
s +=
stride) {
807 int rhIdx = pfClusteringVars[
s + pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList();
808 int seedIdx = pfClusteringVars[rhIdx].rhIdxToSeedIdx();
809 clusterView[seedIdx].energy() =
pfRecHits[
s].energy();
814 alpaka::syncBlockThreads(acc);
819 template <
bool debug = false,
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
822 reco::PFClusterParamsDeviceCollection::ConstView pfClusParams,
823 const reco::PFRecHitHCALTopologyDeviceCollection::ConstView
topology,
827 reco::PFRecHitDeviceCollection::ConstView
pfRecHits,
831 int& nRHNotSeed = alpaka::declareSharedVar<int, __COUNTER__>(acc);
832 int& topoSeedBegin = alpaka::declareSharedVar<int, __COUNTER__>(acc);
833 int&
stride = alpaka::declareSharedVar<int, __COUNTER__>(acc);
834 int& iter = alpaka::declareSharedVar<int, __COUNTER__>(acc);
835 float& tol = alpaka::declareSharedVar<float, __COUNTER__>(acc);
836 float& diff2 = alpaka::declareSharedVar<float, __COUNTER__>(acc);
837 float& rhENormInv = alpaka::declareSharedVar<float, __COUNTER__>(acc);
838 bool& notDone = alpaka::declareSharedVar<bool, __COUNTER__>(acc);
840 auto& clusterPos = alpaka::declareSharedVar<Position4[400], __COUNTER__>(acc);
841 auto& prevClusterPos = alpaka::declareSharedVar<Position4[400], __COUNTER__>(acc);
842 auto& clusterEnergy = alpaka::declareSharedVar<float[400], __COUNTER__>(acc);
843 auto& rhFracSum = alpaka::declareSharedVar<float[1500], __COUNTER__>(acc);
844 auto&
seeds = alpaka::declareSharedVar<int[400], __COUNTER__>(acc);
845 auto& rechits = alpaka::declareSharedVar<int[1500], __COUNTER__>(acc);
848 nRHNotSeed = nRHTopo - nSeeds + 1;
849 topoSeedBegin = pfClusteringVars[topoId].topoSeedOffsets();
850 tol = pfClusParams.stoppingTolerance() *
851 powf(fmaxf(1.0, nSeeds - 1), 2.0);
852 stride = alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u];
856 int i = pfClusteringVars[topoSeedBegin].topoSeedList();
862 rhENormInv = pfClusParams.recHitEnergyNormInvHB_vec()[
pfRecHits[
i].depth() - 1];
864 rhENormInv = pfClusParams.recHitEnergyNormInvHE_vec()[
pfRecHits[
i].depth() - 1];
867 printf(
"Rechit %d has invalid layer %d!\n",
i,
pfRecHits[
i].layer());
871 alpaka::syncBlockThreads(acc);
873 for (
int n = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u];
n < nRHTopo;
n +=
stride) {
875 seeds[
n] = pfClusteringVars[topoSeedBegin +
n].topoSeedList();
876 if (
n < nRHNotSeed - 1)
878 fracView[pfClusteringVars[pfClusteringVars[topoSeedBegin].topoSeedList()].seedFracOffsets() +
n + 1]
881 alpaka::syncBlockThreads(acc);
885 printf(
"\n===========================================================================================\n");
886 printf(
"Processing topo cluster %d with nSeeds = %d nRHTopo = %d and seeds (", topoId, nSeeds, nRHTopo);
887 for (
int s = 0;
s < nSeeds;
s++) {
892 if (nRHTopo == nSeeds) {
895 printf(
") and other rechits (");
896 for (
int r = 1; r < nRHNotSeed; r++) {
900 printf(
"Invalid rhNum (%d) for get RhFracIdx!\n", r);
902 printf(
"%d", rechits[r - 1]);
907 alpaka::syncBlockThreads(acc);
911 for (
int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u];
s < nSeeds;
s +=
stride) {
914 prevClusterPos[
s] = clusterPos[
s];
916 for (
int r = 0; r < (nRHNotSeed - 1); r++) {
917 fracView[pfClusteringVars[
i].seedFracOffsets() + r + 1].pfrhIdx() = rechits[r];
918 fracView[pfClusteringVars[
i].seedFracOffsets() + r + 1].frac() = -1.;
921 alpaka::syncBlockThreads(acc);
926 printf(
"\n--- Now on iter %d for topoId %d ---\n", iter, topoId);
932 for (
int tid = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; tid < nRHNotSeed - 1; tid +=
stride) {
934 int rhThreadIdx = rechits[tid];
937 for (
int s = 0;
s < nSeeds;
s++) {
938 float dist2 = (clusterPos[
s].x - rhThreadPos.
x) * (clusterPos[
s].
x - rhThreadPos.
x) +
939 (clusterPos[
s].y - rhThreadPos.
y) * (clusterPos[
s].y - rhThreadPos.
y) +
940 (clusterPos[
s].z - rhThreadPos.
z) * (clusterPos[
s].z - rhThreadPos.
z);
942 float d2 = dist2 / pfClusParams.showerSigma2();
943 float fraction = clusterEnergy[
s] * rhENormInv * expf(-0.5
f * d2);
948 alpaka::syncBlockThreads(acc);
950 for (
int tid = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; tid < nRHNotSeed - 1; tid +=
stride) {
951 int rhThreadIdx = rechits[tid];
954 for (
int s = 0;
s < nSeeds;
s++) {
956 float dist2 = (clusterPos[
s].x - rhThreadPos.
x) * (clusterPos[
s].
x - rhThreadPos.
x) +
957 (clusterPos[
s].y - rhThreadPos.
y) * (clusterPos[
s].y - rhThreadPos.
y) +
958 (clusterPos[
s].z - rhThreadPos.
z) * (clusterPos[
s].z - rhThreadPos.
z);
960 float d2 = dist2 / pfClusParams.showerSigma2();
961 float fraction = clusterEnergy[
s] * rhENormInv * expf(-0.5
f * d2);
963 if (rhFracSum[tid] > pfClusParams.minFracTot()) {
964 float fracpct =
fraction / rhFracSum[tid];
965 if (fracpct >
cutoffFraction || (d2 < cutoffDistance && fracpct > pfClusParams.minFracToKeep())) {
966 fracView[pfClusteringVars[
i].seedFracOffsets() + tid + 1].frac() = fracpct;
968 fracView[pfClusteringVars[
i].seedFracOffsets() + tid + 1].frac() = -1;
971 fracView[pfClusteringVars[
i].seedFracOffsets() + tid + 1].frac() = -1;
975 alpaka::syncBlockThreads(acc);
979 printf(
"Computing cluster position for topoId %d\n", topoId);
983 for (
int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u];
s < nSeeds;
s +=
stride) {
988 clusterEnergy[
s] =
pfRecHits[seedRhIdx].energy();
990 printf(
"Cluster %d (seed %d) has energy %f\tpos = (%f, %f, %f, %f)\n",
1000 alpaka::syncBlockThreads(acc);
1003 for (
int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u];
s < nSeeds;
s +=
stride) {
1005 for (
int r = 0; r < nRHNotSeed - 1; r++) {
1007 float frac =
getRhFrac(pfClusteringVars, topoSeedBegin, fracView,
s, r + 1);
1012 if (nSeeds == 1 ||
j ==
pfRecHits[seedRhIdx].neighbours()(0) ||
j ==
pfRecHits[seedRhIdx].neighbours()(1) ||
1018 alpaka::syncBlockThreads(acc);
1021 for (
int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u];
s < nSeeds;
s +=
stride) {
1022 if (clusterPos[
s].
w >= pfClusParams.minAllowedNormalization()) {
1024 clusterPos[
s].x /= clusterPos[
s].w;
1025 clusterPos[
s].y /= clusterPos[
s].w;
1026 clusterPos[
s].z /= clusterPos[
s].w;
1029 printf(
"\tCluster %d (seed %d) energy = %f\tposition = (%f, %f, %f)\n",
1038 printf(
"\tCluster %d (seed %d) position norm (%f) less than minimum (%f)\n",
1042 pfClusParams.minAllowedNormalization());
1043 clusterPos[
s].x = 0.0;
1044 clusterPos[
s].y = 0.0;
1045 clusterPos[
s].z = 0.0;
1048 alpaka::syncBlockThreads(acc);
1050 for (
int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u];
s < nSeeds;
s +=
stride) {
1051 float delta2 =
dR2(prevClusterPos[
s], clusterPos[
s]);
1053 printf(
"\tCluster %d (seed %d) has delta2 = %f\n",
s,
seeds[
s], delta2);
1055 prevClusterPos[
s] = clusterPos[
s];
1057 alpaka::syncBlockThreads(acc);
1060 float tol2 = tol * tol;
1062 notDone = (diff2 > tol2) && ((
unsigned int)iter < pfClusParams.maxIterations());
1065 printf(
"\tTopoId %d has diff2 = %f greater than tolerance %f (continuing)\n", topoId, diff2, tol2);
1067 printf(
"\tTopoId %d has diff2 = %f LESS than tolerance %f (terminating!)\n", topoId, diff2, tol2);
1070 alpaka::syncBlockThreads(acc);
1073 for (
int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u];
s < nSeeds;
s +=
stride) {
1074 int rhIdx = pfClusteringVars[
s + pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList();
1075 int seedIdx = pfClusteringVars[rhIdx].rhIdxToSeedIdx();
1076 clusterView[seedIdx].energy() =
pfRecHits[
s].energy();
1086 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
1089 const reco::PFClusterParamsDeviceCollection::ConstView pfClusParams,
1090 const reco::PFRecHitHCALTopologyDeviceCollection::ConstView
topology,
1094 uint32_t* __restrict__ nSeeds)
const {
1098 clusterView.size() = nRH;
1103 pfClusteringVars[
i].pfrh_isSeed() = 0;
1104 pfClusteringVars[
i].rhCount() = 0;
1105 pfClusteringVars[
i].topoSeedCount() = 0;
1106 pfClusteringVars[
i].topoRHCount() = 0;
1107 pfClusteringVars[
i].seedFracOffsets() = -1;
1108 pfClusteringVars[
i].topoSeedOffsets() = -1;
1109 pfClusteringVars[
i].topoSeedList() = -1;
1110 clusterView[
i].seedRHIdx() = -1;
1116 float seedThreshold = 9999.;
1117 float topoThreshold = 9999.;
1124 seedThreshold = pfClusParams.seedEThresholdHB_vec()[depthOffset];
1125 topoThreshold = pfClusParams.topoEThresholdHB_vec()[depthOffset];
1127 seedThreshold = pfClusParams.seedEThresholdHE_vec()[depthOffset];
1128 topoThreshold = pfClusParams.topoEThresholdHE_vec()[depthOffset];
1138 pfClusteringVars[
i].pfrh_isSeed() = 1;
1139 for (
int k = 0;
k < 4;
k++) {
1143 pfClusteringVars[
i].pfrh_isSeed() = 0;
1147 if (pfClusteringVars[
i].pfrh_isSeed())
1154 pfClusteringVars[
i].pfrh_passTopoThresh() =
true;
1155 pfClusteringVars[
i].pfrh_topoId() =
i;
1157 pfClusteringVars[
i].pfrh_passTopoThresh() =
false;
1158 pfClusteringVars[
i].pfrh_topoId() = -1;
1167 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
1172 uint32_t* __restrict__ nSeeds)
const {
1176 pfClusteringVars.nEdges() = nRH * 8;
1177 pfClusteringEdgeVars[nRH].pfrh_edgeIdx() = nRH * 8;
1180 pfClusteringEdgeVars[
i].pfrh_edgeIdx() =
i * 8;
1181 pfClusteringVars[
i].pfrh_topoId() = 0;
1182 for (
int j = 0;
j < 8;
j++) {
1184 pfClusteringEdgeVars[
i * 8 +
j].pfrh_edgeList() =
i;
1186 pfClusteringEdgeVars[
i * 8 +
j].pfrh_edgeList() =
pfRecHits[
i].neighbours()(
j);
1197 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
1202 uint32_t* __restrict__ nSeeds)
const {
1204 int& totalSeedOffset = alpaka::declareSharedVar<int, __COUNTER__>(acc);
1205 int& totalSeedFracOffset = alpaka::declareSharedVar<int, __COUNTER__>(acc);
1209 pfClusteringVars.nTopos() = 0;
1210 pfClusteringVars.nRHFracs() = 0;
1211 totalSeedOffset = 0;
1212 totalSeedFracOffset = 0;
1213 pfClusteringVars.pcrhFracSize() = 0;
1216 alpaka::syncBlockThreads(acc);
1220 for (
int rhIdx = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; rhIdx < nRH;
1221 rhIdx += alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u]) {
1222 pfClusteringVars[rhIdx].rhIdxToSeedIdx() = -1;
1223 int topoId = pfClusteringVars[rhIdx].pfrh_topoId();
1228 if (topoId == rhIdx) {
1230 pfClusteringVars[topoIdx].topoIds() =
1234 if (pfClusteringVars[rhIdx].pfrh_isSeed()) {
1240 alpaka::syncBlockThreads(acc);
1243 for (
int topoId = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; topoId < nRH;
1244 topoId += alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u]) {
1245 if (pfClusteringVars[topoId].topoSeedCount() > 0) {
1248 pfClusteringVars[topoId].topoSeedOffsets() =
offset;
1251 alpaka::syncBlockThreads(acc);
1255 for (
int rhIdx = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; rhIdx < nRH;
1256 rhIdx += alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u]) {
1257 int topoId = pfClusteringVars[rhIdx].pfrh_topoId();
1258 if (pfClusteringVars[rhIdx].pfrh_isSeed()) {
1261 int seedIdx = pfClusteringVars[topoId].topoSeedOffsets() +
k;
1262 if ((
unsigned int)seedIdx >= *nSeeds)
1263 printf(
"Warning(contraction) %8d > %8d should not happen, check topoId: %d has %d rh\n",
1268 pfClusteringVars[seedIdx].topoSeedList() = rhIdx;
1269 pfClusteringVars[rhIdx].rhIdxToSeedIdx() = seedIdx;
1270 clusterView[seedIdx].topoId() = topoId;
1271 clusterView[seedIdx].seedRHIdx() = rhIdx;
1272 clusterView[seedIdx].depth() =
pfRecHits[rhIdx].depth();
1276 alpaka::syncBlockThreads(acc);
1279 for (
int rhIdx = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; rhIdx < nRH;
1280 rhIdx += alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u]) {
1281 pfClusteringVars[rhIdx].rhCount() = 1;
1283 int topoId = pfClusteringVars[rhIdx].pfrh_topoId();
1284 if (pfClusteringVars[rhIdx].pfrh_isSeed() && topoId > -1) {
1289 pfClusteringVars[rhIdx].seedFracOffsets() =
offset;
1292 clusterView[pfClusteringVars[rhIdx].rhIdxToSeedIdx()].rhfracOffset() =
1293 pfClusteringVars[rhIdx].seedFracOffsets();
1294 clusterView[pfClusteringVars[rhIdx].rhIdxToSeedIdx()].rhfracSize() =
1295 pfClusteringVars[topoId].topoRHCount() - pfClusteringVars[topoId].topoSeedCount() + 1;
1299 alpaka::syncBlockThreads(acc);
1302 pfClusteringVars.pcrhFracSize() = totalSeedFracOffset;
1303 pfClusteringVars.nRHFracs() = totalSeedFracOffset;
1304 clusterView.nRHFracs() = totalSeedFracOffset;
1305 clusterView.nSeeds() = *nSeeds;
1306 clusterView.nTopos() = pfClusteringVars.nTopos();
1308 if (pfClusteringVars.pcrhFracSize() > 200000)
1309 printf(
"At the end of topoClusterContraction, found large *pcrhFracSize = %d\n",
1310 pfClusteringVars.pcrhFracSize());
1319 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
1329 int topoId = pfClusteringVars[
i].pfrh_topoId();
1330 if (topoId > -1 && pfClusteringVars[
i].pfrh_isSeed() && topoId == pfClusteringVars[
j].pfrh_topoId()) {
1331 if (!pfClusteringVars[
j].pfrh_isSeed()) {
1333 acc, &pfClusteringVars[
i].rhCount(), 1);
1334 auto fraction = fracView[pfClusteringVars[
i].seedFracOffsets() +
k];
1336 fraction.pfcIdx() = pfClusteringVars[
i].rhIdxToSeedIdx();
1337 }
else if (
i ==
j) {
1338 auto seed = fracView[pfClusteringVars[
i].seedFracOffsets()];
1341 seed.pfcIdx() = pfClusteringVars[
i].rhIdxToSeedIdx();
1350 template <
bool debug = false,
typename TAcc,
typename = std::enable_if<!std::is_same_v<Device, alpaka::DevCpu>>>
1353 const reco::PFClusterParamsDeviceCollection::ConstView pfClusParams,
1354 const reco::PFRecHitHCALTopologyDeviceCollection::ConstView
topology,
1359 int& topoId = alpaka::declareSharedVar<int, __COUNTER__>(acc);
1360 int& nRHTopo = alpaka::declareSharedVar<int, __COUNTER__>(acc);
1361 int& nSeeds = alpaka::declareSharedVar<int, __COUNTER__>(acc);
1364 topoId = alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u];
1365 nRHTopo = pfClusteringVars[topoId].topoRHCount();
1366 nSeeds = pfClusteringVars[topoId].topoSeedCount();
1369 alpaka::syncBlockThreads(acc);
1371 if (topoId < nRH && nRHTopo > 0 && nSeeds > 0) {
1372 if (nRHTopo == nSeeds) {
1376 int rhIdx = pfClusteringVars[pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList();
1377 int seedIdx = pfClusteringVars[rhIdx].rhIdxToSeedIdx();
1378 clusterView[seedIdx].energy() =
pfRecHits[rhIdx].energy();
1379 clusterView[seedIdx].x() =
pfRecHits[rhIdx].x();
1380 clusterView[seedIdx].y() =
pfRecHits[rhIdx].y();
1381 clusterView[seedIdx].z() =
pfRecHits[rhIdx].z();
1384 }
else if ((not std::is_same_v<Device, alpaka::DevCpu>)&&nSeeds == 1) {
1387 acc, pfClusParams,
topology, topoId, nRHTopo,
pfRecHits, pfClusteringVars, clusterView, fracView);
1388 }
else if ((not std::is_same_v<Device, alpaka::DevCpu>)&&nSeeds <= 100 &&
1391 acc, pfClusParams,
topology, topoId, nSeeds, nRHTopo,
pfRecHits, pfClusteringVars, clusterView, fracView);
1392 }
else if (nSeeds <= 400 && nRHTopo - nSeeds <= 1500) {
1395 acc, pfClusParams,
topology, topoId, nSeeds, nRHTopo,
pfRecHits, pfClusteringVars, clusterView, fracView);
1399 printf(
"Topo cluster %d has %d seeds and %d rechits. Will be processed in next kernel.\n",
1412 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
1415 const reco::PFClusterParamsDeviceCollection::ConstView pfClusParams,
1416 const reco::PFRecHitHCALTopologyDeviceCollection::ConstView
topology,
1420 Position4* __restrict__ globalClusterPos,
1421 Position4* __restrict__ globalPrevClusterPos,
1422 float* __restrict__ globalClusterEnergy,
1423 float* __restrict__ globalRhFracSum,
1424 int* __restrict__ globalSeeds,
1425 int* __restrict__ globalRechits)
const {
1427 for (
int topoId = alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]; topoId < nRH;
1429 int nRHTopo = pfClusteringVars[topoId].topoRHCount();
1430 int nSeeds = pfClusteringVars[topoId].topoSeedCount();
1433 if (nRHTopo > 0 && (nSeeds > 400 || nRHTopo - nSeeds > 1500)) {
1445 globalPrevClusterPos,
1446 globalClusterEnergy,
1451 alpaka::syncBlockThreads(acc);
1460 globalPrevClusterPos(
1462 globalClusterEnergy(
1479 const int threadsPerBlock = 256;
1483 alpaka::exec<Acc1D>(
queue,
1484 make_workdiv<Acc1D>(
blocks, threadsPerBlock),
1486 pfClusteringVars.view(),
1491 pfrhFractions.view(),
1494 alpaka::exec<Acc1D>(
queue,
1495 make_workdiv<Acc1D>(
blocks, threadsPerBlock),
1498 pfClusteringVars.view(),
1499 pfClusteringEdgeVars.view(),
1502 alpaka::exec<Acc1D>(
queue,
1503 make_workdiv<Acc1D>(
blocks, threadsPerBlock),
1506 pfClusteringVars.view(),
1507 pfClusteringEdgeVars.view());
1508 alpaka::exec<Acc1D>(
queue,
1509 make_workdiv<Acc1D>(
blocks, threadsPerBlock),
1512 pfClusteringVars.view(),
1513 pfClusteringEdgeVars.view());
1514 alpaka::exec<Acc1D>(
queue,
1515 make_workdiv<Acc1D>(
blocks, threadsPerBlock),
1518 pfClusteringVars.view(),
1519 pfClusteringEdgeVars.view());
1521 alpaka::exec<Acc1D>(
queue,
1525 pfClusteringVars.view(),
1530 alpaka::exec<Acc2D>(
queue,
1534 pfClusteringVars.view(),
1535 pfrhFractions.view());
1538 alpaka::exec<Acc1D>(
queue,
1544 pfClusteringVars.view(),
1546 pfrhFractions.view());
1548 alpaka::exec<Acc1D>(
queue,
1555 pfClusteringVars.view(),
1557 pfrhFractions.view(),
static constexpr int threadsPerBlockForClustering
static constexpr uint32_t blocksForExoticClusters
static ALPAKA_FN_ACC auto getSeedRhIdx(int *seeds, int seedNum)
static constexpr float cutoffFraction
cms::alpakatools::device_buffer< Device, uint32_t > nSeeds
ALPAKA_FN_ACC void operator()(const TAcc &acc, const reco::PFRecHitHostCollection::ConstView pfRecHits, const reco::PFClusterParamsDeviceCollection::ConstView pfClusParams, const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology, reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, reco::PFClusterDeviceCollection::View clusterView, reco::PFRecHitFractionDeviceCollection::View fracView) const
static ALPAKA_FN_ACC void hcalFastCluster_multiSeedIterative(const TAcc &acc, reco::PFClusterParamsDeviceCollection::ConstView pfClusParams, const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology, int topoId, int nSeeds, int nRHTopo, reco::PFRecHitDeviceCollection::ConstView pfRecHits, reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, reco::PFClusterDeviceCollection::View clusterView, reco::PFRecHitFractionDeviceCollection::View fracView)
ALPAKA_FN_ACC void operator()(const TAcc &acc, const reco::PFRecHitHostCollection::ConstView pfRecHits, reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, reco::PFRecHitFractionDeviceCollection::View fracView) const
static ALPAKA_FN_ACC void hcalFastCluster_multiSeedParallel(const TAcc &acc, reco::PFClusterParamsDeviceCollection::ConstView pfClusParams, const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology, int topoId, int nSeeds, int nRHTopo, reco::PFRecHitDeviceCollection::ConstView pfRecHits, reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, reco::PFClusterDeviceCollection::View clusterView, reco::PFRecHitFractionDeviceCollection::View fracView)
static constexpr uint32_t maxTopoInput
cms::alpakatools::device_buffer< Device, reco::pfClustering::Position4[]> globalPrevClusterPos
cms::alpakatools::device_buffer< Device, int[]> globalSeeds
static ALPAKA_FN_ACC void hcalFastCluster_exotic(const TAcc &acc, reco::PFClusterParamsDeviceCollection::ConstView pfClusParams, const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology, int topoId, int nSeeds, int nRHTopo, reco::PFRecHitDeviceCollection::ConstView pfRecHits, reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, reco::PFClusterDeviceCollection::View clusterView, reco::PFRecHitFractionDeviceCollection::View fracView, Position4 *__restrict__ globalClusterPos, Position4 *__restrict__ globalPrevClusterPos, float *__restrict__ globalClusterEnergy, float *__restrict__ globalRhFracSum, int *__restrict__ globalSeeds, int *__restrict__ globalRechits)
constexpr uint32_t stride
ALPAKA_FN_ACC void operator()(const TAcc &acc, reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, const reco::PFClusterParamsDeviceCollection::ConstView pfClusParams, const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology, const reco::PFRecHitHostCollection::ConstView pfRecHits, reco::PFClusterDeviceCollection::View clusterView, reco::PFRecHitFractionDeviceCollection::View fracView, uint32_t *__restrict__ nSeeds) const
ALPAKA_FN_ACC void operator()(const TAcc &acc, const reco::PFRecHitHostCollection::ConstView pfRecHits, reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, reco::PFClusterDeviceCollection::View clusterView, uint32_t *__restrict__ nSeeds) const
ALPAKA_FN_ACC static ALPAKA_FN_INLINE float dR2(Position4 pos1, Position4 pos2)
ALPAKA_FN_ACC void operator()(const TAcc &acc, const reco::PFRecHitHostCollection::ConstView pfRecHits, const reco::PFClusterParamsDeviceCollection::ConstView pfClusParams, const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology, reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, reco::PFClusterDeviceCollection::View clusterView, reco::PFRecHitFractionDeviceCollection::View fracView, Position4 *__restrict__ globalClusterPos, Position4 *__restrict__ globalPrevClusterPos, float *__restrict__ globalClusterEnergy, float *__restrict__ globalRhFracSum, int *__restrict__ globalSeeds, int *__restrict__ globalRechits) const
void execute(Queue &queue, const reco::PFClusterParamsDeviceCollection ¶ms, const reco::PFRecHitHCALTopologyDeviceCollection &topology, reco::PFClusteringVarsDeviceCollection &pfClusteringVars, reco::PFClusteringEdgeVarsDeviceCollection &pfClusteringEdgeVars, const reco::PFRecHitHostCollection &pfRecHits, reco::PFClusterDeviceCollection &pfClusters, reco::PFRecHitFractionDeviceCollection &pfrhFractions)
cms::alpakatools::device_buffer< Device, float[]> globalRhFracSum
Abs< T >::type abs(const T &t)
PortableCollection<::reco::PFClusterSoA > PFClusterDeviceCollection
ALPAKA_FN_HOST_ACC static ALPAKA_FN_INLINE float atomicMaxF(const TAcc &acc, float *address, float val)
static constexpr float cutoffDistance
cms::alpakatools::device_buffer< Device, int[]> globalRechits
PortableCollection<::reco::PFClusteringVarsSoA > PFClusteringVarsDeviceCollection
PortableCollection<::reco::PFRecHitHCALTopologySoA > PFRecHitHCALTopologyDeviceCollection
static ALPAKA_FN_ACC auto getRhFrac(reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, int topoSeedBegin, reco::PFRecHitFractionDeviceCollection::View fracView, int seedNum, int rhNum)
Namespace of DDCMS conversion namespace.
PortableCollection<::reco::PFRecHitFractionSoA > PFRecHitFractionDeviceCollection
typename Layout::ConstView ConstView
PortableCollection<::reco::PFClusterParamsSoA > PFClusterParamsDeviceCollection
cms::alpakatools::device_buffer< Device, float[]> globalClusterEnergy
static ALPAKA_FN_ACC void updateClusterPos(reco::PFClusterParamsDeviceCollection::ConstView pfClusParams, Position4 &pos4, float frac, int rhInd, reco::PFRecHitDeviceCollection::ConstView pfRecHits, float rhENormInv)
static constexpr uint32_t kHBHalf
T mag2() const
The vector magnitude squared. Equivalent to vec.dot(vec)
PFClusterProducerKernel(Queue &queue, const reco::PFRecHitHostCollection &pfRecHits)
PortableCollection<::reco::PFClusteringEdgeVarsSoA > PFClusteringEdgeVarsDeviceCollection
static ALPAKA_FN_ACC void hcalFastCluster_singleSeed(const TAcc &acc, reco::PFClusterParamsDeviceCollection::ConstView pfClusParams, const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology, int topoId, int nRHTopo, reco::PFRecHitDeviceCollection::ConstView pfRecHits, reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, reco::PFClusterDeviceCollection::View clusterView, reco::PFRecHitFractionDeviceCollection::View fracView)
cms::alpakatools::device_buffer< Device, reco::pfClustering::Position4[]> globalClusterPos
T1 atomicAdd(T1 *a, T2 b)