CMS 3D CMS Logo

splitVertices.h
Go to the documentation of this file.
1 #ifndef RecoVertex_PixelVertexFinding_plugins_alpaka_splitVertices_h
2 #define RecoVertex_PixelVertexFinding_plugins_alpaka_splitVertices_h
3 
4 #include <algorithm>
5 #include <cmath>
6 #include <cstdint>
7 
8 #include <alpaka/alpaka.hpp>
9 
13 
14 #include "vertexFinder.h"
15 
17 
21 
22  ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) void splitVertices(
24  constexpr bool verbose = false; // in principle the compiler should optmize out if false
25  constexpr uint32_t MAXTK = 512;
26 
27  auto& it = alpaka::declareSharedVar<uint32_t[MAXTK], __COUNTER__>(acc); // track index
28  auto& zz = alpaka::declareSharedVar<float[MAXTK], __COUNTER__>(acc); // z pos
29  auto& newV = alpaka::declareSharedVar<uint8_t[MAXTK], __COUNTER__>(acc); // 0 or 1
30  auto& ww = alpaka::declareSharedVar<float[MAXTK], __COUNTER__>(acc); // z weight
31  auto& nq = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc); // number of track for this vertex
32 
33  // one vertex per block
34  for (auto kv : cms::alpakatools::independent_groups(acc, data.nvFinal())) {
35  int32_t ndof = trkdata[kv].ndof();
36  if (ndof < 4)
37  continue;
38  if (data[kv].chi2() < maxChi2 * float(ndof))
39  continue;
40 
41  ALPAKA_ASSERT_ACC(ndof < int32_t(MAXTK));
42 
43  if ((uint32_t)ndof >= MAXTK)
44  continue; // too bad FIXME
45 
47  // reset the number of tracks for the current vertex
48  nq = 0u;
49  }
50  alpaka::syncBlockThreads(acc);
51 
52  // cache the data of the tracks associated to the current vertex into shared memory
53  for (auto k : cms::alpakatools::independent_group_elements(acc, ws.ntrks())) {
54  if (ws[k].iv() == int(kv)) {
55  auto index = alpaka::atomicInc(acc, &nq, MAXTK, alpaka::hierarchy::Threads{});
56  it[index] = k;
57  zz[index] = ws[k].zt() - data[kv].zv();
58  newV[index] = zz[index] < 0 ? 0 : 1;
59  ww[index] = 1.f / ws[k].ezt2();
60  }
61  }
62 
63  // the new vertices
64  auto& znew = alpaka::declareSharedVar<float[2], __COUNTER__>(acc);
65  auto& wnew = alpaka::declareSharedVar<float[2], __COUNTER__>(acc);
66  alpaka::syncBlockThreads(acc);
67 
68  ALPAKA_ASSERT_ACC(int(nq) == ndof + 1);
69 
70  int maxiter = 20;
71  // kt-min....
72  bool more = true;
73  while (alpaka::syncBlockThreadsPredicate<alpaka::BlockOr>(acc, more)) {
74  more = false;
76  znew[0] = 0;
77  znew[1] = 0;
78  wnew[0] = 0;
79  wnew[1] = 0;
80  }
81  alpaka::syncBlockThreads(acc);
82 
83  for (auto k : cms::alpakatools::uniform_elements(acc, nq)) {
84  auto i = newV[k];
85  alpaka::atomicAdd(acc, &znew[i], zz[k] * ww[k], alpaka::hierarchy::Threads{});
86  alpaka::atomicAdd(acc, &wnew[i], ww[k], alpaka::hierarchy::Threads{});
87  }
88  alpaka::syncBlockThreads(acc);
89 
91  znew[0] /= wnew[0];
92  znew[1] /= wnew[1];
93  }
94  alpaka::syncBlockThreads(acc);
95 
96  for (auto k : cms::alpakatools::uniform_elements(acc, nq)) {
97  auto d0 = fabs(zz[k] - znew[0]);
98  auto d1 = fabs(zz[k] - znew[1]);
99  auto newer = d0 < d1 ? 0 : 1;
100  more |= newer != newV[k];
101  newV[k] = newer;
102  }
103  --maxiter;
104  if (maxiter <= 0)
105  more = false;
106  }
107 
108  // avoid empty vertices
109  if (0 == wnew[0] || 0 == wnew[1])
110  continue;
111 
112  // quality cut
113  auto dist2 = (znew[0] - znew[1]) * (znew[0] - znew[1]);
114 
115  auto chi2Dist = dist2 / (1.f / wnew[0] + 1.f / wnew[1]);
116 
117  if constexpr (verbose) {
119  printf("inter %d %f %f\n", 20 - maxiter, chi2Dist, dist2 * data[kv].wv());
120  }
121 
122  if (chi2Dist < 4)
123  continue;
124 
125  // get a new global vertex
126  auto& igv = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
128  igv = alpaka::atomicAdd(acc, &ws.nvIntermediate(), 1u, alpaka::hierarchy::Blocks{});
129  alpaka::syncBlockThreads(acc);
130  for (auto k : cms::alpakatools::uniform_elements(acc, nq)) {
131  if (1 == newV[k])
132  ws[it[k]].iv() = igv;
133  }
134 
135  // synchronise the threads before starting the next iteration of the loop over the vertices and resetting the shared memory
136  alpaka::syncBlockThreads(acc);
137  } // loop on vertices
138  }
139 
141  public:
142  ALPAKA_FN_ACC void operator()(
143  Acc1D const& acc, VtxSoAView data, TrkSoAView trkdata, WsSoAView ws, float maxChi2) const {
145  }
146  };
147 
148 } // namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder
149 
150 #endif // RecoVertex_PixelVertexFinding_plugins_alpaka_splitVertices_h
ALPAKA_FN_ACC constexpr bool once_per_block(TAcc const &acc)
ALPAKA_FN_ACC auto uniform_elements(TAcc const &acc, TArgs... args)
Definition: workdivision.h:311
ALPAKA_ASSERT_ACC(nvFinal<=nvIntermediate)
ALPAKA_FN_ACC ALPAKA_FN_INLINE VtxSoAView TrkSoAView WsSoAView float maxChi2
Definition: splitVertices.h:23
::reco::ZVertexTracksSoAView TrkSoAView
Definition: sortByPt2.h:23
bool verbose
ALPAKA_FN_ACC auto independent_group_elements(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) void fitVertices(Acc1D const &acc
ALPAKA_FN_ACC auto independent_groups(TAcc const &acc, TArgs... args)
ZVertexSoAHeterogeneousLayout<>::View ZVertexSoAView
ZVertexTracksSoA::View ZVertexTracksSoAView
Definition: ZVertexSoA.h:33
T1 atomicInc(T1 *a, T2 b)
Definition: cudaCompat.h:48
std::vector< Block > Blocks
Definition: Block.h:99
static const MaxIter maxiter
splitVertices(pdata, pws, maxChi2ForSplit)
PixelVertexWSSoALayout<>::View PixelVertexWorkSpaceSoAView
::vertexFinder::PixelVertexWorkSpaceSoAView WsSoAView
Definition: sortByPt2.h:24
static constexpr float d0
ALPAKA_FN_ACC void operator()(Acc1D const &acc, VtxSoAView data, TrkSoAView trkdata, WsSoAView ws, float maxChi2) const
static constexpr float d1
ALPAKA_ACCELERATOR_NAMESPACE::Acc1D Acc1D
Definition: LSTEvent.dev.cc:15
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61