CMS 3D CMS Logo

splitVertices.h
Go to the documentation of this file.
1 #ifndef RecoTracker_PixelVertexFinding_plugins_alpaka_splitVertices_h
2 #define RecoTracker_PixelVertexFinding_plugins_alpaka_splitVertices_h
3 
4 #include <algorithm>
5 #include <cmath>
6 #include <cstdint>
7 
8 #include <alpaka/alpaka.hpp>
9 
13 
14 #include "vertexFinder.h"
15 
17 
20  template <typename TAcc>
21  ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) void splitVertices(const TAcc& acc,
23  WsSoAView& pws,
24  float maxChi2) {
25  constexpr bool verbose = false; // in principle the compiler should optmize out if false
26  const uint32_t threadIdxLocal(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]);
27 
28  auto& __restrict__ data = pdata;
29  auto& __restrict__ ws = pws;
30  auto nt = ws.ntrks();
31  float const* __restrict__ zt = ws.zt();
32  float const* __restrict__ ezt2 = ws.ezt2();
33  float* __restrict__ zv = data.zv();
34  float* __restrict__ wv = data.wv();
35  float const* __restrict__ chi2 = data.chi2();
36  uint32_t& nvFinal = data.nvFinal();
37 
38  int32_t const* __restrict__ nn = data.ndof();
39  int32_t* __restrict__ iv = ws.iv();
40 
45 
46  constexpr uint32_t MAXTK = 512;
47 
48  auto& it = alpaka::declareSharedVar<uint32_t[MAXTK], __COUNTER__>(acc); // track index
49  auto& zz = alpaka::declareSharedVar<float[MAXTK], __COUNTER__>(acc); // z pos
50  auto& newV = alpaka::declareSharedVar<uint8_t[MAXTK], __COUNTER__>(acc); // 0 or 1
51  auto& ww = alpaka::declareSharedVar<float[MAXTK], __COUNTER__>(acc); // z weight
52  auto& nq = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc); // number of track for this vertex
53 
54  const uint32_t blockIdx(alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
55  const uint32_t gridDimension(alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
56 
57  // one vertex per block
58  for (auto kv = blockIdx; kv < nvFinal; kv += gridDimension) {
59  if (nn[kv] < 4)
60  continue;
61  if (chi2[kv] < maxChi2 * float(nn[kv]))
62  continue;
63 
64  ALPAKA_ASSERT_ACC(nn[kv] < int32_t(MAXTK));
65 
66  if ((uint32_t)nn[kv] >= MAXTK)
67  continue; // too bad FIXME
68 
69  nq = 0u;
70  alpaka::syncBlockThreads(acc);
71 
72  // copy to local
74  if (iv[k] == int(kv)) {
75  auto old = alpaka::atomicInc(acc, &nq, MAXTK, alpaka::hierarchy::Threads{});
76  zz[old] = zt[k] - zv[kv];
77  newV[old] = zz[old] < 0 ? 0 : 1;
78  ww[old] = 1.f / ezt2[k];
79  it[old] = k;
80  }
81  }
82 
83  // the new vertices
84  auto& znew = alpaka::declareSharedVar<float[2], __COUNTER__>(acc);
85  auto& wnew = alpaka::declareSharedVar<float[2], __COUNTER__>(acc);
86  alpaka::syncBlockThreads(acc);
87 
88  ALPAKA_ASSERT_ACC(int(nq) == nn[kv] + 1);
89 
90  int maxiter = 20;
91  // kt-min....
92  bool more = true;
93  while (alpaka::syncBlockThreadsPredicate<alpaka::BlockOr>(acc, more)) {
94  more = false;
95  if (0 == threadIdxLocal) {
96  znew[0] = 0;
97  znew[1] = 0;
98  wnew[0] = 0;
99  wnew[1] = 0;
100  }
101  alpaka::syncBlockThreads(acc);
102 
103  for (auto k : cms::alpakatools::uniform_elements(acc, nq)) {
104  auto i = newV[k];
105  alpaka::atomicAdd(acc, &znew[i], zz[k] * ww[k], alpaka::hierarchy::Threads{});
106  alpaka::atomicAdd(acc, &wnew[i], ww[k], alpaka::hierarchy::Threads{});
107  }
108  alpaka::syncBlockThreads(acc);
109 
110  if (0 == threadIdxLocal) {
111  znew[0] /= wnew[0];
112  znew[1] /= wnew[1];
113  }
114  alpaka::syncBlockThreads(acc);
115 
116  for (auto k : cms::alpakatools::uniform_elements(acc, nq)) {
117  auto d0 = fabs(zz[k] - znew[0]);
118  auto d1 = fabs(zz[k] - znew[1]);
119  auto newer = d0 < d1 ? 0 : 1;
120  more |= newer != newV[k];
121  newV[k] = newer;
122  }
123  --maxiter;
124  if (maxiter <= 0)
125  more = false;
126  }
127 
128  // avoid empty vertices
129  if (0 == wnew[0] || 0 == wnew[1])
130  continue;
131 
132  // quality cut
133  auto dist2 = (znew[0] - znew[1]) * (znew[0] - znew[1]);
134 
135  auto chi2Dist = dist2 / (1.f / wnew[0] + 1.f / wnew[1]);
136 
137  if (verbose && 0 == threadIdxLocal)
138  printf("inter %d %f %f\n", 20 - maxiter, chi2Dist, dist2 * wv[kv]);
139 
140  if (chi2Dist < 4)
141  continue;
142 
143  // get a new global vertex
144  auto& igv = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
145  if (0 == threadIdxLocal)
146  igv = alpaka::atomicAdd(acc, &ws.nvIntermediate(), 1u, alpaka::hierarchy::Blocks{});
147  alpaka::syncBlockThreads(acc);
148  for (auto k : cms::alpakatools::uniform_elements(acc, nq)) {
149  if (1 == newV[k])
150  iv[it[k]] = igv;
151  }
152 
153  } // loop on vertices
154  }
155 
157  public:
158  template <typename TAcc>
159  ALPAKA_FN_ACC void operator()(const TAcc& acc, VtxSoAView pdata, WsSoAView pws, float maxChi2) const {
160  splitVertices(acc, pdata, pws, maxChi2);
161  }
162  };
163 
164 } // namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder
165 
166 #endif // RecoTracker_PixelVertexFinding_plugins_alpaka_splitVertices_h
const uint32_t gridDimension(alpaka::getWorkDiv< alpaka::Grid, alpaka::Blocks >(acc)[0u])
ALPAKA_FN_ACC auto uniform_elements(TAcc const &acc, TArgs... args)
Definition: workdivision.h:303
const uint32_t threadIdxLocal(alpaka::getIdx< alpaka::Block, alpaka::Threads >(acc)[0u])
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView & pdata
ALPAKA_FN_ACC auto independent_group_elements(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC void operator()(const TAcc &acc, VtxSoAView pdata, WsSoAView pws, float maxChi2) const
ZVertexSoAHeterogeneousLayout<>::View ZVertexSoAView
ALPAKA_FN_ACC ALPAKA_FN_INLINE VtxSoAView WsSoAView float maxChi2
Definition: splitVertices.h:24
T1 atomicInc(T1 *a, T2 b)
Definition: cudaCompat.h:48
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView WsSoAView & pws
std::vector< Block > Blocks
Definition: Block.h:99
static const MaxIter maxiter
splitVertices(pdata, pws, maxChi2ForSplit)
PixelVertexWSSoALayout<>::View PixelVertexWorkSpaceSoAView
::vertexFinder::PixelVertexWorkSpaceSoAView WsSoAView
static constexpr float d0
const uint32_t blockIdx(alpaka::getIdx< alpaka::Grid, alpaka::Blocks >(acc)[0u])
static constexpr float d1
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline)) clusterTracksByDensity(const TAcc &acc