d2/d62/splitVertices_8h_source.html

 #ifndef RecoPixelVertexing_PixelVertexFinding_splitVertices_h
 #define RecoPixelVertexing_PixelVertexFinding_splitVertices_h

 #include <algorithm>
 #include <cmath>
 #include <cstdint>
 #include <alpaka/alpaka.hpp>
 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"

 #include "vertexFinder.h"

 namespace ALPAKA_ACCELERATOR_NAMESPACE {
   namespace vertexFinder {
     // Short alias for the vertex SoA view type (the type of splitVertices' `pdata` argument).
     using VtxSoAView = ::reco::ZVertexSoAView;
     // Short alias for the vertex-finder workspace SoA view type (the type of splitVertices' `pws` argument).
     using WsSoAView = ::vertexFinder::PixelVertexWorkSpaceSoAView;
     // Try to split each badly fitted vertex in two along z.
     //
     // Vertices are distributed over blocks (block b handles kv = b, b + gridDim, ...). A vertex is
     // considered only if ndof >= 4 and chi2 >= maxChi2 * ndof. Its tracks are copied to block-shared
     // arrays and partitioned into two groups by an iterative nearest-centroid assignment in z
     // (weights 1/ezt2, at most 20 iterations). If neither group is empty and the two centroids are
     // separated by chi2Dist >= 4, the tracks of group 1 are re-labelled in ws.iv() with a new vertex
     // index obtained by atomically incrementing ws.nvIntermediate(); group 0 keeps the index kv.
     //
     // Parameters:
     //   acc      alpaka accelerator; provides thread/block indices, shared variables, atomics, barriers
     //   pdata    vertex SoA view: zv, wv, chi2, ndof and nvFinal are read
     //   pws      workspace SoA view: ntrks, zt, ezt2 are read; iv and nvIntermediate are updated
     //   maxChi2  chi2-per-ndof threshold; vertices below it are left untouched
     //
     // All `continue` statements in the vertex loop depend only on per-vertex or block-shared values,
     // so every thread of a block takes them together and the barriers below are reached uniformly.
     template <typename TAcc>
     ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) void splitVertices(const TAcc& acc,
                                                                                      VtxSoAView& pdata,
                                                                                      WsSoAView& pws,
                                                                                      float maxChi2) {
       constexpr bool verbose = false;  // in principle the compiler should optimize out if false
       const uint32_t threadIdxLocal(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]);

       auto& __restrict__ data = pdata;
       auto& __restrict__ ws = pws;
       auto nt = ws.ntrks();
       float const* __restrict__ zt = ws.zt();
       float const* __restrict__ ezt2 = ws.ezt2();
       float* __restrict__ zv = data.zv();
       float* __restrict__ wv = data.wv();
       float const* __restrict__ chi2 = data.chi2();
       uint32_t& nvFinal = data.nvFinal();

       int32_t const* __restrict__ nn = data.ndof();
       int32_t* __restrict__ iv = ws.iv();

       ALPAKA_ASSERT_OFFLOAD(zt);
       ALPAKA_ASSERT_OFFLOAD(wv);
       ALPAKA_ASSERT_OFFLOAD(chi2);
       ALPAKA_ASSERT_OFFLOAD(nn);

       // capacity of the per-block scratch arrays: vertices with more tracks are not split
       constexpr uint32_t MAXTK = 512;

       auto& it = alpaka::declareSharedVar<uint32_t[MAXTK], __COUNTER__>(acc);   // track index
       auto& zz = alpaka::declareSharedVar<float[MAXTK], __COUNTER__>(acc);      // z pos
       auto& newV = alpaka::declareSharedVar<uint8_t[MAXTK], __COUNTER__>(acc);  // 0 or 1
       auto& ww = alpaka::declareSharedVar<float[MAXTK], __COUNTER__>(acc);      // z weight
       auto& nq = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);          // number of track for this vertex

       const uint32_t blockIdx(alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
       const uint32_t gridDimension(alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0u]);

       // one vertex per block
       for (auto kv = blockIdx; kv < nvFinal; kv += gridDimension) {
         if (nn[kv] < 4)
           continue;
         if (chi2[kv] < maxChi2 * float(nn[kv]))
           continue;

         ALPAKA_ASSERT_OFFLOAD(nn[kv] < int32_t(MAXTK));

         if (static_cast<uint32_t>(nn[kv]) >= MAXTK)
           continue;  // too bad FIXME

         nq = 0u;
         alpaka::syncBlockThreads(acc);

         // copy to local: every track assigned to vertex kv takes the next free slot
         for (auto k : cms::alpakatools::independent_group_elements(acc, nt)) {
           if (iv[k] == int(kv)) {
             // the value of nq before the increment is the slot reserved for this track
             auto old = alpaka::atomicInc(acc, &nq, MAXTK, alpaka::hierarchy::Threads{});
             zz[old] = zt[k] - zv[kv];         // z relative to the current vertex position
             newV[old] = zz[old] < 0 ? 0 : 1;  // initial partition: below / above the vertex
             ww[old] = 1.f / ezt2[k];
             it[old] = k;
           }
         }

         // the new vertices
         auto& znew = alpaka::declareSharedVar<float[2], __COUNTER__>(acc);
         auto& wnew = alpaka::declareSharedVar<float[2], __COUNTER__>(acc);
         alpaka::syncBlockThreads(acc);

         // the number of tracks collected is expected to be ndof + 1
         ALPAKA_ASSERT_OFFLOAD(int(nq) == nn[kv] + 1);

         int maxiter = 20;
         // kt-min....
         // iterate while at least one thread of the block changed an assignment in the previous pass
         bool more = true;
         while (alpaka::syncBlockThreadsPredicate<alpaka::BlockOr>(acc, more)) {
           more = false;
           if (0 == threadIdxLocal) {
             znew[0] = 0;
             znew[1] = 0;
             wnew[0] = 0;
             wnew[1] = 0;
           }
           alpaka::syncBlockThreads(acc);

           // accumulate the weighted z sum and the total weight of each group
           for (auto k : cms::alpakatools::elements_with_stride(acc, nq)) {
             auto i = newV[k];
             alpaka::atomicAdd(acc, &znew[i], zz[k] * ww[k], alpaka::hierarchy::Threads{});
             alpaka::atomicAdd(acc, &wnew[i], ww[k], alpaka::hierarchy::Threads{});
           }
           alpaka::syncBlockThreads(acc);

           // turn the sums into weighted means; an empty group gives 0/0 here
           // and is rejected by the "avoid empty vertices" check after the loop
           if (0 == threadIdxLocal) {
             znew[0] /= wnew[0];
             znew[1] /= wnew[1];
           }
           alpaka::syncBlockThreads(acc);

           // reassign each track to the closest of the two means
           for (auto k : cms::alpakatools::elements_with_stride(acc, nq)) {
             auto d0 = fabs(zz[k] - znew[0]);
             auto d1 = fabs(zz[k] - znew[1]);
             auto newer = d0 < d1 ? 0 : 1;
             more |= newer != newV[k];
             newV[k] = newer;
           }
           --maxiter;
           if (maxiter <= 0)
             more = false;
         }

         // avoid empty vertices
         if (0 == wnew[0] || 0 == wnew[1])
           continue;

         // quality cut
         auto dist2 = (znew[0] - znew[1]) * (znew[0] - znew[1]);

         auto chi2Dist = dist2 / (1.f / wnew[0] + 1.f / wnew[1]);

         if (verbose && 0 == threadIdxLocal)
           printf("inter %d %f %f\n", 20 - maxiter, chi2Dist, dist2 * wv[kv]);

         if (chi2Dist < 4)
           continue;

         // get a new global vertex
         auto& igv = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
         if (0 == threadIdxLocal)
           igv = alpaka::atomicAdd(acc, &ws.nvIntermediate(), 1u, alpaka::hierarchy::Blocks{});
         alpaka::syncBlockThreads(acc);
         for (auto k : cms::alpakatools::elements_with_stride(acc, nq)) {
           if (1 == newV[k])
             iv[it[k]] = igv;
         }

         // Synchronise the block before the next vertex: nq is reset at the top of the loop, and
         // without this barrier a fast thread could zero it before a slower thread has read it as
         // the extent of the loop above, making that thread skip its share of the reassignment.
         alpaka::syncBlockThreads(acc);

       }  // loop on vertices
     }

     // Kernel functor wrapping splitVertices().
     // The two views are received by value and forwarded as lvalues, because splitVertices()
     // takes them by non-const reference; acc and maxChi2 are passed through unchanged.
     class SplitVerticesKernel {
     public:
       template <typename TAcc>
       ALPAKA_FN_ACC void operator()(const TAcc& acc, VtxSoAView pdata, WsSoAView pws, float maxChi2) const {
         splitVertices(acc, pdata, pws, maxChi2);
       }
     };
   }  // namespace vertexFinder
 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE
 #endif  // RecoPixelVertexing_PixelVertexFinding_splitVertices_h
ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::gridDimension
const uint32_t gridDimension(alpaka::getWorkDiv< alpaka::Grid, alpaka::Blocks >(acc)[0u])

mps_fire.i
i
Definition: mps_fire.py:429

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::threadIdxLocal
const uint32_t threadIdxLocal(alpaka::getIdx< alpaka::Block, alpaka::Threads >(acc)[0u])

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::newV
auto & newV
Definition: splitVertices.h:47

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::pdata
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView & pdata
Definition: clusterTracksByDensity.h:26

dqmPostProcessing_online.newer
newer
Definition: dqmPostProcessing_online.py:130

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::wv
float *__restrict__ wv
Definition: fitVertices.h:30

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::nvFinal
uint32_t & nvFinal
Definition: clusterTracksByDensity.h:49

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ALPAKA_ASSERT_OFFLOAD
ALPAKA_ASSERT_OFFLOAD(zt)

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ww
auto & ww
Definition: splitVertices.h:48

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::data
auto &__restrict__ data
Definition: clusterTracksByDensity.h:43

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ezt2
float const  *__restrict__ ezt2
Definition: clusterTracksByDensity.h:47

workdivision.h

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::verbose
constexpr bool verbose
Definition: clusterTracksByDensity.h:34

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::SplitVerticesKernel::operator()
ALPAKA_FN_ACC void operator()(const TAcc &acc, VtxSoAView pdata, WsSoAView pws, float maxChi2) const
Definition: splitVertices.h:156

cms::alpakatools::independent_group_elements
Definition: workdivision.h:690

vertexFinder.h

ALPAKA_ACCELERATOR_NAMESPACE
Definition: SiPixelCablingSoAESProducer.cc:21

gpuVertexFinder::more
bool more
Definition: gpuClusterTracksIterative.h:110

zVertex::ZVertexSoAView
ZVertexSoAHeterogeneousLayout<>::View ZVertexSoAView
Definition: ZVertexUtilities.h:20

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::maxChi2
ALPAKA_FN_ACC ALPAKA_FN_INLINE VtxSoAView WsSoAView float maxChi2
Definition: splitVertices.h:21

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::iv
int32_t *__restrict__ iv
Definition: clusterTracksByDensity.h:54

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::nn
int32_t *__restrict__ nn
Definition: clusterTracksByDensity.h:53

cms::cudacompat::atomicInc
T1 atomicInc(T1 *a, T2 b)
Definition: cudaCompat.h:48

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::SplitVerticesKernel
Definition: splitVertices.h:153

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::pws
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView WsSoAView & pws
Definition: clusterTracksByDensity.h:26

l1t::Blocks
std::vector< Block > Blocks
Definition: Block.h:99

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::MAXTK
constexpr uint32_t MAXTK
Definition: splitVertices.h:43

maxiter
static const MaxIter maxiter
Definition: HelixArbitraryPlaneCrossing.cc:30

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ws
auto &__restrict__ ws
Definition: clusterTracksByDensity.h:44

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::zv
float *__restrict__ zv
Definition: fitVertices.h:29

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::nt
auto nt
Definition: clusterTracksByDensity.h:45

gpuVertexFinder::splitVertices
splitVertices(pdata, pws, maxChi2ForSplit)

vertexFinder::PixelVertexWorkSpaceSoAView
PixelVertexWSSoALayout<>::View PixelVertexWorkSpaceSoAView
Definition: PixelVertexWorkSpaceLayout.h:23

vertexFinder
Definition: PixelVertexWorkSpaceLayout.h:10

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::WsSoAView
::vertexFinder::PixelVertexWorkSpaceSoAView WsSoAView
Definition: clusterTracksByDensity.h:17

d0
static constexpr float d0
Definition: L1EGammaCrystalsEmulatorProducer.cc:83

cms::alpakatools::elements_with_stride
Definition: workdivision.h:115

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::constexpr
if constexpr(verbose)
Definition: clusterTracksByDensity.h:37

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::zt
float const  *__restrict__ zt
Definition: clusterTracksByDensity.h:46

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::VtxSoAView
::reco::ZVertexSoAView VtxSoAView
Definition: clusterTracksByDensity.h:16

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::it
auto & it
Definition: splitVertices.h:45

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::nq
auto & nq
Definition: splitVertices.h:49

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::chi2
float *__restrict__ chi2
Definition: fitVertices.h:31

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::blockIdx
const uint32_t blockIdx(alpaka::getIdx< alpaka::Grid, alpaka::Blocks >(acc)[0u])

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::zz
auto & zz
Definition: splitVertices.h:46

d1
static constexpr float d1
Definition: L1EGammaCrystalsEmulatorProducer.cc:83

HistoContainer.h

cms::cudacompat::atomicAdd
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61

dqmdumpme.k
k
Definition: dqmdumpme.py:60

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::__attribute__
ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline)) clusterTracksByDensity(const TAcc &acc