d5/d77/gpuVertexFinder_8cc_source.html

 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"

 #include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
 #include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h"

 #include "PixelVertexWorkSpaceUtilities.h"
 #include "PixelVertexWorkSpaceSoAHost.h"
 #include "PixelVertexWorkSpaceSoADevice.h"

 #include "gpuClusterTracksByDensity.h"
 #include "gpuClusterTracksDBSCAN.h"
 #include "gpuClusterTracksIterative.h"
 #include "gpuFitVertices.h"
 #include "gpuSortByPt2.h"
 #include "gpuSplitVertices.h"

 #undef PIXVERTEX_DEBUG_PRODUCE

 namespace gpuVertexFinder {

   // chi2 cuts handed to fitVertices: tracks contributing more than this to the chi2 of the
   // vertex fit are rejected as outliers (loose cut for the final fit, tighter for the first one)
   constexpr float maxChi2ForFirstFit{50.f};
   constexpr float maxChi2ForFinalFit{5000.f};

   // chi2/NDoF cut handed to splitVertices: vertices above this value are split
   constexpr float maxChi2ForSplit{9.f};

   // Copy the tracks selected for vertexing from the track SoA into the workspace.
   //
   // Grid-stride loop over the tracks_view.nTracks() tracks. For every track visited,
   // soa[idx].idv() is reset to -1. A track is then appended to the workspace unless it is a
   // triplet, its quality is below highPurity, or its pt is below ptMin. The pt is clamped to
   // ptMax before being squared.
   //
   // Per accepted track the workspace receives: itrk (track index), zt (helper::zip),
   // ezt2 (element 14 of the track covariance) and ptt2 (clamped pt squared). Slots are handed
   // out with atomicAdd on pws.ntrks(), so the order of the entries depends on thread scheduling.
   // NOTE(review): the slot index is not checked against the workspace capacity here -
   // presumably guaranteed by how the SoAs are sized; confirm against the allocation.
   template <typename TrackerTraits>
   __global__ void loadTracks(
       TrackSoAConstView<TrackerTraits> tracks_view, VtxSoAView soa, WsSoAView pws, float ptMin, float ptMax) {
     using helper = TracksUtilities<TrackerTraits>;
     auto const* quality = tracks_view.quality();

     auto const firstTrack = blockIdx.x * blockDim.x + threadIdx.x;
     auto const stride = gridDim.x * blockDim.x;
     int const nTracks = tracks_view.nTracks();

     for (int idx = firstTrack; idx < nTracks; idx += stride) {
       auto nHits = helper::nHits(tracks_view, idx);
       assert(nHits >= 3);

       // every track starts out not associated to any vertex
       soa[idx].idv() = -1;

       // no triplets, and nothing below highPurity
       bool const rejected = helper::isTriplet(tracks_view, idx) || quality[idx] < pixelTrack::Quality::highPurity;
       if (rejected)
         continue;

       auto const trackPt = tracks_view[idx].pt();
       if (trackPt < ptMin)
         continue;

       // clamp pt before squaring it
       auto const cappedPt = std::min(trackPt, ptMax);

       // reserve the next free workspace entry and fill it
       auto const slot = atomicAdd(&pws.ntrks(), 1);
       pws[slot].itrk() = idx;
       pws[slot].zt() = helper::zip(tracks_view, idx);
       pws[slot].ezt2() = tracks_view[idx].covariance()(14);
       pws[slot].ptt2() = cappedPt * cappedPt;
     }
   }

 // #define THREE_KERNELS
 #ifndef THREE_KERNELS
   // Complete vertex-finding chain in a single kernel: clusterTracksByDensity, fitVertices,
   // splitVertices, fitVertices again and sortByPt2, with a __syncthreads() barrier between
   // consecutive stages. The host code in this file launches it with a single block.
   //
   //   minT     min number of neighbours to be "seed"
   //   eps      max absolute distance to cluster
   //   errmax   max error to be "seed"
   //   chi2max  max normalized distance to cluster
   __global__ void vertexFinderOneKernel(
       VtxSoAView pdata, WsSoAView pws, int minT, float eps, float errmax, float chi2max) {
     // clustering
     clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max);
     __syncthreads();

     // first fit, with the first-fit chi2 cut
     fitVertices(pdata, pws, maxChi2ForFirstFit);
     __syncthreads();

     // splitting (always performed in this kernel)
     splitVertices(pdata, pws, maxChi2ForSplit);
     __syncthreads();

     // final fit, with the final-fit chi2 cut
     fitVertices(pdata, pws, maxChi2ForFinalFit);
     __syncthreads();

     // ordering of the result
     sortByPt2(pdata, pws);
   }
 #else
   // First kernel of the three-kernel variant (compiled only when THREE_KERNELS is defined):
   // clusterTracksByDensity followed, after a __syncthreads() barrier, by the first fitVertices.
   //
   //   minT     min number of neighbours to be "seed"
   //   eps      max absolute distance to cluster
   //   errmax   max error to be "seed"
   //   chi2max  max normalized distance to cluster
   __global__ void vertexFinderKernel1(
       VtxSoAView pdata, WsSoAView pws, int minT, float eps, float errmax, float chi2max) {
     // clustering
     clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max);
     __syncthreads();

     // first fit, with the first-fit chi2 cut
     fitVertices(pdata, pws, maxChi2ForFirstFit);
   }

   // Last kernel of the three-kernel variant (compiled only when THREE_KERNELS is defined):
   // the final fitVertices followed, after a __syncthreads() barrier, by sortByPt2.
   __global__ void vertexFinderKernel2(VtxSoAView pdata, WsSoAView pws) {
     // final fit, with the final-fit chi2 cut
     fitVertices(pdata, pws, maxChi2ForFinalFit);
     __syncthreads();

     // ordering of the result
     sortByPt2(pdata, pws);
   }
 #endif

   // Run the vertex-finding chain on the tracks in `tracks_view` and return the vertex SoA.
   //
   // This one body is compiled in two flavours:
   //  - with __CUDACC__ defined it is Producer::makeAsync: every step is launched as a kernel on
   //    `stream` and a ZVertexSoADevice is returned. No stream synchronisation is performed in
   //    this function, only launch-error checks via cudaCheck(cudaGetLastError());
   //  - otherwise it is Producer::make: the same steps are plain function calls and a
   //    ZVertexSoAHost is returned.
   //
   // ptMin and ptMax are forwarded to loadTracks (minimum pt and pt clamp of the track selection).
   // oneKernel_, useDensity_, useDBSCAN_, useIterative_, doSplitting_, minT, eps, errmax and
   // chi2max are not declared in this file - presumably Producer members; see gpuVertexFinder.h.
   template <typename TrackerTraits>
 #ifdef __CUDACC__
   ZVertexSoADevice Producer<TrackerTraits>::makeAsync(cudaStream_t stream,
                                                       const TrackSoAConstView<TrackerTraits>& tracks_view,
                                                       float ptMin,
                                                       float ptMax) const {
 #ifdef PIXVERTEX_DEBUG_PRODUCE
     std::cout << "producing Vertices on GPU" << std::endl;
 #endif  // PIXVERTEX_DEBUG_PRODUCE
     ZVertexSoADevice vertices(stream);
 #else
   ZVertexSoAHost Producer<TrackerTraits>::make(const TrackSoAConstView<TrackerTraits>& tracks_view,
                                                float ptMin,
                                                float ptMax) const {
 #ifdef PIXVERTEX_DEBUG_PRODUCE
     std::cout << "producing Vertices on  CPU" << std::endl;
 #endif  // PIXVERTEX_DEBUG_PRODUCE
     ZVertexSoAHost vertices;
 #endif
     // view through which all the steps below read and write the output
     auto soa = vertices.view();

     // the output SoA must have a buffer (checked only when assertions are enabled)
     assert(vertices.buffer());

     // workspace used by this call only: a device SoA constructed with `stream` in the GPU
     // flavour, a host SoA otherwise
 #ifdef __CUDACC__
     auto ws_d = gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoADevice(stream);
 #else
     auto ws_d = gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoAHost();
 #endif

     // step 1: initialise output and workspace, then load the selected tracks into the workspace
 #ifdef __CUDACC__
     // init runs with a single thread
     init<<<1, 1, 0, stream>>>(soa, ws_d.view());
     // loadTracks: blocks of 128 threads, enough blocks to cover the size reported by the
     // metadata of the track view (rounded up)
     auto blockSize = 128;
     auto numberOfBlocks = (tracks_view.metadata().size() + blockSize - 1) / blockSize;
     loadTracks<TrackerTraits><<<numberOfBlocks, blockSize, 0, stream>>>(tracks_view, soa, ws_d.view(), ptMin, ptMax);
     // report a launch error of the kernels above, if any
     cudaCheck(cudaGetLastError());
 #else
     init(soa, ws_d.view());
     loadTracks<TrackerTraits>(tracks_view, soa, ws_d.view(), ptMin, ptMax);
 #endif

     // step 2: clustering, fit, optional splitting and refit, sorting
 #ifdef __CUDACC__
     // Running too many threads leads to problems when printf is enabled.
     // (896 threads: block size of every single-block launch below)
     constexpr int maxThreadsForPrint = 1024 - 128;
     // launch configuration of splitVerticesKernel
     constexpr int numBlocks = 1024;
     constexpr int threadsPerBlock = 128;

     if (oneKernel_) {
       // implemented only for density clusters
       // NOTE(review): this branch does not consult useDensity_/useDBSCAN_/useIterative_ or
       // doSplitting_: density clustering and splitting are always run here.
 #ifndef THREE_KERNELS
       vertexFinderOneKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max);
 #else
       vertexFinderKernel1<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max);
       cudaCheck(cudaGetLastError());
       // one block per vertex...
       splitVerticesKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(soa, ws_d.view(), maxChi2ForSplit);
       cudaCheck(cudaGetLastError());
       vertexFinderKernel2<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view());
 #endif
     } else {  // five kernels
       // clustering algorithm chosen by the first flag that is set
       // NOTE(review): if none of the three flags is set no clustering kernel is launched.
       if (useDensity_) {
         clusterTracksByDensityKernel<<<1, maxThreadsForPrint, 0, stream>>>(
             soa, ws_d.view(), minT, eps, errmax, chi2max);
       } else if (useDBSCAN_) {
         clusterTracksDBSCAN<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max);
       } else if (useIterative_) {
         clusterTracksIterative<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max);
       }
       cudaCheck(cudaGetLastError());
       // first fit, with the first-fit chi2 cut
       fitVerticesKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), maxChi2ForFirstFit);
       cudaCheck(cudaGetLastError());
       if (doSplitting_) {
         // one block per vertex...
         splitVerticesKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(soa, ws_d.view(), maxChi2ForSplit);
         cudaCheck(cudaGetLastError());
         // refit after splitting, with the final-fit chi2 cut
         fitVerticesKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), maxChi2ForFinalFit);
         cudaCheck(cudaGetLastError());
       }
       sortByPt2Kernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view());
     }
     // covers the last launch of whichever branch was taken
     cudaCheck(cudaGetLastError());
 #else  // __CUDACC__
     // CPU flavour: same sequence as the multi-kernel GPU branch, as direct function calls
     // NOTE(review): if none of the three flags is set no clustering is run.
     if (useDensity_) {
       clusterTracksByDensity(soa, ws_d.view(), minT, eps, errmax, chi2max);
     } else if (useDBSCAN_) {
       clusterTracksDBSCAN(soa, ws_d.view(), minT, eps, errmax, chi2max);
     } else if (useIterative_) {
       clusterTracksIterative(soa, ws_d.view(), minT, eps, errmax, chi2max);
     }
 #ifdef PIXVERTEX_DEBUG_PRODUCE
     std::cout << "found " << ws_d.view().nvIntermediate() << " vertices " << std::endl;
 #endif  // PIXVERTEX_DEBUG_PRODUCE
     // first fit, with the first-fit chi2 cut
     fitVertices(soa, ws_d.view(), maxChi2ForFirstFit);
     // one block per vertex!
     if (doSplitting_) {
       splitVertices(soa, ws_d.view(), maxChi2ForSplit);
       // refit after splitting, with the final-fit chi2 cut
       fitVertices(soa, ws_d.view(), maxChi2ForFinalFit);
     }
     sortByPt2(soa, ws_d.view());
 #endif

     return vertices;
   }

   // Explicit instantiation definitions: make the Producer member functions defined in this
   // file available for each of the pixel topologies listed here.
   template class Producer<pixelTopology::Phase1>;
   template class Producer<pixelTopology::Phase2>;
   template class Producer<pixelTopology::HIonPhase1>;
 }  // namespace gpuVertexFinder
gpuVertexFinder::Producer::make
ZVertexSoAHost make(const TkSoAConstView &tracks_view, float ptMin, float ptMax) const
Definition: gpuVertexFinder.cc:113

cms::cudacompat::threadIdx
const dim3 threadIdx
Definition: cudaCompat.h:29

gpuVertexFinder::__syncthreads
__syncthreads()
Definition: cudaCompat.h:132

gpuSplitVertices.h

helper
Definition: helper.py:1

gpuSortByPt2.h

gpuClusterTracksIterative.h

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::pdata
ALPAKA_FN_ACC ALPAKA_FN_INLINE void VtxSoAView & pdata
Definition: clusterTracksByDensity.h:30

cms::cudacompat::gridDim
const dim3 gridDim
Definition: cudaCompat.h:33

gpuVertexFinder::Producer::makeAsync
ZVertexSoADevice makeAsync(cudaStream_t stream, const TkSoAConstView &tracks_view, float ptMin, float ptMax) const

caHitNtupletGeneratorKernels::tracks_view
uint32_t const  *__restrict__ TkSoAView< TrackerTraits > tracks_view
Definition: CAHitNtupletGeneratorKernelsImpl.h:161

heavyIonCSV_trainingSettings.idx
idx
Definition: heavyIonCSV_trainingSettings.py:5

TracksUtilities::zip
static constexpr __host__ __device__ float zip(const TrackSoAConstView &tracks, int32_t i)
Definition: PixelTrackUtilities.h:70

gpuVertexFinder::chi2max
__device__ WsSoAView int float float float chi2max
Definition: gpuClusterTracksByDensity.h:26

gpuVertexFinder::data
auto &__restrict__ data
Definition: gpuClusterTracksByDensity.h:35

gpuVertexFinder::fitVertices
fitVertices(pdata, pws, maxChi2ForFirstFit)

__global__
#define __global__
Definition: cudaCompat.h:19

gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoAHost
PixelVertexWorkSpaceSoAHost< zVertex::utilities::MAXTRACKS > PixelVertexWorkSpaceSoAHost
Definition: PixelVertexWorkSpaceSoAHost.h:20

cms::cudacompat::blockDim
const dim3 blockDim
Definition: cudaCompat.h:30

init
int init
Definition: HydjetWrapper.h:66

gpuVertexFinder::ptMin
VtxSoAView WsSoAView float ptMin
Definition: gpuVertexFinder.cc:30

gpuVertexFinder::first
auto first
Definition: gpuVertexFinder.cc:33

gpuClusterTracksByDensity.h

TracksUtilities
Definition: PixelTrackUtilities.h:51

gpuVertexFinder::eps
__device__ WsSoAView int float eps
Definition: gpuClusterTracksByDensity.h:21

cms::cuda::stream
uint32_t T const  *__restrict__ uint32_t const  *__restrict__ int32_t int Histo::index_type cudaStream_t stream
Definition: HistoContainer.h:51

ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr
if constexpr(n > 3)
Definition: BrokenLine.h:164

gpuVertexFinder::Producer
Definition: gpuVertexFinder.h:26

DiDispStaMuonMonitor_cfi.pt
pt
Definition: DiDispStaMuonMonitor_cfi.py:39

gpuVertexFinder::maxChi2ForFirstFit
constexpr float maxChi2ForFirstFit
Definition: gpuVertexFinder.cc:22

AlignmentTracksFromVertexSelector_cfi.vertices
vertices
Definition: AlignmentTracksFromVertexSelector_cfi.py:5

gpuVertexFinder::sortByPt2
sortByPt2(pdata, pws)

quality
string quality
Definition: beamSpotDipStandalone.cc:53

gpuVertexFinder::nt
auto nt
Definition: gpuClusterTracksByDensity.h:37

gpuVertexFinder::pws
__device__ WsSoAView & pws
Definition: gpuClusterTracksByDensity.h:21

PixelTrackUtilities.h

PixelVertexWorkSpaceUtilities.h

SiStripPI::min
Definition: SiStripPayloadInspectorHelper.h:178

gpuVertexFinder::minT
__device__ WsSoAView int minT
Definition: gpuClusterTracksByDensity.h:21

gpuVertexFinder
Definition: gpuClusterTracksByDensity.h:13

gpuVertexFinder::ptMax
VtxSoAView WsSoAView float float ptMax
Definition: gpuVertexFinder.cc:30

gpuVertexFinder::splitVertices
splitVertices(pdata, pws, maxChi2ForSplit)

cms::cudacompat::blockIdx
const dim3 blockIdx
Definition: cudaCompat.h:32

PixelVertexWorkSpaceSoAHost.h

TracksUtilities::nHits
static constexpr __host__ __device__ int nHits(const TrackSoAConstView &tracks, int i)
Definition: PixelTrackUtilities.h:136

gpuClusterTracksDBSCAN.h

ZVertexSoAHeterogeneousDevice
Definition: ZVertexSoAHeterogeneousDevice.h:11

cudaCheck.h

gpuVertexFinder::errmax
__device__ WsSoAView int float float errmax
Definition: gpuClusterTracksByDensity.h:21

gpuFitVertices.h

pixelTrack::Quality::highPurity

gpuVertexFinder::WsSoAView
gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoAView WsSoAView
Definition: gpuVertexFinder.h:18

gpuVertexFinder::maxChi2ForFinalFit
constexpr float maxChi2ForFinalFit
Definition: gpuVertexFinder.cc:23

ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::it
auto & it
Definition: splitVertices.h:48

TracksUtilities::isTriplet
static constexpr __host__ __device__ bool isTriplet(const TrackSoAConstView &tracks, int i)
Definition: PixelTrackUtilities.h:74

PixelVertexWorkSpaceSoADevice.h

TrackSoAConstView
typename TrackSoA< TrackerTraits >::template TrackSoALayout<>::ConstView TrackSoAConstView
Definition: PixelTrackUtilities.h:237

gather_cfg.cout
cout
Definition: gather_cfg.py:144

cudaCheck
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69

gpuVertexFinder::maxChi2ForSplit
constexpr float maxChi2ForSplit
Definition: gpuVertexFinder.cc:26

nHits
TupleMultiplicity< TrackerTraits > const  *__restrict__ uint32_t nHits
Definition: RiemannFitOnGPU.h:27

ZVertexSoAHeterogeneousHost< zVertex::utilities::MAXTRACKS >

gpuVertexFinder::VtxSoAView
zVertex::ZVertexSoAView VtxSoAView
Definition: gpuVertexFinder.h:17

gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoADevice
PixelVertexWorkSpaceSoADevice< zVertex::utilities::MAXTRACKS > PixelVertexWorkSpaceSoADevice
Definition: PixelVertexWorkSpaceSoADevice.h:20

gpuVertexFinder::assert
assert(zt)

gpuVertexFinder::soa
VtxSoAView soa
Definition: gpuVertexFinder.cc:30

cms::cudacompat::atomicAdd
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61

ZVertexUtilities.h