d0/d64/CAHitNtupletGeneratorKernelsAlloc_8cc_source.html

 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"

 #include "CAHitNtupletGeneratorKernels.h"

 // #define GPU_DEBUG
 /**
  * Allocates the intermediate buffers used for tuple building and puts them in their
  * initial state.
  *
  * The same body is compiled as CAHitNtupletGeneratorKernelsGPU<TrackerTraits>::allocateOnGPU
  * when __CUDACC__ is defined, and as CAHitNtupletGeneratorKernelsCPU<TrackerTraits>::allocateOnGPU
  * otherwise; the local `Traits` alias selects the matching make_unique helpers.
  *
  * @param nHits   number of hits; the hit-to-tuple counter storage is sized nHits + 1.
  * @param stream  stream handed to every allocation, asynchronous memset/copy and
  *                launchZero call issued here.
  */
 template <typename TrackerTraits>
 #ifdef __CUDACC__
 void CAHitNtupletGeneratorKernelsGPU<TrackerTraits>::allocateOnGPU(int32_t nHits, cudaStream_t stream) {
   using Traits = cms::cudacompat::GPUTraits;
 #else
 void CAHitNtupletGeneratorKernelsCPU<TrackerTraits>::allocateOnGPU(int32_t nHits, cudaStream_t stream) {
   using Traits = cms::cudacompat::CPUTraits;
 #endif

   using CellCuts = gpuPixelDoublets::CellCutsT<TrackerTraits>;

   // ALLOCATIONS FOR THE INTERMEDIATE RESULTS (STAYS ON WORKER)

   this->device_theCellNeighbors_ = Traits::template make_unique<CellNeighborsVector>(stream);
   this->device_theCellTracks_ = Traits::template make_unique<CellTracksVector>(stream);

 #ifdef GPU_DEBUG
   std::cout << "Allocation for tuple building. N hits " << nHits << std::endl;
 #endif

   nHits++;  // storage requires one more counter;
   assert(nHits > 0);
   this->device_hitToTuple_ = Traits::template make_unique<HitToTuple>(stream);
   this->device_hitToTupleStorage_ = Traits::template make_unique<typename HitToTuple::Counter[]>(nHits, stream);
   // The view bundles the association object with its externally owned counter storage.
   this->hitToTupleView_.assoc = this->device_hitToTuple_.get();
   this->hitToTupleView_.offStorage = this->device_hitToTupleStorage_.get();
   this->hitToTupleView_.offSize = nHits;

   this->device_tupleMultiplicity_ = Traits::template make_unique<TupleMultiplicity>(stream);

   // One allocation of three c_type slots shared by three small counters:
   //   slot 0 -> device_hitTuple_apc_
   //   slot 1 -> device_hitToTuple_apc_
   //   slot 2 -> device_nCells_ (accessed as a uint32_t at the start of the slot)
   this->device_storage_ = Traits::template make_unique<cms::cuda::AtomicPairCounter::c_type[]>(3, stream);

   this->device_hitTuple_apc_ = (cms::cuda::AtomicPairCounter*)this->device_storage_.get();
   // The cast binds tighter than '+', so the offset is one AtomicPairCounter, not one c_type.
   // NOTE(review): assumes sizeof(AtomicPairCounter) == sizeof(c_type) - confirm in AtomicPairCounter.h.
   this->device_hitToTuple_apc_ = (cms::cuda::AtomicPairCounter*)this->device_storage_.get() + 1;
   this->device_nCells_ = (uint32_t*)(this->device_storage_.get() + 2);

   this->device_cellCuts_ = Traits::template make_unique<CellCuts>(stream);
   // FIXME: consider collapsing these 3 in one adhoc kernel
   if constexpr (std::is_same<Traits, cms::cudacompat::GPUTraits>::value) {
     // GPU traits: reset the cell counter and upload the cuts asynchronously on `stream`.
     cudaCheck(cudaMemsetAsync(this->device_nCells_, 0, sizeof(uint32_t), stream));
     cudaCheck(cudaMemcpyAsync(
         this->device_cellCuts_.get(), &(this->params_.cellCuts_), sizeof(CellCuts), cudaMemcpyDefault, stream));
   } else {
     // CPU traits: the buffers are written directly through their pointers.
     *(this->device_nCells_) = 0;
     *(this->device_cellCuts_.get()) = this->params_.cellCuts_;
   }
   cms::cuda::launchZero(this->device_tupleMultiplicity_.get(), stream);
   cms::cuda::launchZero(this->hitToTupleView_, stream);  // we may wish to keep it in the edm
 #ifdef GPU_DEBUG
   // Check the synchronisation result as well, so an asynchronous failure is reported
   // at the point where it is first observed rather than being silently discarded.
   cudaCheck(cudaDeviceSynchronize());
   cudaCheck(cudaGetLastError());
 #endif
 }

 // Explicit instantiations, one GPU/CPU pair per supported pixel topology.
 template class CAHitNtupletGeneratorKernelsGPU<pixelTopology::Phase1>;
 template class CAHitNtupletGeneratorKernelsCPU<pixelTopology::Phase1>;

 template class CAHitNtupletGeneratorKernelsGPU<pixelTopology::Phase2>;
 template class CAHitNtupletGeneratorKernelsCPU<pixelTopology::Phase2>;

 template class CAHitNtupletGeneratorKernelsGPU<pixelTopology::HIonPhase1>;
 template class CAHitNtupletGeneratorKernelsCPU<pixelTopology::HIonPhase1>;
cms::cudacompat::CPUTraits
Definition: HeterogeneousSoA.h:116

cms::cudacompat::GPUTraits
Definition: HeterogeneousSoA.h:56

gpuPixelDoublets::CellCutsT
Definition: gpuPixelDoubletsAlgos.h:38

cms::cuda::stream
uint32_t T const  *__restrict__ uint32_t const  *__restrict__ int32_t int Histo::index_type cudaStream_t stream
Definition: HistoContainer.h:51

cms::cuda::assert
assert(be >=bs)

CAHitNtupletGeneratorKernelsCPU::allocateOnGPU
void allocateOnGPU(int32_t nHits, cudaStream_t stream)
Definition: CAHitNtupletGeneratorKernelsAlloc.cc:11

CAHitNtupletGeneratorKernels.h

svgfig.template
def template(fileName, svg, replaceme="REPLACEME")
Definition: svgfig.py:521

relativeConstraints.value
value
Definition: relativeConstraints.py:53

CAHitNtupletGeneratorKernelsGPU::allocateOnGPU
void allocateOnGPU(int32_t nHits, cudaStream_t stream)

cudaCheck.h

cms::cuda::AtomicPairCounter
Definition: AtomicPairCounter.h:11

gather_cfg.cout
cout
Definition: gather_cfg.py:144

cudaCheck
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69

nHits
TupleMultiplicity< TrackerTraits > const  *__restrict__ uint32_t nHits
Definition: RiemannFitOnGPU.h:27

CAHitNtupletGeneratorKernelsCPU
Definition: CAHitNtupletGeneratorKernels.h:322

CAHitNtupletGeneratorKernelsGPU
Definition: CAHitNtupletGeneratorKernels.h:295