d0/d64/CAHitNtupletGeneratorKernelsAlloc_8cc_source.html

 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"


 #include "CAHitNtupletGeneratorKernels.h"


 // Allocates the working buffers used while building the tuples and resets the
 // counters and associations that must start from zero.
 //
 // The same body is compiled as the GPU specialization when __CUDACC__ is
 // defined and as the CPU specialization otherwise; Traits selects the
 // matching make_unique flavour.
 //
 // nHits  : number of hits; one additional counter slot is allocated on top of it.
 // stream : stream passed to every allocation, memset and zeroing call below.
 template <>
 #ifdef __CUDACC__
 void CAHitNtupletGeneratorKernelsGPU::allocateOnGPU(int32_t nHits, cudaStream_t stream) {
 #else
 void CAHitNtupletGeneratorKernelsCPU::allocateOnGPU(int32_t nHits, cudaStream_t stream) {
 #endif
   // ALLOCATIONS FOR THE INTERMEDIATE RESULTS (STAYS ON WORKER)

   device_theCellNeighbors_ = Traits::template make_unique<caConstants::CellNeighborsVector>(stream);
   device_theCellTracks_ = Traits::template make_unique<caConstants::CellTracksVector>(stream);

 #ifdef GPU_DEBUG
   std::cout << "Allocation for tuple building. N hits " << nHits << std::endl;
 #endif

   nHits++;  // storage requires one more counter;
   assert(nHits > 0);

   // Hit -> tuple association: the association object and its counter storage
   // are allocated separately and tied together through the view.
   device_hitToTuple_ = Traits::template make_unique<HitToTuple>(stream);
   device_hitToTupleStorage_ = Traits::template make_unique<HitToTuple::Counter[]>(nHits, stream);
   hitToTupleView_.assoc = device_hitToTuple_.get();
   hitToTupleView_.offStorage = device_hitToTupleStorage_.get();
   hitToTupleView_.offSize = nHits;

   device_tupleMultiplicity_ = Traits::template make_unique<TupleMultiplicity>(stream);

   // A single allocation of three c_type words backs the two atomic pair
   // counters (slots 0 and 1) and the cell counter (slot 2).
   device_storage_ = Traits::template make_unique<cms::cuda::AtomicPairCounter::c_type[]>(3, stream);

   // Index in units of c_type so that every view starts on one of the three
   // allocated slots, independently of sizeof(AtomicPairCounter).
   auto* const slots = device_storage_.get();
   device_hitTuple_apc_ = reinterpret_cast<cms::cuda::AtomicPairCounter*>(slots);
   device_hitToTuple_apc_ = reinterpret_cast<cms::cuda::AtomicPairCounter*>(slots + 1);
   device_nCells_ = reinterpret_cast<uint32_t*>(slots + 2);

   // FIXME: consider collapsing these 3 in one adhoc kernel
   if constexpr (std::is_same<Traits, cms::cudacompat::GPUTraits>::value) {
     // GPU traits: the counter is reset with a memset enqueued on the stream.
     cudaCheck(cudaMemsetAsync(device_nCells_, 0, sizeof(uint32_t), stream));
   } else {
     // Any other traits: the counter is written directly through the pointer.
     *device_nCells_ = 0;
   }
   cms::cuda::launchZero(device_tupleMultiplicity_.get(), stream);
   cms::cuda::launchZero(hitToTupleView_, stream);  // we may wish to keep it in the edm

 #ifdef GPU_DEBUG
   // Check the synchronization result as well, so that a failure reported by
   // the synchronize call itself is not silently dropped.
   cudaCheck(cudaDeviceSynchronize());
   cudaCheck(cudaGetLastError());
 #endif
 }

CAHitNtupletGeneratorKernels::device_hitToTuple_
unique_ptr< HitToTuple > device_hitToTuple_
Definition: CAHitNtupletGeneratorKernels.h:211

cms::cuda::OneToManyAssocView::assoc
Assoc * assoc
Definition: OneToManyAssoc.h:27

cms::cuda::OneToManyAssocView::offSize
int32_t offSize
Definition: OneToManyAssoc.h:30

CAHitNtupletGeneratorKernels::hitToTupleView_
HitToTuple::View hitToTupleView_
Definition: CAHitNtupletGeneratorKernels.h:213

relativeConstraints.value
tuple value
Definition: relativeConstraints.py:55

CAHitNtupletGeneratorKernels::device_hitToTupleStorage_
unique_ptr< HitToTuple::Counter[]> device_hitToTupleStorage_
Definition: CAHitNtupletGeneratorKernels.h:212

cms::cuda::OneToManyAssocView::offStorage
Counter * offStorage
Definition: OneToManyAssoc.h:28

cms::cuda::stream
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
Definition: HistoContainer.h:51

cms::cuda::assert
assert(be >=bs)

CAHitNtupletGeneratorKernels::allocateOnGPU
void allocateOnGPU(int32_t nHits, cudaStream_t stream)
Definition: CAHitNtupletGeneratorKernelsAlloc.cc:9

CAHitNtupletGeneratorKernels.h

CAHitNtupletGeneratorKernels::device_theCellNeighbors_
unique_ptr< caConstants::CellNeighborsVector > device_theCellNeighbors_
Definition: CAHitNtupletGeneratorKernels.h:201

CAHitNtupletGeneratorKernels::device_nCells_
uint32_t * device_nCells_
Definition: CAHitNtupletGeneratorKernels.h:209

cudaCheck.h

CAHitNtupletGeneratorKernels::device_hitTuple_apc_
cms::cuda::AtomicPairCounter * device_hitTuple_apc_
Definition: CAHitNtupletGeneratorKernels.h:217

cms::cuda::AtomicPairCounter
Definition: AtomicPairCounter.h:11

nHits
caConstants::TupleMultiplicity const CAHitNtupletGeneratorKernelsGPU::HitToTuple const cms::cuda::AtomicPairCounter GPUCACell const *__restrict__ uint32_t const *__restrict__ gpuPixelDoublets::CellNeighborsVector const gpuPixelDoublets::CellTracksVector const GPUCACell::OuterHitOfCell const int32_t nHits
Definition: CAHitNtupletGeneratorKernelsImpl.h:43

CAHitNtupletGeneratorKernels::device_storage_
unique_ptr< cms::cuda::AtomicPairCounter::c_type[]> device_storage_
Definition: CAHitNtupletGeneratorKernels.h:221

cms::cuda::AtomicPairCounter::get
__device__ __host__ Counters get() const
Definition: AtomicPairCounter.h:35

gather_cfg.cout
tuple cout
Definition: gather_cfg.py:144

cudaCheck
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69

CAHitNtupletGeneratorKernels::device_theCellTracks_
unique_ptr< caConstants::CellTracksVector > device_theCellTracks_
Definition: CAHitNtupletGeneratorKernels.h:203

svgfig.template
def template
Definition: svgfig.py:521

CAHitNtupletGeneratorKernels::device_tupleMultiplicity_
unique_ptr< TupleMultiplicity > device_tupleMultiplicity_
Definition: CAHitNtupletGeneratorKernels.h:219

CAHitNtupletGeneratorKernels::device_hitToTuple_apc_
cms::cuda::AtomicPairCounter * device_hitToTuple_apc_
Definition: CAHitNtupletGeneratorKernels.h:215