d7/d2e/CachedBufAlloc_8h_source.html

 #ifndef HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h
 #define HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h

 #include <alpaka/alpaka.hpp>

 #include "HeterogeneousCore/AlpakaInterface/interface/getDeviceCachingAllocator.h"
 #include "HeterogeneousCore/AlpakaInterface/interface/getHostCachingAllocator.h"

 namespace cms::alpakatools {

   namespace traits {

     // The caching memory allocator trait.
     //
     // This is the primary template; the partial specialisations of this
     // struct provide the actual allocCachedBuf() implementations. The last
     // defaulted parameter restricts the template to arguments for which
     // alpaka::isDevice<TDev> and alpaka::isQueue<TQueue> both hold.
     template <typename TElem,
               typename TDim,
               typename TIdx,
               typename TDev,
               typename TQueue,
               typename TSfinae = void,
               typename TEnable = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
     struct CachedBufAlloc {
       // The asserted condition is written in terms of TDev, so it is only
       // checked when this primary template itself gets instantiated, i.e. when
       // no specialisation matches the requested device/queue combination.
       static_assert(alpaka::meta::DependentFalseType<TDev>::value, "This device does not support a caching allocator");
     };

     // Implementation for the CPU device, used with any queue type that has no
     // more specific specialisation.
     template <typename TElem, typename TDim, typename TIdx, typename TQueue>
     struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, TQueue, void> {
       // Returns a host buffer of TElem covering `extent`, obtained from
       // alpaka::allocAsyncBuf on `queue`. The `dev` argument is not used by
       // this implementation.
       template <typename TExtent>
       ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev, TQueue queue, TExtent const& extent)
           -> alpaka::BufCpu<TElem, TDim, TIdx> {
         // no caching allocator is involved here: the memory is non-cached,
         // queue-ordered, asynchronous and host-only
         auto hostBuffer = alpaka::allocAsyncBuf<TElem, TIdx>(queue, extent);
         return hostBuffer;
       }
     };

 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED

     // Implementation for the CPU device when the buffer is requested through
     // an alpaka::QueueCudaRtBlocking queue: the memory is taken from the host
     // caching allocator associated with that queue type.
     template <typename TElem, typename TDim, typename TIdx>
     struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueCudaRtBlocking, void> {
       // Returns a BufCpu wrapping `extent` elements of TElem allocated by the
       // host caching allocator; the memory is handed back to that allocator
       // by the buffer's deleter.
       template <typename TExtent>
       ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
                                                 alpaka::QueueCudaRtBlocking queue,
                                                 TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
         ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

         auto& hostCache = getHostCachingAllocator<alpaka::QueueCudaRtBlocking>();

         // FIXME the BufCpu does not support a pitch ?
         size_t const elementCount = alpaka::getExtentProduct(extent);
         size_t const byteCount = elementCount * sizeof(TElem);
         void* const rawMemory = hostCache.allocate(byteCount, queue);

         // the deleter keeps a pointer to the allocator and gives the memory
         // back to it instead of freeing it directly
         auto giveBack = [cache = &hostCache](TElem* ptr) { cache->free(ptr); };

         return alpaka::BufCpu<TElem, TDim, TIdx>(dev, static_cast<TElem*>(rawMemory), std::move(giveBack), extent);
       }
     };

     // Implementation for the CPU device when the buffer is requested through
     // an alpaka::QueueCudaRtNonBlocking queue: the memory is taken from the
     // host caching allocator associated with that queue type.
     template <typename TElem, typename TDim, typename TIdx>
     struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueCudaRtNonBlocking, void> {
       // Returns a BufCpu wrapping `extent` elements of TElem allocated by the
       // host caching allocator; the memory is handed back to that allocator
       // by the buffer's deleter.
       template <typename TExtent>
       ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
                                                 alpaka::QueueCudaRtNonBlocking queue,
                                                 TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
         ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

         auto& hostCache = getHostCachingAllocator<alpaka::QueueCudaRtNonBlocking>();

         // FIXME the BufCpu does not support a pitch ?
         size_t const elementCount = alpaka::getExtentProduct(extent);
         size_t const byteCount = elementCount * sizeof(TElem);
         void* const rawMemory = hostCache.allocate(byteCount, queue);

         // the deleter keeps a pointer to the allocator and gives the memory
         // back to it instead of freeing it directly
         auto giveBack = [cache = &hostCache](TElem* ptr) { cache->free(ptr); };

         return alpaka::BufCpu<TElem, TDim, TIdx>(dev, static_cast<TElem*>(rawMemory), std::move(giveBack), extent);
       }
     };

     // Implementation for the CUDA device, for any queue type: the memory is
     // taken from the device caching allocator selected by `dev` and TQueue.
     template <typename TElem, typename TDim, typename TIdx, typename TQueue>
     struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCudaRt, TQueue, void> {
       // Returns a BufCudaRt wrapping `extent` elements of TElem allocated by
       // the device caching allocator on `queue`; the memory is handed back to
       // that allocator by the buffer's deleter.
       template <typename TExtent>
       ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCudaRt const& dev, TQueue queue, TExtent const& extent)
           -> alpaka::BufCudaRt<TElem, TDim, TIdx> {
         ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

         auto& allocator = getDeviceCachingAllocator<alpaka::DevCudaRt, TQueue>(dev);

         // The row pitch is computed entirely in size_t, like sizeBytes below:
         // converting sizeof(TElem) to TIdx first could truncate the element
         // size when TIdx is a narrow integer type.
         size_t const width = alpaka::getWidth(extent);
         // TODO implement pitch for TDim > 1
         size_t const pitchBytes = width * sizeof(TElem);
         size_t const size = alpaka::getExtentProduct(extent);
         size_t const sizeBytes = size * sizeof(TElem);
         void* const memPtr = allocator.allocate(sizeBytes, queue);

         // use a custom deleter to return the buffer to the CachingAllocator
         auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };

         return alpaka::BufCudaRt<TElem, TDim, TIdx>(
             dev, static_cast<TElem*>(memPtr), std::move(deleter), extent, pitchBytes);
       }
     };

 #endif  // ALPAKA_ACC_GPU_CUDA_ENABLED

 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED

     // Implementation for the CPU device when the buffer is requested through
     // an alpaka::QueueHipRtBlocking queue: the memory is taken from the host
     // caching allocator associated with that queue type.
     template <typename TElem, typename TDim, typename TIdx>
     struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueHipRtBlocking, void> {
       // Returns a BufCpu wrapping `extent` elements of TElem allocated by the
       // host caching allocator; the memory is handed back to that allocator
       // by the buffer's deleter.
       template <typename TExtent>
       ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
                                                 alpaka::QueueHipRtBlocking queue,
                                                 TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
         ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

         auto& hostCache = getHostCachingAllocator<alpaka::QueueHipRtBlocking>();

         // FIXME the BufCpu does not support a pitch ?
         size_t const elementCount = alpaka::getExtentProduct(extent);
         size_t const byteCount = elementCount * sizeof(TElem);
         void* const rawMemory = hostCache.allocate(byteCount, queue);

         // the deleter keeps a pointer to the allocator and gives the memory
         // back to it instead of freeing it directly
         auto giveBack = [cache = &hostCache](TElem* ptr) { cache->free(ptr); };

         return alpaka::BufCpu<TElem, TDim, TIdx>(dev, static_cast<TElem*>(rawMemory), std::move(giveBack), extent);
       }
     };

     // Implementation for the CPU device when the buffer is requested through
     // an alpaka::QueueHipRtNonBlocking queue: the memory is taken from the
     // host caching allocator associated with that queue type.
     template <typename TElem, typename TDim, typename TIdx>
     struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueHipRtNonBlocking, void> {
       // Returns a BufCpu wrapping `extent` elements of TElem allocated by the
       // host caching allocator; the memory is handed back to that allocator
       // by the buffer's deleter.
       template <typename TExtent>
       ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
                                                 alpaka::QueueHipRtNonBlocking queue,
                                                 TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
         ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

         auto& hostCache = getHostCachingAllocator<alpaka::QueueHipRtNonBlocking>();

         // FIXME the BufCpu does not support a pitch ?
         size_t const elementCount = alpaka::getExtentProduct(extent);
         size_t const byteCount = elementCount * sizeof(TElem);
         void* const rawMemory = hostCache.allocate(byteCount, queue);

         // the deleter keeps a pointer to the allocator and gives the memory
         // back to it instead of freeing it directly
         auto giveBack = [cache = &hostCache](TElem* ptr) { cache->free(ptr); };

         return alpaka::BufCpu<TElem, TDim, TIdx>(dev, static_cast<TElem*>(rawMemory), std::move(giveBack), extent);
       }
     };

     // Implementation for the HIP device, for any queue type: the memory is
     // taken from the device caching allocator selected by `dev` and TQueue.
     template <typename TElem, typename TDim, typename TIdx, typename TQueue>
     struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevHipRt, TQueue, void> {
       // Returns a BufHipRt wrapping `extent` elements of TElem allocated by
       // the device caching allocator on `queue`; the memory is handed back to
       // that allocator by the buffer's deleter.
       template <typename TExtent>
       ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevHipRt const& dev, TQueue queue, TExtent const& extent)
           -> alpaka::BufHipRt<TElem, TDim, TIdx> {
         ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

         auto& allocator = getDeviceCachingAllocator<alpaka::DevHipRt, TQueue>(dev);

         // The row pitch is computed entirely in size_t, like sizeBytes below:
         // converting sizeof(TElem) to TIdx first could truncate the element
         // size when TIdx is a narrow integer type.
         size_t const width = alpaka::getWidth(extent);
         // TODO implement pitch for TDim > 1
         size_t const pitchBytes = width * sizeof(TElem);
         size_t const size = alpaka::getExtentProduct(extent);
         size_t const sizeBytes = size * sizeof(TElem);
         void* const memPtr = allocator.allocate(sizeBytes, queue);

         // use a custom deleter to return the buffer to the CachingAllocator
         auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };

         return alpaka::BufHipRt<TElem, TDim, TIdx>(
             dev, static_cast<TElem*>(memPtr), std::move(deleter), extent, pitchBytes);
       }
     };

 #endif  // ALPAKA_ACC_GPU_HIP_ENABLED

   }  // namespace traits

   // Allocate a buffer of TElem with index type TIdx for `extent` on `dev`,
   // using `queue`, by forwarding to the traits::CachedBufAlloc specialisation
   // that matches TDev and TQueue. The buffer dimensionality is derived from
   // TExtent. When `extent` is omitted a default-constructed TExtent is used.
   // The return type is whatever the selected specialisation returns.
   template <typename TElem,
             typename TIdx,
             typename TExtent,
             typename TQueue,
             typename TDev,
             typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
   ALPAKA_FN_HOST auto allocCachedBuf(TDev const& dev, TQueue queue, TExtent const& extent = TExtent()) {
     using ExtentDim = alpaka::Dim<TExtent>;
     using Implementation = traits::CachedBufAlloc<TElem, ExtentDim, TIdx, TDev, TQueue>;
     return Implementation::allocCachedBuf(dev, queue, extent);
   }

 }  // namespace cms::alpakatools

 #endif  // HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h
findQualityFiles.size
size
Write out results.
Definition: findQualityFiles.py:443

deleter
Definition: QcdLowPtDQM.cc:34

cms::alpakatools::allocCachedBuf
ALPAKA_FN_HOST auto allocCachedBuf(TDev const &dev, TQueue queue, TExtent const &extent=TExtent())
Definition: CachedBufAlloc.h:196

createBeamHaloJobs.queue
queue
Definition: createBeamHaloJobs.py:343

cms::alpakatools
Definition: HcalRecoParamWithPulseShapeHost.h:11

funct::void
TEMPL(T2) struct Divides void
Definition: Factorize.h:24

getHostCachingAllocator.h

alpaka
Definition: HostOnlyTask.h:11

traits
Definition: PortableCollection.h:11

relativeConstraints.value
value
Definition: relativeConstraints.py:53

ApeEstimator_cff.width
width
Definition: ApeEstimator_cff.py:24

cms::alpakatools::traits::CachedBufAlloc
The caching memory allocator trait.
Definition: CachedBufAlloc.h:21

getDeviceCachingAllocator.h

eostools.move
def move(src, dest)
Definition: eostools.py:511

cms::alpakatools::traits::CachedBufAlloc< TElem, TDim, TIdx, alpaka::DevCpu, TQueue, void >::allocCachedBuf
static ALPAKA_FN_HOST auto allocCachedBuf(alpaka::DevCpu const &dev, TQueue queue, TExtent const &extent) -> alpaka::BufCpu< TElem, TDim, TIdx >
Definition: CachedBufAlloc.h:29