dc/d30/copyAsync_8h_source.html

 #ifndef HeterogeneousCore_CUDAUtilities_interface_copyAsync_h

 #define HeterogeneousCore_CUDAUtilities_interface_copyAsync_h


 #include <type_traits>

 #include <vector>


 #include "FWCore/Utilities/interface/propagate_const_array.h"

 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"

 #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"

 #include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h"

 #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"

 #include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h"


 namespace cms {

   namespace cuda {


     // Single element


     // Host -> device transfer of exactly one object of type T.
     //
     // Requests a copy of sizeof(T) bytes from the object owned by `src` to the
     // object owned by `dst` through cudaMemcpyAsync on `stream`, and hands the
     // returned status to cudaCheck. The stream is not synchronized here.
     // NOTE(review): both buffers presumably have to stay alive until the work
     // queued on `stream` has completed - confirm against the callers.
     template <typename T>
     inline void copyAsync(device::unique_ptr<T>& dst,
                           const host::unique_ptr<T>& src,
                           cudaStream_t stream) {
       // Array instantiations belong to the overloads taking an element count;
       // reject them up front with an explicit diagnostic.
       static_assert(!std::is_array<T>::value, "For array types, use the other overload with the size parameter");
       cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), sizeof(T), cudaMemcpyHostToDevice, stream));
     }


     // Host -> device transfer of exactly one object of type T, where the host
     // side is owned by a host::noncached::unique_ptr.
     //
     // Requests a copy of sizeof(T) bytes from `src` to `dst` through
     // cudaMemcpyAsync on `stream`, and hands the returned status to cudaCheck.
     // The stream is not synchronized here.
     // NOTE(review): both buffers presumably have to stay alive until the work
     // queued on `stream` has completed - confirm against the callers.
     template <typename T>
     inline void copyAsync(device::unique_ptr<T>& dst,
                           const host::noncached::unique_ptr<T>& src,
                           cudaStream_t stream) {
       // Array instantiations belong to the overloads taking an element count;
       // reject them up front with an explicit diagnostic.
       static_assert(!std::is_array<T>::value, "For array types, use the other overload with the size parameter");
       cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), sizeof(T), cudaMemcpyHostToDevice, stream));
     }


     // Device -> host transfer of exactly one object of type T.
     //
     // Requests a copy of sizeof(T) bytes from the device object owned by `src`
     // to the host object owned by `dst` through cudaMemcpyAsync on `stream`,
     // and hands the returned status to cudaCheck. The stream is not
     // synchronized here.
     // NOTE(review): the host object presumably must not be read until the work
     // queued on `stream` has completed - confirm against the callers.
     template <typename T>
     inline void copyAsync(host::unique_ptr<T>& dst,
                           const device::unique_ptr<T>& src,
                           cudaStream_t stream) {
       // Array instantiations belong to the overloads taking an element count;
       // reject them up front with an explicit diagnostic.
       static_assert(!std::is_array<T>::value, "For array types, use the other overload with the size parameter");
       cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), sizeof(T), cudaMemcpyDeviceToHost, stream));
     }


     // Multiple elements


     // Host -> device transfer of `nelements` consecutive objects of type T.
     //
     //   dst       device array that receives the data
     //   src       host array that provides the data
     //   nelements number of T elements to copy; the byte count passed to CUDA
     //             is nelements * sizeof(T)
     //   stream    CUDA stream on which the copy is requested
     //
     // `nelements` is not checked against the size of either array here; the
     // caller is responsible for both buffers holding at least that many elements.
     // The status returned by cudaMemcpyAsync is handed to cudaCheck, and the
     // stream is not synchronized by this function.
     // NOTE(review): both buffers presumably have to stay alive until the work
     // queued on `stream` has completed - confirm against the callers.
     template <typename T>

     inline void copyAsync(device::unique_ptr<T[]>& dst,

                           const host::unique_ptr<T[]>& src,

                           size_t nelements,

                           cudaStream_t stream) {

       cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), nelements * sizeof(T), cudaMemcpyHostToDevice, stream));

     }


     // Host -> device transfer of `nelements` consecutive objects of type T,
     // where the host array is owned by a host::noncached::unique_ptr.
     //
     //   dst       device array that receives the data
     //   src       host array that provides the data
     //   nelements number of T elements to copy; the byte count passed to CUDA
     //             is nelements * sizeof(T)
     //   stream    CUDA stream on which the copy is requested
     //
     // `nelements` is not checked against the size of either array here; the
     // caller is responsible for both buffers holding at least that many elements.
     // The status returned by cudaMemcpyAsync is handed to cudaCheck, and the
     // stream is not synchronized by this function.
     // NOTE(review): both buffers presumably have to stay alive until the work
     // queued on `stream` has completed - confirm against the callers.
     template <typename T>

     inline void copyAsync(device::unique_ptr<T[]>& dst,

                           const host::noncached::unique_ptr<T[]>& src,

                           size_t nelements,

                           cudaStream_t stream) {

       cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), nelements * sizeof(T), cudaMemcpyHostToDevice, stream));

     }


     // Device -> host transfer of `nelements` consecutive objects of type T.
     //
     //   dst       host array that receives the data
     //   src       device array that provides the data
     //   nelements number of T elements to copy; the byte count passed to CUDA
     //             is nelements * sizeof(T)
     //   stream    CUDA stream on which the copy is requested
     //
     // `nelements` is not checked against the size of either array here; the
     // caller is responsible for both buffers holding at least that many elements.
     // The status returned by cudaMemcpyAsync is handed to cudaCheck, and the
     // stream is not synchronized by this function.
     // NOTE(review): the host array presumably must not be read until the work
     // queued on `stream` has completed - confirm against the callers.
     template <typename T>

     inline void copyAsync(host::unique_ptr<T[]>& dst,

                           const device::unique_ptr<T[]>& src,

                           size_t nelements,

                           cudaStream_t stream) {

       cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), nelements * sizeof(T), cudaMemcpyDeviceToHost, stream));

     }


     // copy from a host vector using pinned memory
     //
     // Host -> device transfer of the whole contents of `src`, a std::vector
     // that uses cms::cuda::HostAllocator.
     //
     //   dst    device array that receives the data
     //   src    host vector that provides the data; src.size() elements, i.e.
     //          src.size() * sizeof(T) bytes, are requested (an empty vector
     //          therefore requests a zero-byte copy)
     //   stream CUDA stream on which the copy is requested
     //
     // The capacity of `dst` is not checked here; the caller is responsible for
     // it holding at least src.size() elements. The status returned by
     // cudaMemcpyAsync is handed to cudaCheck, and the stream is not
     // synchronized by this function.
     // NOTE(review): `src` presumably must not be modified or destroyed until
     // the work queued on `stream` has completed - confirm against the callers.

     template <typename T>

     inline void copyAsync(cms::cuda::device::unique_ptr<T[]>& dst,

                           const std::vector<T, cms::cuda::HostAllocator<T>>& src,

                           cudaStream_t stream) {

       cudaCheck(cudaMemcpyAsync(dst.get(), src.data(), src.size() * sizeof(T), cudaMemcpyHostToDevice, stream));

     }


     // special case used to transfer conditions data
     //
     // Host -> device transfer of the whole contents of `src` into a device
     // array that is wrapped in an edm::propagate_const_array.
     //
     //   dst    wrapped device array; get_underlying(dst) is used to reach the
     //          device unique_ptr whose pointer is the copy destination
     //   src    host vector (cms::cuda::HostAllocator) that provides the data;
     //          src.size() * sizeof(T) bytes are requested
     //   stream CUDA stream on which the copy is requested
     //
     // The capacity of `dst` is not checked here; the caller is responsible for
     // it holding at least src.size() elements. The status returned by
     // cudaMemcpyAsync is handed to cudaCheck, and the stream is not
     // synchronized by this function.
     // NOTE(review): `src` presumably must not be modified or destroyed until
     // the work queued on `stream` has completed - confirm against the callers.

     template <typename T>

     inline void copyAsync(edm::propagate_const_array<cms::cuda::device::unique_ptr<T[]>>& dst,

                           const std::vector<T, cms::cuda::HostAllocator<T>>& src,

                           cudaStream_t stream) {

       cudaCheck(cudaMemcpyAsync(

           get_underlying(dst).get(), src.data(), src.size() * sizeof(T), cudaMemcpyHostToDevice, stream));

     }

   }  // namespace cuda

 }  // namespace cms


 #endif  // HeterogeneousCore_CUDAUtilities_interface_copyAsync_h

ecalDigis_cff.cuda
tuple cuda
Definition: ecalDigis_cff.py:35

relativeConstraints.value
tuple value
Definition: relativeConstraints.py:55

HostAllocator.h

cms::cuda::host::noncached::unique_ptr
std::unique_ptr< T, impl::HostDeleter > unique_ptr
Definition: host_noncached_unique_ptr.h:23

cms::cuda::stream
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
Definition: HistoContainer.h:51

cms::cuda::device::unique_ptr
std::unique_ptr< T, impl::DeviceDeleter > unique_ptr
Definition: device_unique_ptr.h:33

cms::cuda::copyAsync
void copyAsync(device::unique_ptr< T > &dst, const host::unique_ptr< T > &src, cudaStream_t stream)
Definition: copyAsync.h:20

host_unique_ptr.h

alcazmumu_cfi.src
tuple src
Definition: alcazmumu_cfi.py:30

edm::propagate_const_array
Definition: propagate_const_array.h:61

cms::cuda::HostAllocator
Definition: HostAllocator.h:24

cudaCheck.h

edm::get_underlying
constexpr T & get_underlying(propagate_const< T > &)
Definition: propagate_const.h:103

math::cholesky::dst
constexpr void M2 & dst
Definition: choleskyInversion.h:92

device_unique_ptr.h

trackerHitRTTI::vector
Definition: trackerHitRTTI.h:21

cms::cuda::host::unique_ptr
std::unique_ptr< T, impl::HostDeleter > unique_ptr
Definition: host_unique_ptr.h:21

host_noncached_unique_ptr.h

cudaCheck
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69

propagate_const_array.h

T
long double T
Definition: Basic3DVectorLD.h:48