CMS 3D CMS Logo

copyAsync.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_CUDAUtilities_interface_copyAsync_h
2 #define HeterogeneousCore_CUDAUtilities_interface_copyAsync_h
3 
4 #include <type_traits>
5 #include <vector>
6 
13 
14 namespace cms {
15  namespace cuda {
16 
17  // Single element
18 
19  template <typename T>
20  inline void copyAsync(device::unique_ptr<T>& dst, const host::unique_ptr<T>& src, cudaStream_t stream) {
21  // Shouldn't compile for array types because of sizeof(T), but
22  // let's add an assert with a more helpful message
23  static_assert(std::is_array<T>::value == false,
24  "For array types, use the other overload with the size parameter");
25  cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), sizeof(T), cudaMemcpyHostToDevice, stream));
26  }
27 
28  template <typename T>
29  inline void copyAsync(device::unique_ptr<T>& dst, const host::noncached::unique_ptr<T>& src, cudaStream_t stream) {
30  // Shouldn't compile for array types because of sizeof(T), but
31  // let's add an assert with a more helpful message
32  static_assert(std::is_array<T>::value == false,
33  "For array types, use the other overload with the size parameter");
34  cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), sizeof(T), cudaMemcpyHostToDevice, stream));
35  }
36 
37  template <typename T>
38  inline void copyAsync(host::unique_ptr<T>& dst, const device::unique_ptr<T>& src, cudaStream_t stream) {
39  static_assert(std::is_array<T>::value == false,
40  "For array types, use the other overload with the size parameter");
41  cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), sizeof(T), cudaMemcpyDeviceToHost, stream));
42  }
43 
44  // Multiple elements
45 
46  template <typename T>
47  inline void copyAsync(device::unique_ptr<T[]>& dst,
48  const host::unique_ptr<T[]>& src,
49  size_t nelements,
50  cudaStream_t stream) {
51  cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), nelements * sizeof(T), cudaMemcpyHostToDevice, stream));
52  }
53 
54  template <typename T>
55  inline void copyAsync(device::unique_ptr<T[]>& dst,
57  size_t nelements,
58  cudaStream_t stream) {
59  cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), nelements * sizeof(T), cudaMemcpyHostToDevice, stream));
60  }
61 
62  template <typename T>
63  inline void copyAsync(host::unique_ptr<T[]>& dst,
64  const device::unique_ptr<T[]>& src,
65  size_t nelements,
66  cudaStream_t stream) {
67  cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), nelements * sizeof(T), cudaMemcpyDeviceToHost, stream));
68  }
69 
70  // copy from a host vector using pinned memory
71  template <typename T>
74  cudaStream_t stream) {
75  cudaCheck(cudaMemcpyAsync(dst.get(), src.data(), src.size() * sizeof(T), cudaMemcpyHostToDevice, stream));
76  }
77 
78  // special case used to transfer conditions data
79  template <typename T>
82  cudaStream_t stream) {
83  cudaCheck(cudaMemcpyAsync(
84  get_underlying(dst).get(), src.data(), src.size() * sizeof(T), cudaMemcpyHostToDevice, stream));
85  }
86  } // namespace cuda
87 } // namespace cms
88 
89 #endif // HeterogeneousCore_CUDAUtilities_interface_copyAsync_h
HostAllocator.h
cms::cuda::stream
cudaStream_t stream
Definition: HistoContainer.h:57
cms::cuda::host::noncached::unique_ptr
std::unique_ptr< T, impl::HostDeleter > unique_ptr
Definition: host_noncached_unique_ptr.h:23
SiPixelRawToDigi_cfi.cuda
cuda
Definition: SiPixelRawToDigi_cfi.py:14
device_unique_ptr.h
host_noncached_unique_ptr.h
host_unique_ptr.h
cms::cuda::HostAllocator
Definition: HostAllocator.h:24
math::cholesky::dst
constexpr void M2 & dst
Definition: choleskyInversion.h:23
TrackRefitter_38T_cff.src
src
Definition: TrackRefitter_38T_cff.py:24
edm::get_underlying
constexpr T & get_underlying(propagate_const< T > &)
Definition: propagate_const.h:103
trackerHitRTTI::vector
Definition: trackerHitRTTI.h:21
cudaCheck.h
get
#define get
edm::propagate_const_array
Definition: propagate_const_array.h:61
cms::cuda::device::unique_ptr
std::unique_ptr< T, impl::DeviceDeleter > unique_ptr
Definition: device_unique_ptr.h:33
cudaCheck
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:62
propagate_const_array.h
T
long double T
Definition: Basic3DVectorLD.h:48
cms::cuda::host::unique_ptr
std::unique_ptr< T, impl::HostDeleter > unique_ptr
Definition: host_unique_ptr.h:21
relativeConstraints.value
value
Definition: relativeConstraints.py:53
cms
Namespace of DDCMS conversion namespace.
Definition: ProducerAnalyzer.cc:21
cms::cuda::copyAsync
void copyAsync(device::unique_ptr< T > &dst, const host::unique_ptr< T > &src, cudaStream_t stream)
Definition: copyAsync.h:20