CMS 3D CMS Logo

copyAsync.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_CUDAUtilities_interface_copyAsync_h
2 #define HeterogeneousCore_CUDAUtilities_interface_copyAsync_h
3 
4 #include <type_traits>
5 #include <vector>
6 
13 
14 namespace cms {
15  namespace cuda {
16 
17  // Single element
18 
19  template <typename T>
20  inline void copyAsync(device::unique_ptr<T>& dst, const host::unique_ptr<T>& src, cudaStream_t stream) {
21  // Shouldn't compile for array types because of sizeof(T), but
22  // let's add an assert with a more helpful message
23  static_assert(std::is_array<T>::value == false,
24  "For array types, use the other overload with the size parameter");
25  cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), sizeof(T), cudaMemcpyHostToDevice, stream));
26  }
27 
28  template <typename T>
29  inline void copyAsync(device::unique_ptr<T>& dst, const host::noncached::unique_ptr<T>& src, cudaStream_t stream) {
30  // Shouldn't compile for array types because of sizeof(T), but
31  // let's add an assert with a more helpful message
32  static_assert(std::is_array<T>::value == false,
33  "For array types, use the other overload with the size parameter");
34  cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), sizeof(T), cudaMemcpyHostToDevice, stream));
35  }
36 
37  template <typename T>
38  inline void copyAsync(host::unique_ptr<T>& dst, const device::unique_ptr<T>& src, cudaStream_t stream) {
39  static_assert(std::is_array<T>::value == false,
40  "For array types, use the other overload with the size parameter");
41  cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), sizeof(T), cudaMemcpyDeviceToHost, stream));
42  }
43 
44  // Multiple elements
45 
46  template <typename T>
47  inline void copyAsync(device::unique_ptr<T[]>& dst,
48  const host::unique_ptr<T[]>& src,
49  size_t nelements,
50  cudaStream_t stream) {
51  cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), nelements * sizeof(T), cudaMemcpyHostToDevice, stream));
52  }
53 
54  template <typename T>
55  inline void copyAsync(device::unique_ptr<T[]>& dst,
57  size_t nelements,
58  cudaStream_t stream) {
59  cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), nelements * sizeof(T), cudaMemcpyHostToDevice, stream));
60  }
61 
62  template <typename T>
63  inline void copyAsync(host::unique_ptr<T[]>& dst,
64  const device::unique_ptr<T[]>& src,
65  size_t nelements,
66  cudaStream_t stream) {
67  cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), nelements * sizeof(T), cudaMemcpyDeviceToHost, stream));
68  }
69 
70  // copy from a host vector using pinned memory
// NOTE(review): the declarator lines that belong between `template <typename T>`
// and `cudaStream_t stream) {` (the function name and the `dst` / `src`
// parameters) are absent from this listing; restore them from the original
// header before compiling.
// Presumably `src` is a std::vector<T> (the body uses data() and size());
// its allocator type is not visible here — TODO confirm.
71  template <typename T>
74  cudaStream_t stream) {
// Requests a host-to-device copy of src.size() * sizeof(T) bytes from
// src.data() to dst.get() on `stream`; the cudaMemcpyAsync return value is
// passed to cudaCheck.
75  cudaCheck(cudaMemcpyAsync(dst.get(), src.data(), src.size() * sizeof(T), cudaMemcpyHostToDevice, stream));
76  }
77 
78  // special case used to transfer conditions data
// NOTE(review): the declarator lines that belong between `template <typename T>`
// and `cudaStream_t stream) {` (the function name and the `dst` / `src`
// parameters) are absent from this listing; restore them from the original
// header before compiling.
// Presumably `dst` is a wrapper that get_underlying() unwraps to a smart
// pointer, and `src` is a std::vector<T> — TODO confirm both against the
// original header.
79  template <typename T>
82  cudaStream_t stream) {
// Requests a host-to-device copy of src.size() * sizeof(T) bytes from
// src.data() to get_underlying(dst).get() on `stream`; the cudaMemcpyAsync
// return value is passed to cudaCheck.
83  cudaCheck(cudaMemcpyAsync(
84  get_underlying(dst).get(), src.data(), src.size() * sizeof(T), cudaMemcpyHostToDevice, stream));
85  }
86  } // namespace cuda
87 } // namespace cms
88 
89 #endif // HeterogeneousCore_CUDAUtilities_interface_copyAsync_h
std::unique_ptr< T, impl::HostDeleter > unique_ptr
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
std::unique_ptr< T, impl::DeviceDeleter > unique_ptr
void copyAsync(device::unique_ptr< T > &dst, const host::unique_ptr< T > &src, cudaStream_t stream)
Definition: copyAsync.h:20
Namespace of DDCMS conversion namespace.
constexpr T & get_underlying(propagate_const< T > &)
constexpr void M2 & dst
std::unique_ptr< T, impl::HostDeleter > unique_ptr
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69
long double T