CMS 3D CMS Logo

copyAsync.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_CUDAUtilities_copyAsync_h
2 #define HeterogeneousCore_CUDAUtilities_copyAsync_h
3 
7 
8 #include <type_traits>
9 
10 namespace cms {
11  namespace cuda {
12  // Single element
13  template <typename T>
14  inline void copyAsync(device::unique_ptr<T>& dst, const host::unique_ptr<T>& src, cudaStream_t stream) {
15  // Shouldn't compile for array types because of sizeof(T), but
16  // let's add an assert with a more helpful message
17  static_assert(std::is_array<T>::value == false,
18  "For array types, use the other overload with the size parameter");
19  cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), sizeof(T), cudaMemcpyHostToDevice, stream));
20  }
21 
22  template <typename T>
23  inline void copyAsync(host::unique_ptr<T>& dst, const device::unique_ptr<T>& src, cudaStream_t stream) {
24  static_assert(std::is_array<T>::value == false,
25  "For array types, use the other overload with the size parameter");
26  cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), sizeof(T), cudaMemcpyDeviceToHost, stream));
27  }
28 
29  // Multiple elements
30  template <typename T>
31  inline void copyAsync(device::unique_ptr<T[]>& dst,
32  const host::unique_ptr<T[]>& src,
33  size_t nelements,
34  cudaStream_t stream) {
35  cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), nelements * sizeof(T), cudaMemcpyHostToDevice, stream));
36  }
37 
38  template <typename T>
39  inline void copyAsync(host::unique_ptr<T[]>& dst,
40  const device::unique_ptr<T[]>& src,
41  size_t nelements,
42  cudaStream_t stream) {
43  cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), nelements * sizeof(T), cudaMemcpyDeviceToHost, stream));
44  }
45  } // namespace cuda
46 } // namespace cms
47 
48 #endif
cms::cuda::stream
cudaStream_t stream
Definition: HistoContainer.h:57
device_unique_ptr.h
host_unique_ptr.h
prod1Switch_cff.cuda
cuda
Definition: prod1Switch_cff.py:11
TrackRefitter_38T_cff.src
src
Definition: TrackRefitter_38T_cff.py:24
cudaCheck.h
cms::cuda::device::unique_ptr
std::unique_ptr< T, impl::DeviceDeleter > unique_ptr
Definition: device_unique_ptr.h:33
cudaCheck
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:62
T
long double T
Definition: Basic3DVectorLD.h:48
cms::cuda::host::unique_ptr
std::unique_ptr< T, impl::HostDeleter > unique_ptr
Definition: host_unique_ptr.h:21
relativeConstraints.value
value
Definition: relativeConstraints.py:53
math::cholesky::dst
M2 & dst
Definition: choleskyInversion.h:158
cms
Namespace for the DDCMS conversion.
Definition: ProducerAnalyzer.cc:21
cms::cuda::copyAsync
void copyAsync(device::unique_ptr< T > &dst, const host::unique_ptr< T > &src, cudaStream_t stream)
Definition: copyAsync.h:14