CMS 3D CMS Logo

DeviceAdditionOpaque.cc
Go to the documentation of this file.
1 #include <cstddef>
2 
3 #include <cuda_runtime.h>
4 
8 
9 namespace cms::cudatest {
10 
11  void opaque_add_vectors_f(const float* in1_h, const float* in2_h, float* out_h, size_t size) {
12  // allocate input and output buffers on the device
13  float* in1_d;
14  float* in2_d;
15  float* out_d;
16  cudaCheck(cudaMalloc(&in1_d, size * sizeof(float)));
17  cudaCheck(cudaMalloc(&in2_d, size * sizeof(float)));
18  cudaCheck(cudaMalloc(&out_d, size * sizeof(float)));
19 
20  // copy the input data to the device
21  cudaCheck(cudaMemcpy(in1_d, in1_h, size * sizeof(float), cudaMemcpyHostToDevice));
22  cudaCheck(cudaMemcpy(in2_d, in2_h, size * sizeof(float), cudaMemcpyHostToDevice));
23 
24  // fill the output buffer with zeros
25  cudaCheck(cudaMemset(out_d, 0, size * sizeof(float)));
26 
27  // launch the 1-dimensional kernel for vector addition
28  wrapper_add_vectors_f(in1_d, in2_d, out_d, size);
29 
30  // copy the results from the device to the host
31  cudaCheck(cudaMemcpy(out_h, out_d, size * sizeof(float), cudaMemcpyDeviceToHost));
32 
33  // wait for all the operations to complete
34  cudaCheck(cudaDeviceSynchronize());
35 
36  // free the input and output buffers on the device
37  cudaCheck(cudaFree(in1_d));
38  cudaCheck(cudaFree(in2_d));
39  cudaCheck(cudaFree(out_d));
40  }
41 
42  void opaque_add_vectors_d(const double* in1_h, const double* in2_h, double* out_h, size_t size) {
43  // allocate input and output buffers on the device
44  double* in1_d;
45  double* in2_d;
46  double* out_d;
47  cudaCheck(cudaMalloc(&in1_d, size * sizeof(double)));
48  cudaCheck(cudaMalloc(&in2_d, size * sizeof(double)));
49  cudaCheck(cudaMalloc(&out_d, size * sizeof(double)));
50 
51  // copy the input data to the device
52  cudaCheck(cudaMemcpy(in1_d, in1_h, size * sizeof(double), cudaMemcpyHostToDevice));
53  cudaCheck(cudaMemcpy(in2_d, in2_h, size * sizeof(double), cudaMemcpyHostToDevice));
54 
55  // fill the output buffer with zeros
56  cudaCheck(cudaMemset(out_d, 0, size * sizeof(double)));
57 
58  // launch the 1-dimensional kernel for vector addition
59  wrapper_add_vectors_d(in1_d, in2_d, out_d, size);
60 
61  // copy the results from the device to the host
62  cudaCheck(cudaMemcpy(out_h, out_d, size * sizeof(double), cudaMemcpyDeviceToHost));
63 
64  // wait for all the operations to complete
65  cudaCheck(cudaDeviceSynchronize());
66 
67  // free the input and output buffers on the device
68  cudaCheck(cudaFree(in1_d));
69  cudaCheck(cudaFree(in2_d));
70  cudaCheck(cudaFree(out_d));
71  }
72 
73 } // namespace cms::cudatest
void wrapper_add_vectors_d(const double *__restrict__ in1, const double *__restrict__ in2, double *__restrict__ out, size_t size)
void wrapper_add_vectors_f(const float *__restrict__ in1, const float *__restrict__ in2, float *__restrict__ out, size_t size)
void opaque_add_vectors_d(const double *in1, const double *in2, double *out, size_t size)
void opaque_add_vectors_f(const float *in1, const float *in2, float *out, size_t size)
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69