3 #include <cuda_runtime.h> 16 cudaCheck(cudaMalloc(&in1_d, size *
sizeof(
float)));
17 cudaCheck(cudaMalloc(&in2_d, size *
sizeof(
float)));
18 cudaCheck(cudaMalloc(&out_d, size *
sizeof(
float)));
21 cudaCheck(cudaMemcpy(in1_d, in1_h, size *
sizeof(
float), cudaMemcpyHostToDevice));
22 cudaCheck(cudaMemcpy(in2_d, in2_h, size *
sizeof(
float), cudaMemcpyHostToDevice));
25 cudaCheck(cudaMemset(out_d, 0, size *
sizeof(
float)));
31 cudaCheck(cudaMemcpy(out_h, out_d, size *
sizeof(
float), cudaMemcpyDeviceToHost));
47 cudaCheck(cudaMalloc(&in1_d, size *
sizeof(
double)));
48 cudaCheck(cudaMalloc(&in2_d, size *
sizeof(
double)));
49 cudaCheck(cudaMalloc(&out_d, size *
sizeof(
double)));
52 cudaCheck(cudaMemcpy(in1_d, in1_h, size *
sizeof(
double), cudaMemcpyHostToDevice));
53 cudaCheck(cudaMemcpy(in2_d, in2_h, size *
sizeof(
double), cudaMemcpyHostToDevice));
56 cudaCheck(cudaMemset(out_d, 0, size *
sizeof(
double)));
62 cudaCheck(cudaMemcpy(out_h, out_d, size *
sizeof(
double), cudaMemcpyDeviceToHost));
void wrapper_add_vectors_d(const double *__restrict__ in1, const double *__restrict__ in2, double *__restrict__ out, size_t size)
void wrapper_add_vectors_f(const float *__restrict__ in1, const float *__restrict__ in2, float *__restrict__ out, size_t size)
void opaque_add_vectors_d(const double *in1, const double *in2, double *out, size_t size)
void opaque_add_vectors_f(const float *in1, const float *in2, float *out, size_t size)
#define cudaCheck(ARG,...)