3 #include <cuda_runtime.h> 21 cudaCheck(cudaMemcpy(in1_d, in1_h,
size *
sizeof(
float), cudaMemcpyHostToDevice));
22 cudaCheck(cudaMemcpy(in2_d, in2_h,
size *
sizeof(
float), cudaMemcpyHostToDevice));
31 cudaCheck(cudaMemcpy(out_h, out_d,
size *
sizeof(
float), cudaMemcpyDeviceToHost));
52 cudaCheck(cudaMemcpy(in1_d, in1_h,
size *
sizeof(
double), cudaMemcpyHostToDevice));
53 cudaCheck(cudaMemcpy(in2_d, in2_h,
size *
sizeof(
double), cudaMemcpyHostToDevice));
62 cudaCheck(cudaMemcpy(out_h, out_d,
size *
sizeof(
double), cudaMemcpyDeviceToHost));
void wrapper_add_vectors_d(const double *__restrict__ in1, const double *__restrict__ in2, double *__restrict__ out, size_t size)
void wrapper_add_vectors_f(const float *__restrict__ in1, const float *__restrict__ in2, float *__restrict__ out, size_t size)
void opaque_add_vectors_d(const double *in1, const double *in2, double *out, size_t size)
void opaque_add_vectors_f(const float *in1, const float *in2, float *out, size_t size)
#define cudaCheck(ARG,...)