CMS 3D CMS Logo

Classes | Functions
cms::cudatest Namespace Reference

Classes

class  Thing
 

Functions

__device__ void add_vectors_d (const double *__restrict__ in1, const double *__restrict__ in2, double *__restrict__ out, size_t size)
 
__device__ void add_vectors_f (const float *__restrict__ in1, const float *__restrict__ in2, float *__restrict__ out, size_t size)
 
__global__ void kernel_add_vectors_d (const double *__restrict__ in1, const double *__restrict__ in2, double *__restrict__ out, size_t size)
 
__global__ void kernel_add_vectors_f (const float *__restrict__ in1, const float *__restrict__ in2, float *__restrict__ out, size_t size)
 
void opaque_add_vectors_d (const double *in1, const double *in2, double *out, size_t size)
 
void opaque_add_vectors_f (const float *in1, const float *in2, float *out, size_t size)
 
void requireDevices ()
 Print message and exit if there are no CUDA devices. More...
 
bool testDevices ()
 In presence of CUDA devices, return true; otherwise print message and return false. More...
 
void wrapper_add_vectors_d (const double *__restrict__ in1, const double *__restrict__ in2, double *__restrict__ out, size_t size)
 
void wrapper_add_vectors_f (const float *__restrict__ in1, const float *__restrict__ in2, float *__restrict__ out, size_t size)
 

Function Documentation

◆ add_vectors_d()

__device__ void cms::cudatest::add_vectors_d ( const double *__restrict__  in1,
const double *__restrict__  in2,
double *__restrict__  out,
size_t  size 
)

Definition at line 22 of file DeviceAddition.hip.cc.

References cms::cudacompat::blockDim, cms::cudacompat::blockIdx, cms::cudacompat::gridDim, mps_fire::i, MillePedeFileConverter_cfg::out, riemannFit::stride, and cms::cudacompat::threadIdx.

Referenced by cms::rocmtest::kernel_add_vectors_d().

25  {
26  uint32_t thread = threadIdx.x + blockIdx.x * blockDim.x;
27  uint32_t stride = blockDim.x * gridDim.x;
28 
29  for (size_t i = thread; i < size; i += stride) {
30  out[i] = in1[i] + in2[i];
31  }
32  }
const dim3 threadIdx
Definition: cudaCompat.h:29
size
Write out results.
const dim3 gridDim
Definition: cudaCompat.h:33
const dim3 blockDim
Definition: cudaCompat.h:30
constexpr uint32_t stride
Definition: HelixFit.h:17
const dim3 blockIdx
Definition: cudaCompat.h:32

◆ add_vectors_f()

__device__ void cms::cudatest::add_vectors_f ( const float *__restrict__  in1,
const float *__restrict__  in2,
float *__restrict__  out,
size_t  size 
)

Definition at line 10 of file DeviceAddition.hip.cc.

References cms::cudacompat::blockDim, cms::cudacompat::blockIdx, cms::cudacompat::gridDim, mps_fire::i, MillePedeFileConverter_cfg::out, riemannFit::stride, and cms::cudacompat::threadIdx.

Referenced by cms::rocmtest::kernel_add_vectors_f(), and HeterogeneousCoreROCmTestDevicePlugins::kernel_add_vectors_f().

13  {
14  uint32_t thread = threadIdx.x + blockIdx.x * blockDim.x;
15  uint32_t stride = blockDim.x * gridDim.x;
16 
17  for (size_t i = thread; i < size; i += stride) {
18  out[i] = in1[i] + in2[i];
19  }
20  }
const dim3 threadIdx
Definition: cudaCompat.h:29
size
Write out results.
const dim3 gridDim
Definition: cudaCompat.h:33
const dim3 blockDim
Definition: cudaCompat.h:30
constexpr uint32_t stride
Definition: HelixFit.h:17
const dim3 blockIdx
Definition: cudaCompat.h:32

◆ kernel_add_vectors_d()

__global__ void cms::cudatest::kernel_add_vectors_d ( const double *__restrict__  in1,
const double *__restrict__  in2,
double *__restrict__  out,
size_t  size 
)

Definition at line 17 of file DeviceAdditionKernel.hip.cc.

References cms::rocmtest::add_vectors_d(), and MillePedeFileConverter_cfg::out.

20  {
21  add_vectors_d(in1, in2, out, size);
22  }
size
Write out results.
__device__ void add_vectors_d(const double *__restrict__ in1, const double *__restrict__ in2, double *__restrict__ out, size_t size)

◆ kernel_add_vectors_f()

__global__ void cms::cudatest::kernel_add_vectors_f ( const float *__restrict__  in1,
const float *__restrict__  in2,
float *__restrict__  out,
size_t  size 
)

Definition at line 12 of file ROCmTestDeviceAdditionAlgo.hip.cc.

References cms::rocmtest::add_vectors_f(), and MillePedeFileConverter_cfg::out.

15  {
16  cms::rocmtest::add_vectors_f(in1, in2, out, size);
17  }
size
Write out results.
__device__ void add_vectors_f(const float *__restrict__ in1, const float *__restrict__ in2, float *__restrict__ out, size_t size)

◆ opaque_add_vectors_d()

void cms::cudatest::opaque_add_vectors_d ( const double *  in1,
const double *  in2,
double *  out,
size_t  size 
)

Definition at line 42 of file DeviceAdditionOpaque.cc.

References cudaCheck, and wrapper_add_vectors_d().

42  {
43  // allocate input and output buffers on the device
44  double* in1_d;
45  double* in2_d;
46  double* out_d;
47  cudaCheck(cudaMalloc(&in1_d, size * sizeof(double)));
48  cudaCheck(cudaMalloc(&in2_d, size * sizeof(double)));
49  cudaCheck(cudaMalloc(&out_d, size * sizeof(double)));
50 
51  // copy the input data to the device
52  cudaCheck(cudaMemcpy(in1_d, in1_h, size * sizeof(double), cudaMemcpyHostToDevice));
53  cudaCheck(cudaMemcpy(in2_d, in2_h, size * sizeof(double), cudaMemcpyHostToDevice));
54 
55  // fill the output buffer with zeros
56  cudaCheck(cudaMemset(out_d, 0, size * sizeof(double)));
57 
58  // launch the 1-dimensional kernel for vector addition
59  wrapper_add_vectors_d(in1_d, in2_d, out_d, size);
60 
61  // copy the results from the device to the host
62  cudaCheck(cudaMemcpy(out_h, out_d, size * sizeof(double), cudaMemcpyDeviceToHost));
63 
64  // wait for all the operations to complete
65  cudaCheck(cudaDeviceSynchronize());
66 
67  // free the input and output buffers on the device
68  cudaCheck(cudaFree(in1_d));
69  cudaCheck(cudaFree(in2_d));
70  cudaCheck(cudaFree(out_d));
71  }
size
Write out results.
void wrapper_add_vectors_d(const double *__restrict__ in1, const double *__restrict__ in2, double *__restrict__ out, size_t size)
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69

◆ opaque_add_vectors_f()

void cms::cudatest::opaque_add_vectors_f ( const float *  in1,
const float *  in2,
float *  out,
size_t  size 
)

Definition at line 11 of file DeviceAdditionOpaque.cc.

References cudaCheck, and wrapper_add_vectors_f().

Referenced by CUDATestOpaqueAdditionModule::analyze().

11  {
12  // allocate input and output buffers on the device
13  float* in1_d;
14  float* in2_d;
15  float* out_d;
16  cudaCheck(cudaMalloc(&in1_d, size * sizeof(float)));
17  cudaCheck(cudaMalloc(&in2_d, size * sizeof(float)));
18  cudaCheck(cudaMalloc(&out_d, size * sizeof(float)));
19 
20  // copy the input data to the device
21  cudaCheck(cudaMemcpy(in1_d, in1_h, size * sizeof(float), cudaMemcpyHostToDevice));
22  cudaCheck(cudaMemcpy(in2_d, in2_h, size * sizeof(float), cudaMemcpyHostToDevice));
23 
24  // fill the output buffer with zeros
25  cudaCheck(cudaMemset(out_d, 0, size * sizeof(float)));
26 
27  // launch the 1-dimensional kernel for vector addition
28  wrapper_add_vectors_f(in1_d, in2_d, out_d, size);
29 
30  // copy the results from the device to the host
31  cudaCheck(cudaMemcpy(out_h, out_d, size * sizeof(float), cudaMemcpyDeviceToHost));
32 
33  // wait for all the operations to complete
34  cudaCheck(cudaDeviceSynchronize());
35 
36  // free the input and output buffers on the device
37  cudaCheck(cudaFree(in1_d));
38  cudaCheck(cudaFree(in2_d));
39  cudaCheck(cudaFree(out_d));
40  }
size
Write out results.
void wrapper_add_vectors_f(const float *__restrict__ in1, const float *__restrict__ in2, float *__restrict__ out, size_t size)
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69

◆ requireDevices()

void cms::cudatest::requireDevices ( )

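Print a message and exit with EXIT_SUCCESS if the CUDA runtime cannot be initialised or no CUDA devices are available (i.e. whenever testDevices() returns false).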

Definition at line 25 of file requireDevices.cc.

References beamvalidation::exit(), and testDevices().

25  {
26  if (not testDevices()) {
27  exit(EXIT_SUCCESS);
28  }
29  }
bool testDevices()
In presence of CUDA devices, return true; otherwise print message and return false.
def exit(msg="")

◆ testDevices()

bool cms::cudatest::testDevices ( )

Return true if at least one CUDA device is available; otherwise (including when the CUDA runtime fails to initialise) print a message to std::cerr and return false.

Definition at line 9 of file requireDevices.cc.

References DMR_cfg::cerr, cms::alpakatools::devices(), and mps_update::status.

Referenced by requireDevices().

9  {
10  int devices = 0;
11  auto status = cudaGetDeviceCount(&devices);
12  if (status != cudaSuccess) {
13  std::cerr << "Failed to initialise the CUDA runtime, the test will be skipped."
14  << "\n";
15  return false;
16  }
17  if (devices == 0) {
18  std::cerr << "No CUDA devices available, the test will be skipped."
19  << "\n";
20  return false;
21  }
22  return true;
23  }
std::vector< alpaka::Dev< TPlatform > > const & devices()
Definition: devices.h:35

◆ wrapper_add_vectors_d()

void cms::cudatest::wrapper_add_vectors_d ( const double *__restrict__  in1,
const double *__restrict__  in2,
double *__restrict__  out,
size_t  size 
)

Definition at line 20 of file DeviceAdditionWrapper.hip.cc.

References hipCheck, and MillePedeFileConverter_cfg::out.

Referenced by opaque_add_vectors_d(), and cms::rocmtest::opqaue_add_vectors_d().

23  {
24  // launch the 1-dimensional kernel for vector addition
25  kernel_add_vectors_d<<<32, 32>>>(in1, in2, out, size);
26  hipCheck(hipGetLastError());
27  }
size
Write out results.
#define hipCheck(ARG,...)
Definition: hipCheck.h:52

◆ wrapper_add_vectors_f()

void cms::cudatest::wrapper_add_vectors_f ( const float *__restrict__  in1,
const float *__restrict__  in2,
float *__restrict__  out,
size_t  size 
)