CMS 3D CMS Logo

CUDATestKernelAdditionModule.cc
Go to the documentation of this file.
1 #include <cstddef>
2 #include <cstdint>
3 #include <iostream>
4 #include <random>
5 #include <vector>
6 
7 #include <cuda_runtime.h>
8 
18 
20 
22 public:
24  ~CUDATestKernelAdditionModule() override = default;
25 
26  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
27 
28  void analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const override;
29 
30 private:
31  const uint32_t size_;
32 };
33 
35  : size_(config.getParameter<uint32_t>("size")) {}
36 
39  desc.add<uint32_t>("size", 1024 * 1024);
40  descriptions.addWithDefaultLabel(desc);
41 }
42 
44  // require CUDA for running
46  if (not service or not service->enabled()) {
47  std::cout << "The CUDAService is not available or disabled, the test will be skipped.\n";
48  return;
49  }
50 
51  // random number generator with a gaussian distribution
52  std::random_device rd{};
53  std::default_random_engine rand{rd()};
54  std::normal_distribution<float> dist{0., 1.};
55 
56  // tolerance
57  constexpr float epsilon = 0.000001;
58 
59  // allocate input and output host buffers
60  std::vector<float> in1_h(size_);
61  std::vector<float> in2_h(size_);
62  std::vector<float> out_h(size_);
63 
64  // fill the input buffers with random data, and the output buffer with zeros
65  for (size_t i = 0; i < size_; ++i) {
66  in1_h[i] = dist(rand);
67  in2_h[i] = dist(rand);
68  out_h[i] = 0.;
69  }
70 
71  // allocate input and output buffers on the device
72  float* in1_d;
73  float* in2_d;
74  float* out_d;
75  cudaCheck(cudaMalloc(&in1_d, size_ * sizeof(float)));
76  cudaCheck(cudaMalloc(&in2_d, size_ * sizeof(float)));
77  cudaCheck(cudaMalloc(&out_d, size_ * sizeof(float)));
78 
79  // copy the input data to the device
80  cudaCheck(cudaMemcpy(in1_d, in1_h.data(), size_ * sizeof(float), cudaMemcpyHostToDevice));
81  cudaCheck(cudaMemcpy(in2_d, in2_h.data(), size_ * sizeof(float), cudaMemcpyHostToDevice));
82 
83  // fill the output buffer with zeros
84  cudaCheck(cudaMemset(out_d, 0, size_ * sizeof(float)));
85 
86  // launch the 1-dimensional kernel for vector addition
88 
89  // copy the results from the device to the host
90  cudaCheck(cudaMemcpy(out_h.data(), out_d, size_ * sizeof(float), cudaMemcpyDeviceToHost));
91 
92  // wait for all the operations to complete
93  cudaCheck(cudaDeviceSynchronize());
94 
95  // check the results
96  for (size_t i = 0; i < size_; ++i) {
97  float sum = in1_h[i] + in2_h[i];
98  assert(out_h[i] < sum + epsilon);
99  assert(out_h[i] > sum - epsilon);
100  }
101 
102  std::cout << "All tests passed.\n";
103 }
104 
void analyze(edm::StreamID, edm::Event const &event, edm::EventSetup const &setup) const override
void addWithDefaultLabel(ParameterSetDescription const &psetDescription)
Definition: config.py:1
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
assert(be >=bs)
The Signals That Services Can Subscribe To. This is based on ActivityRegistry and is current per […]. Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application. Each possible callback has some defined […], which we list here in angle brackets, e.g. <void, edm::EventID const &, edm::Timestamp const &>. We also list in braces which AR_WATCH_USING_METHOD_ is used for those, or […]
Definition: Activities.doc:12
~CUDATestKernelAdditionModule() override=default
#define DEFINE_FWK_MODULE(type)
Definition: MakerMacros.h:16
void wrapper_add_vectors_f(const float *__restrict__ in1, const float *__restrict__ in2, float *__restrict__ out, size_t size)
CUDATestKernelAdditionModule(edm::ParameterSet const &config)
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69
Definition: event.py:1