CMS 3D CMS Logo

CUDATestWrapperAdditionModule.cc
Go to the documentation of this file.
1 #include <cstddef>
2 #include <cstdint>
3 #include <iostream>
4 #include <random>
5 #include <vector>
6 
7 #include <cuda_runtime.h>
8 
19 
21 public:
23  ~CUDATestWrapperAdditionModule() override = default;
24 
25  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
26 
27  void analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const override;
28 
29 private:
30  const uint32_t size_;
31 };
32 
34  : size_(config.getParameter<uint32_t>("size")) {}
35 
38  desc.add<uint32_t>("size", 1024 * 1024);
39  descriptions.addWithDefaultLabel(desc);
40 }
41 
43  edm::Event const& event,
44  edm::EventSetup const& setup) const {
45  // require CUDA for running
47  if (not cuda or not cuda->enabled()) {
48  std::cout << "The CUDAService is not available or disabled, the test will be skipped.\n";
49  return;
50  }
51 
52  // random number generator with a gaussian distribution
53  std::random_device rd{};
54  std::default_random_engine rand{rd()};
55  std::normal_distribution<float> dist{0., 1.};
56 
57  // tolerance
58  constexpr float epsilon = 0.000001;
59 
60  // allocate input and output host buffers
61  std::vector<float> in1_h(size_);
62  std::vector<float> in2_h(size_);
63  std::vector<float> out_h(size_);
64 
65  // fill the input buffers with random data, and the output buffer with zeros
66  for (size_t i = 0; i < size_; ++i) {
67  in1_h[i] = dist(rand);
68  in2_h[i] = dist(rand);
69  out_h[i] = 0.;
70  }
71 
72  // allocate input and output buffers on the device
73  float* in1_d;
74  float* in2_d;
75  float* out_d;
76  cudaCheck(cudaMalloc(&in1_d, size_ * sizeof(float)));
77  cudaCheck(cudaMalloc(&in2_d, size_ * sizeof(float)));
78  cudaCheck(cudaMalloc(&out_d, size_ * sizeof(float)));
79 
80  // copy the input data to the device
81  cudaCheck(cudaMemcpy(in1_d, in1_h.data(), size_ * sizeof(float), cudaMemcpyHostToDevice));
82  cudaCheck(cudaMemcpy(in2_d, in2_h.data(), size_ * sizeof(float), cudaMemcpyHostToDevice));
83 
84  // fill the output buffer with zeros
85  cudaCheck(cudaMemset(out_d, 0, size_ * sizeof(float)));
86 
87  // launch the 1-dimensional kernel for vector addition
88  cms::cudatest::wrapper_add_vectors_f(in1_d, in2_d, out_d, size_);
89 
90  // copy the results from the device to the host
91  cudaCheck(cudaMemcpy(out_h.data(), out_d, size_ * sizeof(float), cudaMemcpyDeviceToHost));
92 
93  // wait for all the operations to complete
94  cudaCheck(cudaDeviceSynchronize());
95 
96  // check the results
97  for (size_t i = 0; i < size_; ++i) {
98  float sum = in1_h[i] + in2_h[i];
99  assert(out_h[i] < sum + epsilon);
100  assert(out_h[i] > sum - epsilon);
101  }
102 
103  std::cout << "All tests passed.\n";
104 }
105 
void addWithDefaultLabel(ParameterSetDescription const &psetDescription)
~CUDATestWrapperAdditionModule() override=default
void wrapper_add_vectors_f(const float *__restrict__ in1, const float *__restrict__ in2, float *__restrict__ out, size_t size)
Definition: config.py:1
assert(be >=bs)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
The Signals That Services Can Subscribe To. This is based on ActivityRegistry and is current per [date lost in extraction]. Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application. Each possible callback has some defined arguments, which we here list in angle brackets, e.g. < void, edm::EventID const &, edm::Timestamp const & >. We also list in braces which AR_WATCH_USING_METHOD_ is used for those callbacks, or [text truncated]
Definition: Activities.doc:12
#define DEFINE_FWK_MODULE(type)
Definition: MakerMacros.h:16
void analyze(edm::StreamID, edm::Event const &event, edm::EventSetup const &setup) const override
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69
CUDATestWrapperAdditionModule(edm::ParameterSet const &config)
Definition: event.py:1