CMS 3D CMS Logo

ROCmService.cc
Go to the documentation of this file.
1 #include <iomanip>
2 #include <iostream>
3 #include <limits>
4 #include <set>
5 #include <string>
6 #include <vector>
7 
8 #include <hip/hip_runtime.h>
9 #include <rocm_version.h>
10 #include <rocm_smi/rocm_smi.h>
11 
21 
22 class ROCmService : public ROCmInterface {
23 public:
25  ~ROCmService() override;
26 
27  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
28 
29  bool enabled() const final { return enabled_; }
30 
31  int numberOfDevices() const final { return numberOfDevices_; }
32 
33  // Return the (major, minor) compute capability of the given device.
34  std::pair<int, int> computeCapability(int device) const final {
35  int size = computeCapabilities_.size();
36  if (device < 0 or device >= size) {
37  throw std::out_of_range("Invalid device index" + std::to_string(device) + ": the valid range is from 0 to " +
38  std::to_string(size - 1));
39  }
40  return computeCapabilities_[device];
41  }
42 
43 private:
45  std::vector<std::pair<int, int>> computeCapabilities_;
46  bool enabled_ = false;
47  bool verbose_ = false;
48 };
49 
50 void setHipLimit(hipLimit_t limit, const char* name, size_t request) {
51 #if HIP_VERSION >= 50400000
52  // read the current device
53  int device;
54  hipCheck(hipGetDevice(&device));
55  // try to set the requested limit
56  auto result = hipDeviceSetLimit(limit, request);
57  if (hipErrorUnsupportedLimit == result) {
58  edm::LogWarning("ROCmService") << "ROCm device " << device << ": unsupported limit \"" << name << "\"";
59  return;
60  }
61  // read back the limit value
62  size_t value;
63  result = hipDeviceGetLimit(&value, limit);
64  if (hipSuccess != result) {
65  edm::LogWarning("ROCmService") << "ROCm device " << device << ": failed to set limit \"" << name << "\" to "
66  << request << ", current value is " << value;
67  } else if (value != request) {
68  edm::LogWarning("ROCmService") << "ROCm device " << device << ": limit \"" << name << "\" set to " << value
69  << " instead of requested " << request;
70  }
71 #else
72  edm::LogWarning("ROCmService") << "ROCm versions below 5.4.0 do not support setting device limits.";
73 #endif
74 }
75 
/// Convert a packed version number into a "major.minor.patch" string.
///
/// The integer is interpreted as major * 10000000 + minor * 100000 + patch,
/// so that 50631061 is decoded as "5.6.31061".
std::string decodeVersion(int version) {
  constexpr int majorScale = 10000000;
  constexpr int minorScale = 100000;

  const int major = version / majorScale;
  const int minor = version / minorScale % 100;
  const int patch = version % minorScale;
  return std::to_string(major) + '.' + std::to_string(minor) + '.' + std::to_string(patch);
}
81 
83 ROCmService::ROCmService(edm::ParameterSet const& config) : verbose_(config.getUntrackedParameter<bool>("verbose")) {
84  if (not config.getUntrackedParameter<bool>("enabled")) {
85  edm::LogInfo("ROCmService") << "ROCmService disabled by configuration";
86  return;
87  }
88 
89  auto status = hipGetDeviceCount(&numberOfDevices_);
90  if (hipSuccess != status) {
91  edm::LogWarning("ROCmService") << "Failed to initialize the ROCm runtime.\n"
92  << "Disabling the ROCmService.";
93  return;
94  }
96 
97  // AMD system driver version, e.g. 5.16.9.22.20 or 6.1.5
98  char systemDriverVersion[256];
99  rsmiCheck(rsmi_init(0x00));
100  rsmiCheck(rsmi_version_str_get(RSMI_SW_COMP_DRIVER, systemDriverVersion, sizeof(systemDriverVersion) - 1));
101  rsmiCheck(rsmi_shut_down());
102 
103  // ROCm driver version, e.g. 11.4
104  // the full version, like 11.4.1 or 11.4.100, is not reported
105  int driverVersion = 0;
106  hipCheck(hipDriverGetVersion(&driverVersion));
107 
108  // ROCm runtime version, e.g. 11.4
109  // the full version, like 11.4.1 or 11.4.108, is not reported
110  int runtimeVersion = 0;
111  hipCheck(hipRuntimeGetVersion(&runtimeVersion));
112 
113  edm::LogInfo log("ROCmService");
114  if (verbose_) {
115  log << "AMD kernel driver: " << systemDriverVersion << '\n';
116  log << "ROCm driver API: " << decodeVersion(driverVersion) << " (compiled with ROCm " <<
117 #ifdef ROCM_BUILD_INFO
118  // ROCM_BUILD_INFO has been introduced in ROCm 5.5.0
119  ROCM_BUILD_INFO
120 #else
121  ROCM_VERSION_MAJOR << '.' << ROCM_VERSION_MINOR << '.' << ROCM_VERSION_PATCH
122 #endif
123  << ")\n";
124  log << "ROCm runtime API: " << decodeVersion(runtimeVersion) << " (compiled with HIP " << HIP_VERSION_MAJOR << '.'
125  << HIP_VERSION_MINOR << '.' << HIP_VERSION_PATCH << ")\n";
126  log << "ROCm runtime successfully initialised, found " << numberOfDevices_ << " compute devices.\n";
127  } else {
128  log << "ROCm runtime version " << decodeVersion(runtimeVersion) << ", driver version "
129  << decodeVersion(driverVersion) << ", AMD driver version " << systemDriverVersion;
130  }
131 
132 #if HIP_VERSION >= 50400000
133  auto const& limits = config.getUntrackedParameter<edm::ParameterSet>("limits");
134  auto stackSize = limits.getUntrackedParameter<int>("hipLimitStackSize");
135  auto mallocHeapSize = limits.getUntrackedParameter<int>("hipLimitMallocHeapSize");
136 #endif
137 
138  std::set<std::string> models;
139 
140  for (int i = 0; i < numberOfDevices_; ++i) {
141  // read information about the compute device.
142  // see the documentation of hipGetDeviceProperties() for more information.
143  hipDeviceProp_t properties;
144  hipCheck(hipGetDeviceProperties(&properties, i));
145  log << '\n' << "ROCm device " << i << ": " << properties.name;
146  if (verbose_) {
147  log << '\n';
148  }
149  models.insert(std::string(properties.name));
150 
151  // compute capabilities
152  computeCapabilities_.emplace_back(properties.major, properties.minor);
153  if (verbose_) {
154  log << " compute capability: " << properties.gcnArchName;
155  } else {
156  log << " (" << properties.gcnArchName << ")";
157  }
158  if (verbose_) {
159  log << '\n';
160  log << " streaming multiprocessors: " << std::setw(13) << properties.multiProcessorCount << '\n';
161  log << " ROCm cores: " << std::setw(28) << "not yet implemented" << '\n';
162  // ROCm does not provide single to double performance ratio
163  }
164 
165  // compute mode
166  static constexpr const char* computeModeDescription[] = {
167  "default (shared)", // hipComputeModeDefault
168  "exclusive (single thread)", // hipComputeModeExclusive
169  "prohibited", // hipComputeModeProhibited
170  "exclusive (single process)", // hipComputeModeExclusiveProcess
171  "unknown"};
172  if (verbose_) {
173  log << " compute mode:" << std::right << std::setw(27)
174  << computeModeDescription[std::min(properties.computeMode,
175  static_cast<int>(std::size(computeModeDescription)) - 1)]
176  << '\n';
177  }
178 
179  // TODO if a device is in exclusive use, skip it and remove it from the list, instead of failing with an exception
180  hipCheck(hipSetDevice(i));
181  hipCheck(hipSetDeviceFlags(hipDeviceScheduleAuto | hipDeviceMapHost));
182 
183  if (verbose_) {
184  // read the free and total amount of memory available for allocation by the device, in bytes.
185  // see the documentation of hipMemGetInfo() for more information.
186  size_t freeMemory = 0;
187  size_t totalMemory = 0;
188  hipCheck(hipMemGetInfo(&freeMemory, &totalMemory));
189  log << " memory: " << std::setw(6) << freeMemory / (1 << 20) << " MB free / " << std::setw(6)
190  << totalMemory / (1 << 20) << " MB total\n";
191  log << " constant memory: " << std::setw(8) << properties.totalConstMem / (1 << 10) << " kB\n";
192  log << " L2 cache size: " << std::setw(8) << properties.l2CacheSize / (1 << 10) << " kB\n";
193 
194  log << '\n';
195 
196  // other capabilities
197  log << "Other capabilities\n";
198  log << " " << (properties.canMapHostMemory ? "can" : "cannot")
199  << " map host memory into the ROCm address space for use with hipHostAlloc()/hipHostGetDevicePointer()\n";
200  log << " " << (properties.pageableMemoryAccess ? "supports" : "does not support")
201  << " coherently accessing pageable memory without calling hipHostRegister() on it\n";
202  log << " " << (properties.pageableMemoryAccessUsesHostPageTables ? "can" : "cannot")
203  << " access pageable memory via the host's page tables\n";
204  log << " " << (properties.managedMemory ? "supports" : "does not support")
205  << " allocating managed memory on this system\n";
206  log << " " << (properties.concurrentManagedAccess ? "can" : "cannot")
207  << " coherently access managed memory concurrently with the host\n";
208  log << " "
209  << "the host " << (properties.directManagedMemAccessFromHost ? "can" : "cannot")
210  << " directly access managed memory on the device without migration\n";
211  log << " " << (properties.cooperativeLaunch ? "supports" : "does not support")
212  << " launching cooperative kernels via hipLaunchCooperativeKernel()\n";
213  log << " " << (properties.cooperativeMultiDeviceLaunch ? "supports" : "does not support")
214  << " launching cooperative kernels via hipLaunchCooperativeKernelMultiDevice()\n";
215  log << '\n';
216  }
217 
218  // set and read the ROCm device flags.
219  // see the documentation of hipSetDeviceFlags and hipGetDeviceFlags for more information.
220  if (verbose_) {
221  log << "ROCm flags\n";
222  unsigned int flags;
223  hipCheck(hipGetDeviceFlags(&flags));
224  switch (flags & hipDeviceScheduleMask) {
225  case hipDeviceScheduleAuto:
226  log << " thread policy: default\n";
227  break;
228  case hipDeviceScheduleSpin:
229  log << " thread policy: spin\n";
230  break;
231  case hipDeviceScheduleYield:
232  log << " thread policy: yield\n";
233  break;
234  case hipDeviceScheduleBlockingSync:
235  log << " thread policy: blocking sync\n";
236  break;
237  default:
238  log << " thread policy: undefined\n";
239  }
240  if (flags & hipDeviceMapHost) {
241  log << " pinned host memory allocations: enabled\n";
242  } else {
243  log << " pinned host memory allocations: disabled\n";
244  }
245  if (flags & hipDeviceLmemResizeToMax) {
246  log << " kernel host memory reuse: enabled\n";
247  } else {
248  log << " kernel host memory reuse: disabled\n";
249  }
250  log << '\n';
251  }
252 
253  // set and read the ROCm resource limits.
254  // see the documentation of hipDeviceSetLimit() for more information.
255 
256 #if HIP_VERSION >= 50400000
257  // hipLimitStackSize controls the stack size in bytes of each GPU thread.
258  if (stackSize >= 0) {
259  setHipLimit(hipLimitStackSize, "hipLimitStackSize", stackSize);
260  }
261  // hipLimitMallocHeapSize controls the size in bytes of the heap used by the malloc()
262  // and free() device system calls.
263  if (mallocHeapSize >= 0) {
264  setHipLimit(hipLimitMallocHeapSize, "hipLimitMallocHeapSize", mallocHeapSize);
265  }
266 #endif
267 
268  if (verbose_) {
269  size_t value;
270  log << "ROCm limits\n";
271 #if HIP_VERSION >= 50400000
272  hipCheck(hipDeviceGetLimit(&value, hipLimitStackSize));
273  log << " stack size: " << std::setw(10) << value / (1 << 10) << " kB\n";
274 #endif
275  hipCheck(hipDeviceGetLimit(&value, hipLimitMallocHeapSize));
276  log << " malloc heap size: " << std::setw(10) << value / (1 << 20) << " MB\n";
277  }
278  }
279 
280  edm::Service<edm::ResourceInformation> resourceInformationService;
281  if (resourceInformationService.isAvailable()) {
282  std::vector<std::string> modelsV(models.begin(), models.end());
283  resourceInformationService->setGPUModels(modelsV);
284  /*
285  std::string nvidiaDriverVersion{systemDriverVersion};
286  resourceInformationService->setNvidiaDriverVersion(nvidiaDriverVersion);
287  resourceInformationService->setCudaDriverVersion(driverVersion);
288  resourceInformationService->setCudaRuntimeVersion(runtimeVersion);
289  */
290  }
291 
292  if (verbose_) {
293  log << '\n' << "ROCmService fully initialized";
294  }
295  enabled_ = true;
296 }
297 
299  if (enabled_) {
300  for (int i = 0; i < numberOfDevices_; ++i) {
301  hipCheck(hipSetDevice(i));
302  hipCheck(hipDeviceSynchronize());
303  // Explicitly destroys and cleans up all resources associated with the current device in the
304  // current process. Any subsequent API call to this device will reinitialize the device.
305  // Useful to check for memory leaks.
306  hipCheck(hipDeviceReset());
307  }
308  }
309 }
310 
313  desc.addUntracked<bool>("enabled", true);
314  desc.addUntracked<bool>("verbose", false);
315 
316 #if HIP_VERSION >= 50400000
318  limits.addUntracked<int>("hipLimitStackSize", -1)->setComment("Stack size in bytes of each GPU thread.");
319  limits.addUntracked<int>("hipLimitMallocHeapSize", -1)
320  ->setComment("Size in bytes of the heap used by the malloc() and free() device system calls.");
321  desc.addUntracked<edm::ParameterSetDescription>("limits", limits)
322  ->setComment(
323  "See the documentation of hipDeviceSetLimit for more information.\nSetting any of these options to -1 keeps "
324  "the default value.");
325 #endif
326 
327  descriptions.add("ROCmService", desc);
328 }
329 
330 namespace edm {
331  namespace service {
332  inline bool isProcessWideService(ROCmService const*) { return true; }
333  } // namespace service
334 } // namespace edm
335 
size
Write out results.
~ROCmService() override
Definition: ROCmService.cc:298
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
Definition: ServiceMaker.h:102
bool isProcessWideService(TFileService const *)
Definition: TFileService.h:98
Definition: config.py:1
Definition: models.py:1
#define rsmiCheck(ARG,...)
Definition: rsmiCheck.h:53
static std::string to_string(const XMLCh *ch)
ROCmService(edm::ParameterSet const &config)
Constructor.
Definition: ROCmService.cc:83
std::vector< std::pair< int, int > > computeCapabilities_
Definition: ROCmService.cc:45
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
Definition: ROCmService.cc:311
int numberOfDevices() const final
Definition: ROCmService.cc:31
Definition: value.py:1
bool enabled() const final
Definition: ROCmService.cc:29
void setHipLimit(hipLimit_t limit, const char *name, size_t request)
Definition: ROCmService.cc:50
Log< level::Info, false > LogInfo
int numberOfDevices_
Definition: ROCmService.cc:44
#define hipCheck(ARG,...)
Definition: hipCheck.h:52
void add(std::string const &label, ParameterSetDescription const &psetDescription)
HLT enums.
std::string decodeVersion(int version)
Definition: ROCmService.cc:76
bool isAvailable() const
Definition: Service.h:40
Log< level::Warning, false > LogWarning
virtual void setGPUModels(std::vector< std::string > const &)=0
std::pair< int, int > computeCapability(int device) const final
Definition: ROCmService.cc:34