CMS 3D CMS Logo

ROCmService.cc
Go to the documentation of this file.
1 #include <iomanip>
2 #include <iostream>
3 #include <limits>
4 #include <set>
5 #include <string>
6 #include <vector>
7 
8 #include <hip/hip_runtime.h>
9 /*
10 #include <nvml.h>
11 */
12 
21 /*
22 #include "HeterogeneousCore/ROCmUtilities/interface/nvmlCheck.h"
23 */
24 
25 class ROCmService : public ROCmInterface {
26 public:
28  ~ROCmService() override;
29 
30  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
31 
32  bool enabled() const final { return enabled_; }
33 
34  int numberOfDevices() const final { return numberOfDevices_; }
35 
36  // Return the (major, minor) compute capability of the given device.
37  std::pair<int, int> computeCapability(int device) const final {
38  int size = computeCapabilities_.size();
39  if (device < 0 or device >= size) {
40  throw std::out_of_range("Invalid device index" + std::to_string(device) + ": the valid range is from 0 to " +
41  std::to_string(size - 1));
42  }
43  return computeCapabilities_[device];
44  }
45 
46 private:
48  std::vector<std::pair<int, int>> computeCapabilities_;
49  bool enabled_ = false;
50  bool verbose_ = false;
51 };
52 
53 void setHipLimit(hipLimit_t limit, const char* name, size_t request) {
54  // read the current device
55  int device;
56  hipCheck(hipGetDevice(&device));
57  // try to set the requested limit
58  auto result = hipDeviceSetLimit(limit, request);
59  if (hipErrorUnsupportedLimit == result) {
60  edm::LogWarning("ROCmService") << "ROCm device " << device << ": unsupported limit \"" << name << "\"";
61  return;
62  }
63  // read back the limit value
64  size_t value;
65  result = hipDeviceGetLimit(&value, limit);
66  if (hipSuccess != result) {
67  edm::LogWarning("ROCmService") << "ROCm device " << device << ": failed to set limit \"" << name << "\" to "
68  << request << ", current value is " << value;
69  } else if (value != request) {
70  edm::LogWarning("ROCmService") << "ROCm device " << device << ": limit \"" << name << "\" set to " << value
71  << " instead of requested " << request;
72  }
73 }
74 
// Convert an integer version number into a dotted string.
//
// Two encodings are recognised:
//   - HIP style,  major * 10000000 + minor * 100000 + patch  ->  "major.minor.patch"
//     (e.g. 50631061 -> "5.6.31061");
//   - CUDA style, major * 1000 + minor * 10                  ->  "major.minor"
//     (e.g. 11040 -> "11.4").
// A value of at least 10000000 cannot be a sensible CUDA-style version, so it
// is decoded as HIP style; anything smaller is decoded as CUDA style.
// NOTE(review): the HIP-style layout follows the documented HIP_VERSION macro;
// confirm against the ROCm release in use.
std::string decodeVersion(int version) {
  constexpr int kHipMajorScale = 10000000;
  constexpr int kHipMinorScale = 100000;
  if (version >= kHipMajorScale) {
    return std::to_string(version / kHipMajorScale) + '.' + std::to_string(version / kHipMinorScale % 100) + '.' +
           std::to_string(version % kHipMinorScale);
  }
  return std::to_string(version / 1000) + '.' + std::to_string(version % 1000 / 10);
}
78 
80 ROCmService::ROCmService(edm::ParameterSet const& config) : verbose_(config.getUntrackedParameter<bool>("verbose")) {
81  if (not config.getUntrackedParameter<bool>("enabled")) {
82  edm::LogInfo("ROCmService") << "ROCmService disabled by configuration";
83  return;
84  }
85 
86  auto status = hipGetDeviceCount(&numberOfDevices_);
87  if (hipSuccess != status) {
88  edm::LogWarning("ROCmService") << "Failed to initialize the ROCm runtime.\n"
89  << "Disabling the ROCmService.";
90  return;
91  }
93 
94  /*
95  // AMD system driver version, e.g. 470.57.02
96  char systemDriverVersion[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE];
97  nvmlCheck(nvmlInitWithFlags(NVML_INIT_FLAG_NO_GPUS | NVML_INIT_FLAG_NO_ATTACH));
98  nvmlCheck(nvmlSystemGetDriverVersion(systemDriverVersion, sizeof(systemDriverVersion)));
99  nvmlCheck(nvmlShutdown());
100  */
101 
102  // ROCm driver version, e.g. 11.4
103  // the full version, like 11.4.1 or 11.4.100, is not reported
104  int driverVersion = 0;
105  hipCheck(hipDriverGetVersion(&driverVersion));
106 
107  // ROCm runtime version, e.g. 11.4
108  // the full version, like 11.4.1 or 11.4.108, is not reported
109  int runtimeVersion = 0;
110  hipCheck(hipRuntimeGetVersion(&runtimeVersion));
111 
112  edm::LogInfo log("ROCmService");
113  if (verbose_) {
114  /*
115  log << "AMD driver: " << systemDriverVersion << '\n';
116  */
117  log << "ROCm driver API: " << decodeVersion(driverVersion) << /*" (compiled with " << decodeVersion(ROCm_VERSION)
118  << ")" */
119  "\n";
120  log << "ROCm runtime API: " << decodeVersion(runtimeVersion)
121  << /*" (compiled with " << decodeVersion(ROCmRT_VERSION)
122  << ")" */
123  "\n";
124  log << "ROCm runtime successfully initialised, found " << numberOfDevices_ << " compute devices.\n";
125  } else {
126  log << "ROCm runtime version " << decodeVersion(runtimeVersion) << ", driver version "
127  << decodeVersion(driverVersion)
128  /*
129  << ", AMD driver version " << systemDriverVersion
130  */
131  ;
132  }
133 
134  auto const& limits = config.getUntrackedParameter<edm::ParameterSet>("limits");
135  /*
136  auto printfFifoSize = limits.getUntrackedParameter<int>("hipLimitPrintfFifoSize");
137  */
138  auto stackSize = limits.getUntrackedParameter<int>("hipLimitStackSize");
139  auto mallocHeapSize = limits.getUntrackedParameter<int>("hipLimitMallocHeapSize");
140  /*
141  auto devRuntimeSyncDepth = limits.getUntrackedParameter<int>("hipLimitDevRuntimeSyncDepth");
142  auto devRuntimePendingLaunchCount = limits.getUntrackedParameter<int>("hipLimitDevRuntimePendingLaunchCount");
143  */
144 
145  std::set<std::string> models;
146 
147  for (int i = 0; i < numberOfDevices_; ++i) {
148  // read information about the compute device.
149  // see the documentation of hipGetDeviceProperties() for more information.
150  hipDeviceProp_t properties;
151  hipCheck(hipGetDeviceProperties(&properties, i));
152  log << '\n' << "ROCm device " << i << ": " << properties.name;
153  if (verbose_) {
154  log << '\n';
155  }
156  models.insert(std::string(properties.name));
157 
158  // compute capabilities
159  computeCapabilities_.emplace_back(properties.major, properties.minor);
160  if (verbose_) {
161  log << " compute capability: " << properties.major << "." << properties.minor;
162  }
163  log << " (sm_" << properties.major << properties.minor << ")";
164  if (verbose_) {
165  log << '\n';
166  log << " streaming multiprocessors: " << std::setw(13) << properties.multiProcessorCount << '\n';
167  log << " ROCm cores: " << std::setw(28) << "not yet implemented" << '\n';
168  /*
169  log << " single to double performance: " << std::setw(8) << properties.singleToDoublePrecisionPerfRatio
170  << ":1\n";
171  */
172  }
173 
174  // compute mode
175  static constexpr const char* computeModeDescription[] = {
176  "default (shared)", // hipComputeModeDefault
177  "exclusive (single thread)", // hipComputeModeExclusive
178  "prohibited", // hipComputeModeProhibited
179  "exclusive (single process)", // hipComputeModeExclusiveProcess
180  "unknown"};
181  if (verbose_) {
182  log << " compute mode:" << std::right << std::setw(27)
183  << computeModeDescription[std::min(properties.computeMode,
184  static_cast<int>(std::size(computeModeDescription)) - 1)]
185  << '\n';
186  }
187 
188  // TODO if a device is in exclusive use, skip it and remove it from the list, instead of failing with an exception
189  hipCheck(hipSetDevice(i));
190  hipCheck(hipSetDeviceFlags(hipDeviceScheduleAuto | hipDeviceMapHost));
191 
192  // read the free and total amount of memory available for allocation by the device, in bytes.
193  // see the documentation of hipMemGetInfo() for more information.
194  if (verbose_) {
195  size_t freeMemory, totalMemory;
196  hipCheck(hipMemGetInfo(&freeMemory, &totalMemory));
197  log << " memory: " << std::setw(6) << freeMemory / (1 << 20) << " MB free / " << std::setw(6)
198  << totalMemory / (1 << 20) << " MB total\n";
199  log << " constant memory: " << std::setw(6) << properties.totalConstMem / (1 << 10) << " kB\n";
200  log << " L2 cache size: " << std::setw(6) << properties.l2CacheSize / (1 << 10) << " kB\n";
201  }
202 
203  // L1 cache behaviour
204  if (verbose_) {
205  /*
206  static constexpr const char* l1CacheModeDescription[] = {
207  "unknown", "local memory", "global memory", "local and global memory"};
208  int l1CacheMode = properties.localL1CacheSupported + 2 * properties.globalL1CacheSupported;
209  log << " L1 cache mode:" << std::setw(26) << std::right << l1CacheModeDescription[l1CacheMode] << '\n';
210  log << '\n';
211  */
212 
213  log << "Other capabilities\n";
214  log << " " << (properties.canMapHostMemory ? "can" : "cannot")
215  << " map host memory into the ROCm address space for use with hipHostAlloc()/hipHostGetDevicePointer()\n";
216  log << " " << (properties.pageableMemoryAccess ? "supports" : "does not support")
217  << " coherently accessing pageable memory without calling hipHostRegister() on it\n";
218  log << " " << (properties.pageableMemoryAccessUsesHostPageTables ? "can" : "cannot")
219  << " access pageable memory via the host's page tables\n";
220  /*
221  log << " " << (properties.canUseHostPointerForRegisteredMem ? "can" : "cannot")
222  << " access host registered memory at the same virtual address as the host\n";
223  log << " " << (properties.unifiedAddressing ? "shares" : "does not share")
224  << " a unified address space with the host\n";
225  */
226  log << " " << (properties.managedMemory ? "supports" : "does not support")
227  << " allocating managed memory on this system\n";
228  log << " " << (properties.concurrentManagedAccess ? "can" : "cannot")
229  << " coherently access managed memory concurrently with the host\n";
230  log << " "
231  << "the host " << (properties.directManagedMemAccessFromHost ? "can" : "cannot")
232  << " directly access managed memory on the device without migration\n";
233  log << " " << (properties.cooperativeLaunch ? "supports" : "does not support")
234  << " launching cooperative kernels via hipLaunchCooperativeKernel()\n";
235  log << " " << (properties.cooperativeMultiDeviceLaunch ? "supports" : "does not support")
236  << " launching cooperative kernels via hipLaunchCooperativeKernelMultiDevice()\n";
237  log << '\n';
238  }
239 
240  // set and read the ROCm device flags.
241  // see the documentation of hipSetDeviceFlags and hipGetDeviceFlags for more information.
242  if (verbose_) {
243  log << "ROCm flags\n";
244  unsigned int flags;
245  hipCheck(hipGetDeviceFlags(&flags));
246  switch (flags & hipDeviceScheduleMask) {
247  case hipDeviceScheduleAuto:
248  log << " thread policy: default\n";
249  break;
250  case hipDeviceScheduleSpin:
251  log << " thread policy: spin\n";
252  break;
253  case hipDeviceScheduleYield:
254  log << " thread policy: yield\n";
255  break;
256  case hipDeviceScheduleBlockingSync:
257  log << " thread policy: blocking sync\n";
258  break;
259  default:
260  log << " thread policy: undefined\n";
261  }
262  if (flags & hipDeviceMapHost) {
263  log << " pinned host memory allocations: enabled\n";
264  } else {
265  log << " pinned host memory allocations: disabled\n";
266  }
267  if (flags & hipDeviceLmemResizeToMax) {
268  log << " kernel host memory reuse: enabled\n";
269  } else {
270  log << " kernel host memory reuse: disabled\n";
271  }
272  log << '\n';
273  }
274 
275  // set and read the ROCm resource limits.
276  // see the documentation of hipDeviceSetLimit() for more information.
277 
278  /*
279  // hipLimitPrintfFifoSize controls the size in bytes of the shared FIFO used by the
280  // printf() device system call.
281  if (printfFifoSize >= 0) {
282  setHipLimit(hipLimitPrintfFifoSize, "hipLimitPrintfFifoSize", printfFifoSize);
283  }
284  */
285  // hipLimitStackSize controls the stack size in bytes of each GPU thread.
286  if (stackSize >= 0) {
287  setHipLimit(hipLimitStackSize, "hipLimitStackSize", stackSize);
288  }
289  // hipLimitMallocHeapSize controls the size in bytes of the heap used by the malloc()
290  // and free() device system calls.
291  if (mallocHeapSize >= 0) {
292  setHipLimit(hipLimitMallocHeapSize, "hipLimitMallocHeapSize", mallocHeapSize);
293  }
294  /*
295  if ((properties.major > 3) or (properties.major == 3 and properties.minor >= 5)) {
296  // hipLimitDevRuntimeSyncDepth controls the maximum nesting depth of a grid at which
297  // a thread can safely call hipDeviceSynchronize().
298  if (devRuntimeSyncDepth >= 0) {
299  setHipLimit(hipLimitDevRuntimeSyncDepth, "hipLimitDevRuntimeSyncDepth", devRuntimeSyncDepth);
300  }
301  // hipLimitDevRuntimePendingLaunchCount controls the maximum number of outstanding
302  // device runtime launches that can be made from the current device.
303  if (devRuntimePendingLaunchCount >= 0) {
304  setHipLimit(
305  hipLimitDevRuntimePendingLaunchCount, "hipLimitDevRuntimePendingLaunchCount", devRuntimePendingLaunchCount);
306  }
307  }
308  */
309 
310  if (verbose_) {
311  size_t value;
312  log << "ROCm limits\n";
313  /*
314  hipCheck(hipDeviceGetLimit(&value, hipLimitPrintfFifoSize));
315  log << " printf buffer size: " << std::setw(10) << value / (1 << 20) << " MB\n";
316  */
317  hipCheck(hipDeviceGetLimit(&value, hipLimitStackSize));
318  log << " stack size: " << std::setw(10) << value / (1 << 10) << " kB\n";
319  hipCheck(hipDeviceGetLimit(&value, hipLimitMallocHeapSize));
320  log << " malloc heap size: " << std::setw(10) << value / (1 << 20) << " MB\n";
321  /*
322  if ((properties.major > 3) or (properties.major == 3 and properties.minor >= 5)) {
323  hipCheck(hipDeviceGetLimit(&value, hipLimitDevRuntimeSyncDepth));
324  log << " runtime sync depth: " << std::setw(10) << value << '\n';
325  hipCheck(hipDeviceGetLimit(&value, hipLimitDevRuntimePendingLaunchCount));
326  log << " runtime pending launch count: " << std::setw(10) << value << '\n';
327  }
328  */
329  }
330  }
331 
332  edm::Service<edm::ResourceInformation> resourceInformationService;
333  if (resourceInformationService.isAvailable()) {
334  std::vector<std::string> modelsV(models.begin(), models.end());
335  resourceInformationService->setGPUModels(modelsV);
336  /*
337  std::string nvidiaDriverVersion{systemDriverVersion};
338  resourceInformationService->setNvidiaDriverVersion(nvidiaDriverVersion);
339  resourceInformationService->setCudaDriverVersion(driverVersion);
340  resourceInformationService->setCudaRuntimeVersion(runtimeVersion);
341  */
342  }
343 
344  if (verbose_) {
345  log << '\n' << "ROCmService fully initialized";
346  }
347  enabled_ = true;
348 }
349 
351  if (enabled_) {
352  for (int i = 0; i < numberOfDevices_; ++i) {
353  hipCheck(hipSetDevice(i));
354  hipCheck(hipDeviceSynchronize());
355  // Explicitly destroys and cleans up all resources associated with the current device in the
356  // current process. Any subsequent API call to this device will reinitialize the device.
357  // Useful to check for memory leaks.
358  hipCheck(hipDeviceReset());
359  }
360  }
361 }
362 
365  desc.addUntracked<bool>("enabled", true);
366  desc.addUntracked<bool>("verbose", false);
367 
369  /*
370  limits.addUntracked<int>("hipLimitPrintfFifoSize", -1)
371  ->setComment("Size in bytes of the shared FIFO used by the printf() device system call.");
372  */
373  limits.addUntracked<int>("hipLimitStackSize", -1)->setComment("Stack size in bytes of each GPU thread.");
374  limits.addUntracked<int>("hipLimitMallocHeapSize", -1)
375  ->setComment("Size in bytes of the heap used by the malloc() and free() device system calls.");
376  limits.addUntracked<int>("hipLimitDevRuntimeSyncDepth", -1)
377  ->setComment("Maximum nesting depth of a grid at which a thread can safely call hipDeviceSynchronize().");
378  limits.addUntracked<int>("hipLimitDevRuntimePendingLaunchCount", -1)
379  ->setComment("Maximum number of outstanding device runtime launches that can be made from the current device.");
380  desc.addUntracked<edm::ParameterSetDescription>("limits", limits)
381  ->setComment(
382  "See the documentation of hipDeviceSetLimit for more information.\nSetting any of these options to -1 keeps "
383  "the default value.");
384 
385  descriptions.add("ROCmService", desc);
386 }
387 
388 namespace edm {
389  namespace service {
390  inline bool isProcessWideService(ROCmService const*) { return true; }
391  } // namespace service
392 } // namespace edm
393 
size
Write out results.
~ROCmService() override
Definition: ROCmService.cc:350
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
Definition: ServiceMaker.h:102
bool isProcessWideService(TFileService const *)
Definition: TFileService.h:98
Definition: config.py:1
Definition: models.py:1
static std::string to_string(const XMLCh *ch)
ROCmService(edm::ParameterSet const &config)
Constructor.
Definition: ROCmService.cc:80
std::vector< std::pair< int, int > > computeCapabilities_
Definition: ROCmService.cc:48
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
Definition: ROCmService.cc:363
int numberOfDevices() const final
Definition: ROCmService.cc:34
Definition: value.py:1
bool enabled() const final
Definition: ROCmService.cc:32
void setHipLimit(hipLimit_t limit, const char *name, size_t request)
Definition: ROCmService.cc:53
Log< level::Info, false > LogInfo
int numberOfDevices_
Definition: ROCmService.cc:47
#define hipCheck(ARG,...)
Definition: hipCheck.h:52
void add(std::string const &label, ParameterSetDescription const &psetDescription)
HLT enums.
std::string decodeVersion(int version)
Definition: ROCmService.cc:75
bool isAvailable() const
Definition: Service.h:40
Log< level::Warning, false > LogWarning
virtual void setGPUModels(std::vector< std::string > const &)=0
std::pair< int, int > computeCapability(int device) const final
Definition: ROCmService.cc:37