CMS 3D CMS Logo

ROCmService.cc
Go to the documentation of this file.
1 #include <iomanip>
2 #include <iostream>
3 #include <limits>
4 #include <set>
5 #include <string>
6 #include <vector>
7 
8 #include <hip/hip_runtime.h>
9 #if HIP_VERSION_MAJOR >= 6
10 // the location of rocm_version.h changed in HIP/ROCm 6.0
11 #include <rocm-core/rocm_version.h>
12 #else
13 #include <rocm_version.h>
14 #endif // HIP_VERSION_MAJOR
15 #include <rocm_smi/rocm_smi.h>
16 
26 
27 class ROCmService : public ROCmInterface {
28 public:
30  ~ROCmService() override;
31 
32  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
33 
34  bool enabled() const final { return enabled_; }
35 
36  int numberOfDevices() const final { return numberOfDevices_; }
37 
38  // Return the (major, minor) compute capability of the given device.
39  std::pair<int, int> computeCapability(int device) const final {
40  int size = computeCapabilities_.size();
41  if (device < 0 or device >= size) {
42  throw std::out_of_range("Invalid device index" + std::to_string(device) + ": the valid range is from 0 to " +
43  std::to_string(size - 1));
44  }
45  return computeCapabilities_[device];
46  }
47 
48 private:
50  std::vector<std::pair<int, int>> computeCapabilities_;
51  bool enabled_ = false;
52  bool verbose_ = false;
53 };
54 
55 void setHipLimit(hipLimit_t limit, const char* name, size_t request) {
56 #if HIP_VERSION >= 50400000
57  // read the current device
58  int device;
59  hipCheck(hipGetDevice(&device));
60  // try to set the requested limit
61  auto result = hipDeviceSetLimit(limit, request);
62  if (hipErrorUnsupportedLimit == result) {
63  edm::LogWarning("ROCmService") << "ROCm device " << device << ": unsupported limit \"" << name << "\"";
64  return;
65  }
66  // read back the limit value
67  size_t value;
68  result = hipDeviceGetLimit(&value, limit);
69  if (hipSuccess != result) {
70  edm::LogWarning("ROCmService") << "ROCm device " << device << ": failed to set limit \"" << name << "\" to "
71  << request << ", current value is " << value;
72  } else if (value != request) {
73  edm::LogWarning("ROCmService") << "ROCm device " << device << ": limit \"" << name << "\" set to " << value
74  << " instead of requested " << request;
75  }
76 #else
77  edm::LogWarning("ROCmService") << "ROCm versions below 5.4.0 do not support setting device limits.";
78 #endif
79 }
80 
// Render an integer-encoded HIP/ROCm version as a "major.minor.patch" string.
//
// The encoding is major * 10000000 + minor * 100000 + patch, so 50631061 is decoded as 5.6.31061.
std::string decodeVersion(int version) {
  int const major = version / 10000000;
  int const minor = version / 100000 % 100;
  int const patch = version % 100000;
  return std::to_string(major) + '.' + std::to_string(minor) + '.' + std::to_string(patch);
}
86 
// Constructor: initialise the ROCm runtime and collect information about the available devices.
//
// Reads the untracked parameters "verbose" and "enabled" from `config` (and the "limits" parameter
// set when HIP_VERSION >= 50400000). If the service is disabled by configuration, or if
// hipGetDeviceCount() fails, a message is logged and the constructor returns early, leaving
// enabled_ set to false. Otherwise it queries the driver and runtime versions, loops over all
// devices to record their compute capability, set the device flags and apply the requested limits,
// passes the list of device models to the edm::ResourceInformation service if that is available,
// and finally sets enabled_ to true.
88 ROCmService::ROCmService(edm::ParameterSet const& config) : verbose_(config.getUntrackedParameter<bool>("verbose")) {
89  if (not config.getUntrackedParameter<bool>("enabled")) {
90  edm::LogInfo("ROCmService") << "ROCmService disabled by configuration";
91  return;
92  }
93 
  // a failure to count the devices is not fatal: the service simply stays disabled
94  auto status = hipGetDeviceCount(&numberOfDevices_);
95  if (hipSuccess != status) {
96  edm::LogWarning("ROCmService") << "Failed to initialize the ROCm runtime.\n"
97  << "Disabling the ROCmService.";
98  return;
99  }
101 
102  // AMD system driver version, e.g. 5.16.9.22.20 or 6.1.5
  // the version string is queried through ROCm SMI, which is initialised and shut down just for this call;
  // the length passed is one less than the buffer size
103  char systemDriverVersion[256];
104  rsmiCheck(rsmi_init(0x00));
105  rsmiCheck(rsmi_version_str_get(RSMI_SW_COMP_DRIVER, systemDriverVersion, sizeof(systemDriverVersion) - 1));
106  rsmiCheck(rsmi_shut_down());
107 
108  // HIP driver version, as a single integer
109  // decodeVersion() renders it as major.minor.patch, e.g. 50631061 as 5.6.31061
110  int driverVersion = 0;
111  hipCheck(hipDriverGetVersion(&driverVersion));
112 
113  // HIP runtime version, as a single integer
114  // decodeVersion() renders it as major.minor.patch, e.g. 50631061 as 5.6.31061
115  int runtimeVersion = 0;
116  hipCheck(hipRuntimeGetVersion(&runtimeVersion));
117 
  // a single LogInfo object accumulates all the messages streamed into it in the rest of the constructor
118  edm::LogInfo log("ROCmService");
119  if (verbose_) {
120  log << "AMD kernel driver: " << systemDriverVersion << '\n';
121  log << "ROCm driver API: " << decodeVersion(driverVersion) << " (compiled with ROCm " <<
122 #ifdef ROCM_BUILD_INFO
123  // ROCM_BUILD_INFO has been introduced in ROCm 5.5.0
124  ROCM_BUILD_INFO
125 #else
126  ROCM_VERSION_MAJOR << '.' << ROCM_VERSION_MINOR << '.' << ROCM_VERSION_PATCH
127 #endif
128  << ")\n";
129  log << "ROCm runtime API: " << decodeVersion(runtimeVersion) << " (compiled with HIP " << HIP_VERSION_MAJOR << '.'
130  << HIP_VERSION_MINOR << '.' << HIP_VERSION_PATCH << ")\n";
131  log << "ROCm runtime successfully initialised, found " << numberOfDevices_ << " compute devices.\n";
132  } else {
133  log << "ROCm runtime version " << decodeVersion(runtimeVersion) << ", driver version "
134  << decodeVersion(driverVersion) << ", AMD driver version " << systemDriverVersion;
135  }
136 
  // the "limits" parameter set is read only when HIP_VERSION >= 50400000;
  // a negative value means that the corresponding limit is not changed (see the checks in the loop below)
137 #if HIP_VERSION >= 50400000
138  auto const& limits = config.getUntrackedParameter<edm::ParameterSet>("limits");
139  auto stackSize = limits.getUntrackedParameter<int>("hipLimitStackSize");
140  auto mallocHeapSize = limits.getUntrackedParameter<int>("hipLimitMallocHeapSize");
141 #endif
142 
  // distinct device names, later passed to the ResourceInformation service
143  std::set<std::string> models;
144 
145  for (int i = 0; i < numberOfDevices_; ++i) {
146  // read information about the compute device.
147  // see the documentation of hipGetDeviceProperties() for more information.
148  hipDeviceProp_t properties;
149  hipCheck(hipGetDeviceProperties(&properties, i));
150  log << '\n' << "ROCm device " << i << ": " << properties.name;
151  if (verbose_) {
152  log << '\n';
153  }
154  models.insert(std::string(properties.name));
155 
156  // compute capabilities
  // the (major, minor) pair is stored for computeCapability(), while the log shows the gcnArchName string
157  computeCapabilities_.emplace_back(properties.major, properties.minor);
158  if (verbose_) {
159  log << " compute capability: " << properties.gcnArchName;
160  } else {
161  log << " (" << properties.gcnArchName << ")";
162  }
163  if (verbose_) {
164  log << '\n';
165  log << " streaming multiprocessors: " << std::setw(13) << properties.multiProcessorCount << '\n';
166  log << " ROCm cores: " << std::setw(28) << "not yet implemented" << '\n';
167  // ROCm does not provide single to double performance ratio
168  }
169 
170  // compute mode
  // the table is indexed by properties.computeMode; the last entry is the fallback for unlisted values
171  static constexpr const char* computeModeDescription[] = {
172  "default (shared)", // hipComputeModeDefault
173  "exclusive (single thread)", // hipComputeModeExclusive
174  "prohibited", // hipComputeModeProhibited
175  "exclusive (single process)", // hipComputeModeExclusiveProcess
176  "unknown"};
177  if (verbose_) {
  // std::min() clamps values past the end of the table to the "unknown" entry
  // NOTE(review): a negative computeMode would not be clamped - confirm it cannot happen
178  log << " compute mode:" << std::right << std::setw(27)
179  << computeModeDescription[std::min(properties.computeMode,
180  static_cast<int>(std::size(computeModeDescription)) - 1)]
181  << '\n';
182  }
183 
184  // TODO if a device is in exclusive use, skip it and remove it from the list, instead of failing with an exception
  // make device i the current device: the calls below (flags, memory info, limits) act on the current device
185  hipCheck(hipSetDevice(i));
186  hipCheck(hipSetDeviceFlags(hipDeviceScheduleAuto | hipDeviceMapHost));
187 
188  if (verbose_) {
189  // read the free and total amount of memory available for allocation by the device, in bytes.
190  // see the documentation of hipMemGetInfo() for more information.
191  size_t freeMemory = 0;
192  size_t totalMemory = 0;
193  hipCheck(hipMemGetInfo(&freeMemory, &totalMemory));
  // the values are divided by 2^20 and 2^10 before being labelled as MB and kB
194  log << " memory: " << std::setw(6) << freeMemory / (1 << 20) << " MB free / " << std::setw(6)
195  << totalMemory / (1 << 20) << " MB total\n";
196  log << " constant memory: " << std::setw(8) << properties.totalConstMem / (1 << 10) << " kB\n";
197  log << " L2 cache size: " << std::setw(8) << properties.l2CacheSize / (1 << 10) << " kB\n";
198 
199  log << '\n';
200 
201  // other capabilities
202  log << "Other capabilities\n";
203  log << " " << (properties.canMapHostMemory ? "can" : "cannot")
204  << " map host memory into the ROCm address space for use with hipHostAlloc()/hipHostGetDevicePointer()\n";
205  log << " " << (properties.pageableMemoryAccess ? "supports" : "does not support")
206  << " coherently accessing pageable memory without calling hipHostRegister() on it\n";
207  log << " " << (properties.pageableMemoryAccessUsesHostPageTables ? "can" : "cannot")
208  << " access pageable memory via the host's page tables\n";
209  log << " " << (properties.managedMemory ? "supports" : "does not support")
210  << " allocating managed memory on this system\n";
211  log << " " << (properties.concurrentManagedAccess ? "can" : "cannot")
212  << " coherently access managed memory concurrently with the host\n";
213  log << " "
214  << "the host " << (properties.directManagedMemAccessFromHost ? "can" : "cannot")
215  << " directly access managed memory on the device without migration\n";
216  log << " " << (properties.cooperativeLaunch ? "supports" : "does not support")
217  << " launching cooperative kernels via hipLaunchCooperativeKernel()\n";
218  log << " " << (properties.cooperativeMultiDeviceLaunch ? "supports" : "does not support")
219  << " launching cooperative kernels via hipLaunchCooperativeKernelMultiDevice()\n";
220  log << '\n';
221  }
222 
223  // read back and report the ROCm device flags (they were set by hipSetDeviceFlags() above).
224  // see the documentation of hipSetDeviceFlags and hipGetDeviceFlags for more information.
225  if (verbose_) {
226  log << "ROCm flags\n";
227  unsigned int flags;
228  hipCheck(hipGetDeviceFlags(&flags));
229  switch (flags & hipDeviceScheduleMask) {
230  case hipDeviceScheduleAuto:
231  log << " thread policy: default\n";
232  break;
233  case hipDeviceScheduleSpin:
234  log << " thread policy: spin\n";
235  break;
236  case hipDeviceScheduleYield:
237  log << " thread policy: yield\n";
238  break;
239  case hipDeviceScheduleBlockingSync:
240  log << " thread policy: blocking sync\n";
241  break;
242  default:
243  log << " thread policy: undefined\n";
244  }
245  if (flags & hipDeviceMapHost) {
246  log << " pinned host memory allocations: enabled\n";
247  } else {
248  log << " pinned host memory allocations: disabled\n";
249  }
250  if (flags & hipDeviceLmemResizeToMax) {
251  log << " kernel host memory reuse: enabled\n";
252  } else {
253  log << " kernel host memory reuse: disabled\n";
254  }
255  log << '\n';
256  }
257 
258  // set and read the ROCm resource limits.
259  // see the documentation of hipDeviceSetLimit() for more information.
260 
261 #if HIP_VERSION >= 50400000
262  // hipLimitStackSize controls the stack size in bytes of each GPU thread.
263  if (stackSize >= 0) {
264  setHipLimit(hipLimitStackSize, "hipLimitStackSize", stackSize);
265  }
266  // hipLimitMallocHeapSize controls the size in bytes of the heap used by the malloc()
267  // and free() device system calls.
268  if (mallocHeapSize >= 0) {
269  setHipLimit(hipLimitMallocHeapSize, "hipLimitMallocHeapSize", mallocHeapSize);
270  }
271 #endif
272 
273  if (verbose_) {
274  size_t value;
275  log << "ROCm limits\n";
276 #if HIP_VERSION >= 50400000
277  hipCheck(hipDeviceGetLimit(&value, hipLimitStackSize));
278  log << " stack size: " << std::setw(10) << value / (1 << 10) << " kB\n";
279 #endif
  // NOTE(review): unlike the stack size query, this query is not guarded by the HIP_VERSION check - confirm this is intended
280  hipCheck(hipDeviceGetLimit(&value, hipLimitMallocHeapSize));
281  log << " malloc heap size: " << std::setw(10) << value / (1 << 20) << " MB\n";
282  }
283  }
284 
  // publish the list of distinct device models, if the ResourceInformation service is available
285  edm::Service<edm::ResourceInformation> resourceInformationService;
286  if (resourceInformationService.isAvailable()) {
287  std::vector<std::string> modelsV(models.begin(), models.end());
288  resourceInformationService->setGPUModels(modelsV);
  // the driver and runtime versions are not reported: the block below is disabled and uses NVIDIA/CUDA-named setters
289  /*
290  std::string nvidiaDriverVersion{systemDriverVersion};
291  resourceInformationService->setNvidiaDriverVersion(nvidiaDriverVersion);
292  resourceInformationService->setCudaDriverVersion(driverVersion);
293  resourceInformationService->setCudaRuntimeVersion(runtimeVersion);
294  */
295  }
296 
297  if (verbose_) {
298  log << '\n' << "ROCmService fully initialized";
299  }
  // the service is marked as enabled only after the whole initialisation has completed
300  enabled_ = true;
301 }
302 
304  if (enabled_) {
305  for (int i = 0; i < numberOfDevices_; ++i) {
306  hipCheck(hipSetDevice(i));
307  hipCheck(hipDeviceSynchronize());
308  // Explicitly destroys and cleans up all resources associated with the current device in the
309  // current process. Any subsequent API call to this device will reinitialize the device.
310  // Useful to check for memory leaks.
311  hipCheck(hipDeviceReset());
312  }
313  }
314 }
315 
318  desc.addUntracked<bool>("enabled", true);
319  desc.addUntracked<bool>("verbose", false);
320 
321 #if HIP_VERSION >= 50400000
323  limits.addUntracked<int>("hipLimitStackSize", -1)->setComment("Stack size in bytes of each GPU thread.");
324  limits.addUntracked<int>("hipLimitMallocHeapSize", -1)
325  ->setComment("Size in bytes of the heap used by the malloc() and free() device system calls.");
326  desc.addUntracked<edm::ParameterSetDescription>("limits", limits)
327  ->setComment(
328  "See the documentation of hipDeviceSetLimit for more information.\nSetting any of these options to -1 keeps "
329  "the default value.");
330 #endif
331 
332  descriptions.add("ROCmService", desc);
333 }
334 
335 namespace edm {
336  namespace service {
337  inline bool isProcessWideService(ROCmService const*) { return true; }
338  } // namespace service
339 } // namespace edm
340 
size
Write out results.
~ROCmService() override
Definition: ROCmService.cc:303
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
Definition: ServiceMaker.h:102
bool isProcessWideService(TFileService const *)
Definition: TFileService.h:98
Definition: config.py:1
Definition: models.py:1
#define rsmiCheck(ARG,...)
Definition: rsmiCheck.h:53
static std::string to_string(const XMLCh *ch)
ROCmService(edm::ParameterSet const &config)
Constructor.
Definition: ROCmService.cc:88
std::vector< std::pair< int, int > > computeCapabilities_
Definition: ROCmService.cc:50
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
Definition: ROCmService.cc:316
int numberOfDevices() const final
Definition: ROCmService.cc:36
Definition: value.py:1
bool enabled() const final
Definition: ROCmService.cc:34
void setHipLimit(hipLimit_t limit, const char *name, size_t request)
Definition: ROCmService.cc:55
Log< level::Info, false > LogInfo
int numberOfDevices_
Definition: ROCmService.cc:49
#define hipCheck(ARG,...)
Definition: hipCheck.h:52
void add(std::string const &label, ParameterSetDescription const &psetDescription)
HLT enums.
std::string decodeVersion(int version)
Definition: ROCmService.cc:81
bool isAvailable() const
Definition: Service.h:40
Log< level::Warning, false > LogWarning
virtual void setGPUModels(std::vector< std::string > const &)=0
std::pair< int, int > computeCapability(int device) const final
Definition: ROCmService.cc:39