CMS 3D CMS Logo

List of all members | Public Member Functions | Static Public Member Functions | Private Attributes
ROCmService Class Reference
Inheritance diagram for ROCmService:
ROCmInterface

Public Member Functions

std::pair< int, int > computeCapability (int device) const final
 
bool enabled () const final
 
int numberOfDevices () const final
 
 ROCmService (edm::ParameterSet const &config)
 Constructor. More...
 
 ~ROCmService () override
 
- Public Member Functions inherited from ROCmInterface
 ROCmInterface ()=default
 
virtual ~ROCmInterface ()=default
 

Static Public Member Functions

static void fillDescriptions (edm::ConfigurationDescriptions &descriptions)
 

Private Attributes

std::vector< std::pair< int, int > > computeCapabilities_
 
bool enabled_ = false
 
int numberOfDevices_ = 0
 
bool verbose_ = false
 

Detailed Description

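Service that initialises the ROCm runtime when it is constructed, records the number of compute devices and the compute capability of each one, and applies the HIP resource limits requested in its configuration. If the service is disabled by configuration, or the device count cannot be queried, it stays disabled and enabled() returns false.

Definition at line 25 of file ROCmService.cc.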

Constructor & Destructor Documentation

◆ ROCmService()

ROCmService::ROCmService ( edm::ParameterSet const &  config)

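Constructor. Reads the untracked "verbose", "enabled" and "limits" parameters from config, queries the number of ROCm devices, logs the properties of each device, applies the requested stack-size and malloc-heap-size limits, and marks the service as enabled once every device has been set up.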

Definition at line 80 of file ROCmService.cc.

References computeCapabilities_, decodeVersion(), enabled_, HLT_2023v12_cff::flags, hipCheck, mps_fire::i, edm::Service< T >::isAvailable(), TH2PolyOfflineMaps::limits, dqm-mbProfile::log, SiStripPI::min, numberOfDevices_, edm::ResourceInformation::setGPUModels(), setHipLimit(), findQualityFiles::size, mps_update::status, AlCaHLTBitMon_QueryRunRegistry::string, relativeConstraints::value, and verbose_.

80  : verbose_(config.getUntrackedParameter<bool>("verbose")) {
81  if (not config.getUntrackedParameter<bool>("enabled")) {
82  edm::LogInfo("ROCmService") << "ROCmService disabled by configuration";
83  return;
84  }
85 
86  auto status = hipGetDeviceCount(&numberOfDevices_);
87  if (hipSuccess != status) {
88  edm::LogWarning("ROCmService") << "Failed to initialize the ROCm runtime.\n"
89  << "Disabling the ROCmService.";
90  return;
91  }
93 
94  /*
95  // AMD system driver version, e.g. 470.57.02
96  char systemDriverVersion[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE];
97  nvmlCheck(nvmlInitWithFlags(NVML_INIT_FLAG_NO_GPUS | NVML_INIT_FLAG_NO_ATTACH));
98  nvmlCheck(nvmlSystemGetDriverVersion(systemDriverVersion, sizeof(systemDriverVersion)));
99  nvmlCheck(nvmlShutdown());
100  */
101 
102  // ROCm driver version, e.g. 11.4
103  // the full version, like 11.4.1 or 11.4.100, is not reported
104  int driverVersion = 0;
105  hipCheck(hipDriverGetVersion(&driverVersion));
106 
107  // ROCm runtime version, e.g. 11.4
108  // the full version, like 11.4.1 or 11.4.108, is not reported
109  int runtimeVersion = 0;
110  hipCheck(hipRuntimeGetVersion(&runtimeVersion));
111 
112  edm::LogInfo log("ROCmService");
113  if (verbose_) {
114  /*
115  log << "AMD driver: " << systemDriverVersion << '\n';
116  */
117  log << "ROCm driver API: " << decodeVersion(driverVersion) << /*" (compiled with " << decodeVersion(ROCm_VERSION)
118  << ")" */
119  "\n";
120  log << "ROCm runtime API: " << decodeVersion(runtimeVersion)
121  << /*" (compiled with " << decodeVersion(ROCmRT_VERSION)
122  << ")" */
123  "\n";
124  log << "ROCm runtime successfully initialised, found " << numberOfDevices_ << " compute devices.\n";
125  } else {
126  log << "ROCm runtime version " << decodeVersion(runtimeVersion) << ", driver version "
127  << decodeVersion(driverVersion)
128  /*
129  << ", AMD driver version " << systemDriverVersion
130  */
131  ;
132  }
133 
134  auto const& limits = config.getUntrackedParameter<edm::ParameterSet>("limits");
135  /*
136  auto printfFifoSize = limits.getUntrackedParameter<int>("hipLimitPrintfFifoSize");
137  */
138  auto stackSize = limits.getUntrackedParameter<int>("hipLimitStackSize");
139  auto mallocHeapSize = limits.getUntrackedParameter<int>("hipLimitMallocHeapSize");
140  /*
141  auto devRuntimeSyncDepth = limits.getUntrackedParameter<int>("hipLimitDevRuntimeSyncDepth");
142  auto devRuntimePendingLaunchCount = limits.getUntrackedParameter<int>("hipLimitDevRuntimePendingLaunchCount");
143  */
144 
145  std::set<std::string> models;
146 
147  for (int i = 0; i < numberOfDevices_; ++i) {
148  // read information about the compute device.
149  // see the documentation of hipGetDeviceProperties() for more information.
150  hipDeviceProp_t properties;
151  hipCheck(hipGetDeviceProperties(&properties, i));
152  log << '\n' << "ROCm device " << i << ": " << properties.name;
153  if (verbose_) {
154  log << '\n';
155  }
156  models.insert(std::string(properties.name));
157 
158  // compute capabilities
159  computeCapabilities_.emplace_back(properties.major, properties.minor);
160  if (verbose_) {
161  log << " compute capability: " << properties.major << "." << properties.minor;
162  }
163  log << " (sm_" << properties.major << properties.minor << ")";
164  if (verbose_) {
165  log << '\n';
166  log << " streaming multiprocessors: " << std::setw(13) << properties.multiProcessorCount << '\n';
167  log << " ROCm cores: " << std::setw(28) << "not yet implemented" << '\n';
168  /*
169  log << " single to double performance: " << std::setw(8) << properties.singleToDoublePrecisionPerfRatio
170  << ":1\n";
171  */
172  }
173 
174  // compute mode
175  static constexpr const char* computeModeDescription[] = {
176  "default (shared)", // hipComputeModeDefault
177  "exclusive (single thread)", // hipComputeModeExclusive
178  "prohibited", // hipComputeModeProhibited
179  "exclusive (single process)", // hipComputeModeExclusiveProcess
180  "unknown"};
181  if (verbose_) {
182  log << " compute mode:" << std::right << std::setw(27)
183  << computeModeDescription[std::min(properties.computeMode,
184  static_cast<int>(std::size(computeModeDescription)) - 1)]
185  << '\n';
186  }
187 
188  // TODO if a device is in exclusive use, skip it and remove it from the list, instead of failing with an exception
189  hipCheck(hipSetDevice(i));
190  hipCheck(hipSetDeviceFlags(hipDeviceScheduleAuto | hipDeviceMapHost));
191 
192  // read the free and total amount of memory available for allocation by the device, in bytes.
193  // see the documentation of hipMemGetInfo() for more information.
194  if (verbose_) {
195  size_t freeMemory, totalMemory;
196  hipCheck(hipMemGetInfo(&freeMemory, &totalMemory));
197  log << " memory: " << std::setw(6) << freeMemory / (1 << 20) << " MB free / " << std::setw(6)
198  << totalMemory / (1 << 20) << " MB total\n";
199  log << " constant memory: " << std::setw(6) << properties.totalConstMem / (1 << 10) << " kB\n";
200  log << " L2 cache size: " << std::setw(6) << properties.l2CacheSize / (1 << 10) << " kB\n";
201  }
202 
203  // L1 cache behaviour
204  if (verbose_) {
205  /*
206  static constexpr const char* l1CacheModeDescription[] = {
207  "unknown", "local memory", "global memory", "local and global memory"};
208  int l1CacheMode = properties.localL1CacheSupported + 2 * properties.globalL1CacheSupported;
209  log << " L1 cache mode:" << std::setw(26) << std::right << l1CacheModeDescription[l1CacheMode] << '\n';
210  log << '\n';
211  */
212 
213  log << "Other capabilities\n";
214  log << " " << (properties.canMapHostMemory ? "can" : "cannot")
215  << " map host memory into the ROCm address space for use with hipHostAlloc()/hipHostGetDevicePointer()\n";
216  log << " " << (properties.pageableMemoryAccess ? "supports" : "does not support")
217  << " coherently accessing pageable memory without calling hipHostRegister() on it\n";
218  log << " " << (properties.pageableMemoryAccessUsesHostPageTables ? "can" : "cannot")
219  << " access pageable memory via the host's page tables\n";
220  /*
221  log << " " << (properties.canUseHostPointerForRegisteredMem ? "can" : "cannot")
222  << " access host registered memory at the same virtual address as the host\n";
223  log << " " << (properties.unifiedAddressing ? "shares" : "does not share")
224  << " a unified address space with the host\n";
225  */
226  log << " " << (properties.managedMemory ? "supports" : "does not support")
227  << " allocating managed memory on this system\n";
228  log << " " << (properties.concurrentManagedAccess ? "can" : "cannot")
229  << " coherently access managed memory concurrently with the host\n";
230  log << " "
231  << "the host " << (properties.directManagedMemAccessFromHost ? "can" : "cannot")
232  << " directly access managed memory on the device without migration\n";
233  log << " " << (properties.cooperativeLaunch ? "supports" : "does not support")
234  << " launching cooperative kernels via hipLaunchCooperativeKernel()\n";
235  log << " " << (properties.cooperativeMultiDeviceLaunch ? "supports" : "does not support")
236  << " launching cooperative kernels via hipLaunchCooperativeKernelMultiDevice()\n";
237  log << '\n';
238  }
239 
240  // set and read the ROCm device flags.
241  // see the documentation of hipSetDeviceFlags and hipGetDeviceFlags for more information.
242  if (verbose_) {
243  log << "ROCm flags\n";
244  unsigned int flags;
245  hipCheck(hipGetDeviceFlags(&flags));
246  switch (flags & hipDeviceScheduleMask) {
247  case hipDeviceScheduleAuto:
248  log << " thread policy: default\n";
249  break;
250  case hipDeviceScheduleSpin:
251  log << " thread policy: spin\n";
252  break;
253  case hipDeviceScheduleYield:
254  log << " thread policy: yield\n";
255  break;
256  case hipDeviceScheduleBlockingSync:
257  log << " thread policy: blocking sync\n";
258  break;
259  default:
260  log << " thread policy: undefined\n";
261  }
262  if (flags & hipDeviceMapHost) {
263  log << " pinned host memory allocations: enabled\n";
264  } else {
265  log << " pinned host memory allocations: disabled\n";
266  }
267  if (flags & hipDeviceLmemResizeToMax) {
268  log << " kernel host memory reuse: enabled\n";
269  } else {
270  log << " kernel host memory reuse: disabled\n";
271  }
272  log << '\n';
273  }
274 
275  // set and read the ROCm resource limits.
276  // see the documentation of hipDeviceSetLimit() for more information.
277 
278  /*
279  // hipLimitPrintfFifoSize controls the size in bytes of the shared FIFO used by the
280  // printf() device system call.
281  if (printfFifoSize >= 0) {
282  setHipLimit(hipLimitPrintfFifoSize, "hipLimitPrintfFifoSize", printfFifoSize);
283  }
284  */
285  // hipLimitStackSize controls the stack size in bytes of each GPU thread.
286  if (stackSize >= 0) {
287  setHipLimit(hipLimitStackSize, "hipLimitStackSize", stackSize);
288  }
289  // hipLimitMallocHeapSize controls the size in bytes of the heap used by the malloc()
290  // and free() device system calls.
291  if (mallocHeapSize >= 0) {
292  setHipLimit(hipLimitMallocHeapSize, "hipLimitMallocHeapSize", mallocHeapSize);
293  }
294  /*
295  if ((properties.major > 3) or (properties.major == 3 and properties.minor >= 5)) {
296  // hipLimitDevRuntimeSyncDepth controls the maximum nesting depth of a grid at which
297  // a thread can safely call hipDeviceSynchronize().
298  if (devRuntimeSyncDepth >= 0) {
299  setHipLimit(hipLimitDevRuntimeSyncDepth, "hipLimitDevRuntimeSyncDepth", devRuntimeSyncDepth);
300  }
301  // hipLimitDevRuntimePendingLaunchCount controls the maximum number of outstanding
302  // device runtime launches that can be made from the current device.
303  if (devRuntimePendingLaunchCount >= 0) {
304  setHipLimit(
305  hipLimitDevRuntimePendingLaunchCount, "hipLimitDevRuntimePendingLaunchCount", devRuntimePendingLaunchCount);
306  }
307  }
308  */
309 
310  if (verbose_) {
311  size_t value;
312  log << "ROCm limits\n";
313  /*
314  hipCheck(hipDeviceGetLimit(&value, hipLimitPrintfFifoSize));
315  log << " printf buffer size: " << std::setw(10) << value / (1 << 20) << " MB\n";
316  */
317  hipCheck(hipDeviceGetLimit(&value, hipLimitStackSize));
318  log << " stack size: " << std::setw(10) << value / (1 << 10) << " kB\n";
319  hipCheck(hipDeviceGetLimit(&value, hipLimitMallocHeapSize));
320  log << " malloc heap size: " << std::setw(10) << value / (1 << 20) << " MB\n";
321  /*
322  if ((properties.major > 3) or (properties.major == 3 and properties.minor >= 5)) {
323  hipCheck(hipDeviceGetLimit(&value, hipLimitDevRuntimeSyncDepth));
324  log << " runtime sync depth: " << std::setw(10) << value << '\n';
325  hipCheck(hipDeviceGetLimit(&value, hipLimitDevRuntimePendingLaunchCount));
326  log << " runtime pending launch count: " << std::setw(10) << value << '\n';
327  }
328  */
329  }
330  }
331 
332  edm::Service<edm::ResourceInformation> resourceInformationService;
333  if (resourceInformationService.isAvailable()) {
334  std::vector<std::string> modelsV(models.begin(), models.end());
335  resourceInformationService->setGPUModels(modelsV);
336  /*
337  std::string nvidiaDriverVersion{systemDriverVersion};
338  resourceInformationService->setNvidiaDriverVersion(nvidiaDriverVersion);
339  resourceInformationService->setCudaDriverVersion(driverVersion);
340  resourceInformationService->setCudaRuntimeVersion(runtimeVersion);
341  */
342  }
343 
344  if (verbose_) {
345  log << '\n' << "ROCmService fully initialized";
346  }
347  enabled_ = true;
348 }
size
Write out results.
Definition: config.py:1
Definition: models.py:1
std::vector< std::pair< int, int > > computeCapabilities_
Definition: ROCmService.cc:48
Definition: value.py:1
void setHipLimit(hipLimit_t limit, const char *name, size_t request)
Definition: ROCmService.cc:53
Log< level::Info, false > LogInfo
int numberOfDevices_
Definition: ROCmService.cc:47
#define hipCheck(ARG,...)
Definition: hipCheck.h:52
std::string decodeVersion(int version)
Definition: ROCmService.cc:75
bool isAvailable() const
Definition: Service.h:40
Log< level::Warning, false > LogWarning
virtual void setGPUModels(std::vector< std::string > const &)=0

◆ ~ROCmService()

ROCmService::~ROCmService ( )
override

Definition at line 350 of file ROCmService.cc.

References enabled_, hipCheck, mps_fire::i, and numberOfDevices_.

350  {
351  if (enabled_) {
352  for (int i = 0; i < numberOfDevices_; ++i) {
353  hipCheck(hipSetDevice(i));
354  hipCheck(hipDeviceSynchronize());
355  // Explicitly destroys and cleans up all resources associated with the current device in the
356  // current process. Any subsequent API call to this device will reinitialize the device.
357  // Useful to check for memory leaks.
358  hipCheck(hipDeviceReset());
359  }
360  }
361 }
int numberOfDevices_
Definition: ROCmService.cc:47
#define hipCheck(ARG,...)
Definition: hipCheck.h:52

Member Function Documentation

◆ computeCapability()

std::pair<int, int> ROCmService::computeCapability ( int  device) const
inline final virtual

Implements ROCmInterface.

Definition at line 37 of file ROCmService.cc.

References computeCapabilities_, findQualityFiles::size, and to_string().

37  {
38  int size = computeCapabilities_.size();
39  if (device < 0 or device >= size) {
40  throw std::out_of_range("Invalid device index" + std::to_string(device) + ": the valid range is from 0 to " +
41  std::to_string(size - 1));
42  }
43  return computeCapabilities_[device];
44  }
size
Write out results.
static std::string to_string(const XMLCh *ch)
std::vector< std::pair< int, int > > computeCapabilities_
Definition: ROCmService.cc:48

◆ enabled()

bool ROCmService::enabled ( ) const
inline final virtual

Implements ROCmInterface.

Definition at line 32 of file ROCmService.cc.

References enabled_.

32 { return enabled_; }

◆ fillDescriptions()

void ROCmService::fillDescriptions ( edm::ConfigurationDescriptions &  descriptions)
static

Definition at line 363 of file ROCmService.cc.

References edm::ConfigurationDescriptions::add(), submitPVResolutionJobs::desc, and TH2PolyOfflineMaps::limits.

363  {
365  desc.addUntracked<bool>("enabled", true);
366  desc.addUntracked<bool>("verbose", false);
367 
369  /*
370  limits.addUntracked<int>("hipLimitPrintfFifoSize", -1)
371  ->setComment("Size in bytes of the shared FIFO used by the printf() device system call.");
372  */
373  limits.addUntracked<int>("hipLimitStackSize", -1)->setComment("Stack size in bytes of each GPU thread.");
374  limits.addUntracked<int>("hipLimitMallocHeapSize", -1)
375  ->setComment("Size in bytes of the heap used by the malloc() and free() device system calls.");
376  limits.addUntracked<int>("hipLimitDevRuntimeSyncDepth", -1)
377  ->setComment("Maximum nesting depth of a grid at which a thread can safely call hipDeviceSynchronize().");
378  limits.addUntracked<int>("hipLimitDevRuntimePendingLaunchCount", -1)
379  ->setComment("Maximum number of outstanding device runtime launches that can be made from the current device.");
380  desc.addUntracked<edm::ParameterSetDescription>("limits", limits)
381  ->setComment(
382  "See the documentation of hipDeviceSetLimit for more information.\nSetting any of these options to -1 keeps "
383  "the default value.");
384 
385  descriptions.add("ROCmService", desc);
386 }
void add(std::string const &label, ParameterSetDescription const &psetDescription)

◆ numberOfDevices()

int ROCmService::numberOfDevices ( ) const
inline final virtual

Implements ROCmInterface.

Definition at line 34 of file ROCmService.cc.

References numberOfDevices_.

34 { return numberOfDevices_; }
int numberOfDevices_
Definition: ROCmService.cc:47

Member Data Documentation

◆ computeCapabilities_

std::vector<std::pair<int, int> > ROCmService::computeCapabilities_
private

Definition at line 48 of file ROCmService.cc.

Referenced by computeCapability(), and ROCmService().

◆ enabled_

bool ROCmService::enabled_ = false
private

Definition at line 49 of file ROCmService.cc.

Referenced by enabled(), ROCmService(), and ~ROCmService().

◆ numberOfDevices_

int ROCmService::numberOfDevices_ = 0
private

Definition at line 47 of file ROCmService.cc.

Referenced by numberOfDevices(), ROCmService(), and ~ROCmService().

◆ verbose_

bool ROCmService::verbose_ = false
private

Definition at line 50 of file ROCmService.cc.

Referenced by ROCmService().