8 #include <hip/hip_runtime.h> 39 if (device < 0 or device >=
size) {
40 throw std::out_of_range(
"Invalid device index" +
std::to_string(device) +
": the valid range is from 0 to " +
59 if (hipErrorUnsupportedLimit ==
result) {
60 edm::LogWarning(
"ROCmService") <<
"ROCm device " << device <<
": unsupported limit \"" <<
name <<
"\"";
66 if (hipSuccess !=
result) {
67 edm::LogWarning(
"ROCmService") <<
"ROCm device " << device <<
": failed to set limit \"" <<
name <<
"\" to " 68 << request <<
", current value is " <<
value;
69 }
else if (
value != request) {
71 <<
" instead of requested " << request;
81 if (not
config.getUntrackedParameter<
bool>(
"enabled")) {
82 edm::LogInfo(
"ROCmService") <<
"ROCmService disabled by configuration";
87 if (hipSuccess !=
status) {
88 edm::LogWarning(
"ROCmService") <<
"Failed to initialize the ROCm runtime.\n" 89 <<
"Disabling the ROCmService.";
104 int driverVersion = 0;
105 hipCheck(hipDriverGetVersion(&driverVersion));
109 int runtimeVersion = 0;
110 hipCheck(hipRuntimeGetVersion(&runtimeVersion));
124 log <<
"ROCm runtime successfully initialised, found " <<
numberOfDevices_ <<
" compute devices.\n";
126 log <<
"ROCm runtime version " <<
decodeVersion(runtimeVersion) <<
", driver version " 138 auto stackSize =
limits.getUntrackedParameter<
int>(
"hipLimitStackSize");
139 auto mallocHeapSize =
limits.getUntrackedParameter<
int>(
"hipLimitMallocHeapSize");
145 std::set<std::string>
models;
150 hipDeviceProp_t properties;
151 hipCheck(hipGetDeviceProperties(&properties,
i));
152 log <<
'\n' <<
"ROCm device " <<
i <<
": " << properties.name;
161 log <<
" compute capability: " << properties.major <<
"." << properties.minor;
163 log <<
" (sm_" << properties.major << properties.minor <<
")";
166 log <<
" streaming multiprocessors: " << std::setw(13) << properties.multiProcessorCount <<
'\n';
167 log <<
" ROCm cores: " << std::setw(28) <<
"not yet implemented" <<
'\n';
175 static constexpr
const char* computeModeDescription[] = {
177 "exclusive (single thread)",
179 "exclusive (single process)",
182 log <<
" compute mode:" << std::right << std::setw(27)
183 << computeModeDescription[
std::min(properties.computeMode,
184 static_cast<int>(
std::size(computeModeDescription)) - 1)]
190 hipCheck(hipSetDeviceFlags(hipDeviceScheduleAuto | hipDeviceMapHost));
195 size_t freeMemory, totalMemory;
196 hipCheck(hipMemGetInfo(&freeMemory, &totalMemory));
197 log <<
" memory: " << std::setw(6) << freeMemory / (1 << 20) <<
" MB free / " << std::setw(6)
198 << totalMemory / (1 << 20) <<
" MB total\n";
199 log <<
" constant memory: " << std::setw(6) << properties.totalConstMem / (1 << 10) <<
" kB\n";
200 log <<
" L2 cache size: " << std::setw(6) << properties.l2CacheSize / (1 << 10) <<
" kB\n";
213 log <<
"Other capabilities\n";
214 log <<
" " << (properties.canMapHostMemory ?
"can" :
"cannot")
215 <<
" map host memory into the ROCm address space for use with hipHostAlloc()/hipHostGetDevicePointer()\n";
216 log <<
" " << (properties.pageableMemoryAccess ?
"supports" :
"does not support")
217 <<
" coherently accessing pageable memory without calling hipHostRegister() on it\n";
218 log <<
" " << (properties.pageableMemoryAccessUsesHostPageTables ?
"can" :
"cannot")
219 <<
" access pageable memory via the host's page tables\n";
226 log <<
" " << (properties.managedMemory ?
"supports" :
"does not support")
227 <<
" allocating managed memory on this system\n";
228 log <<
" " << (properties.concurrentManagedAccess ?
"can" :
"cannot")
229 <<
" coherently access managed memory concurrently with the host\n";
231 <<
"the host " << (properties.directManagedMemAccessFromHost ?
"can" :
"cannot")
232 <<
" directly access managed memory on the device without migration\n";
233 log <<
" " << (properties.cooperativeLaunch ?
"supports" :
"does not support")
234 <<
" launching cooperative kernels via hipLaunchCooperativeKernel()\n";
235 log <<
" " << (properties.cooperativeMultiDeviceLaunch ?
"supports" :
"does not support")
236 <<
" launching cooperative kernels via hipLaunchCooperativeKernelMultiDevice()\n";
243 log <<
"ROCm flags\n";
246 switch (
flags & hipDeviceScheduleMask) {
247 case hipDeviceScheduleAuto:
248 log <<
" thread policy: default\n";
250 case hipDeviceScheduleSpin:
251 log <<
" thread policy: spin\n";
253 case hipDeviceScheduleYield:
254 log <<
" thread policy: yield\n";
256 case hipDeviceScheduleBlockingSync:
257 log <<
" thread policy: blocking sync\n";
260 log <<
" thread policy: undefined\n";
262 if (
flags & hipDeviceMapHost) {
263 log <<
" pinned host memory allocations: enabled\n";
265 log <<
" pinned host memory allocations: disabled\n";
267 if (
flags & hipDeviceLmemResizeToMax) {
268 log <<
" kernel host memory reuse: enabled\n";
270 log <<
" kernel host memory reuse: disabled\n";
286 if (stackSize >= 0) {
287 setHipLimit(hipLimitStackSize,
"hipLimitStackSize", stackSize);
291 if (mallocHeapSize >= 0) {
292 setHipLimit(hipLimitMallocHeapSize,
"hipLimitMallocHeapSize", mallocHeapSize);
312 log <<
"ROCm limits\n";
318 log <<
" stack size: " << std::setw(10) <<
value / (1 << 10) <<
" kB\n";
320 log <<
" malloc heap size: " << std::setw(10) <<
value / (1 << 20) <<
" MB\n";
334 std::vector<std::string> modelsV(
models.begin(),
models.end());
345 log <<
'\n' <<
"ROCmService fully initialized";
365 desc.addUntracked<
bool>(
"enabled",
true);
366 desc.addUntracked<
bool>(
"verbose",
false);
373 limits.addUntracked<
int>(
"hipLimitStackSize", -1)->setComment(
"Stack size in bytes of each GPU thread.");
374 limits.addUntracked<
int>(
"hipLimitMallocHeapSize", -1)
375 ->setComment(
"Size in bytes of the heap used by the malloc() and free() device system calls.");
376 limits.addUntracked<
int>(
"hipLimitDevRuntimeSyncDepth", -1)
377 ->setComment(
"Maximum nesting depth of a grid at which a thread can safely call hipDeviceSynchronize().");
378 limits.addUntracked<
int>(
"hipLimitDevRuntimePendingLaunchCount", -1)
379 ->setComment(
"Maximum number of outstanding device runtime launches that can be made from the current device.");
382 "See the documentation of hipDeviceSetLimit for more information.\nSetting any of these options to -1 keeps " 383 "the default value.");
385 descriptions.
add(
"ROCmService",
desc);
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
bool isProcessWideService(TFileService const *)
static std::string to_string(const XMLCh *ch)
ROCmService(edm::ParameterSet const &config)
Constructor.
std::vector< std::pair< int, int > > computeCapabilities_
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
int numberOfDevices() const final
bool enabled() const final
void setHipLimit(hipLimit_t limit, const char *name, size_t request)
Log< level::Info, false > LogInfo
#define hipCheck(ARG,...)
void add(std::string const &label, ParameterSetDescription const &psetDescription)
std::string decodeVersion(int version)
Log< level::Warning, false > LogWarning
std::pair< int, int > computeCapability(int device) const final