8 #include <hip/hip_runtime.h> 9 #if HIP_VERSION_MAJOR >= 6 11 #include <rocm-core/rocm_version.h> 13 #include <rocm_version.h> 14 #endif // HIP_VERSION_MAJOR 15 #include <rocm_smi/rocm_smi.h> 41 if (device < 0 or device >=
size) {
42 throw std::out_of_range(
"Invalid device index" +
std::to_string(device) +
": the valid range is from 0 to " +
56 #if HIP_VERSION >= 50400000 62 if (hipErrorUnsupportedLimit ==
result) {
63 edm::LogWarning(
"ROCmService") <<
"ROCm device " << device <<
": unsupported limit \"" <<
name <<
"\"";
69 if (hipSuccess !=
result) {
70 edm::LogWarning(
"ROCmService") <<
"ROCm device " << device <<
": failed to set limit \"" <<
name <<
"\" to " 71 << request <<
", current value is " <<
value;
72 }
else if (
value != request) {
74 <<
" instead of requested " << request;
77 edm::LogWarning(
"ROCmService") <<
"ROCm versions below 5.4.0 do not support setting device limits.";
89 if (not
config.getUntrackedParameter<
bool>(
"enabled")) {
90 edm::LogInfo(
"ROCmService") <<
"ROCmService disabled by configuration";
95 if (hipSuccess !=
status) {
96 edm::LogWarning(
"ROCmService") <<
"Failed to initialize the ROCm runtime.\n" 97 <<
"Disabling the ROCmService.";
103 char systemDriverVersion[256];
105 rsmiCheck(rsmi_version_str_get(RSMI_SW_COMP_DRIVER, systemDriverVersion,
sizeof(systemDriverVersion) - 1));
110 int driverVersion = 0;
111 hipCheck(hipDriverGetVersion(&driverVersion));
115 int runtimeVersion = 0;
116 hipCheck(hipRuntimeGetVersion(&runtimeVersion));
120 log <<
"AMD kernel driver: " << systemDriverVersion <<
'\n';
121 log <<
"ROCm driver API: " <<
decodeVersion(driverVersion) <<
" (compiled with ROCm " <<
122 #ifdef ROCM_BUILD_INFO 126 ROCM_VERSION_MAJOR <<
'.' << ROCM_VERSION_MINOR <<
'.' << ROCM_VERSION_PATCH
129 log <<
"ROCm runtime API: " <<
decodeVersion(runtimeVersion) <<
" (compiled with HIP " << HIP_VERSION_MAJOR <<
'.' 130 << HIP_VERSION_MINOR <<
'.' << HIP_VERSION_PATCH <<
")\n";
131 log <<
"ROCm runtime successfully initialised, found " <<
numberOfDevices_ <<
" compute devices.\n";
133 log <<
"ROCm runtime version " <<
decodeVersion(runtimeVersion) <<
", driver version " 134 <<
decodeVersion(driverVersion) <<
", AMD driver version " << systemDriverVersion;
137 #if HIP_VERSION >= 50400000 139 auto stackSize =
limits.getUntrackedParameter<
int>(
"hipLimitStackSize");
140 auto mallocHeapSize =
limits.getUntrackedParameter<
int>(
"hipLimitMallocHeapSize");
143 std::set<std::string>
models;
148 hipDeviceProp_t properties;
149 hipCheck(hipGetDeviceProperties(&properties,
i));
150 log <<
'\n' <<
"ROCm device " <<
i <<
": " << properties.name;
159 log <<
" compute capability: " << properties.gcnArchName;
161 log <<
" (" << properties.gcnArchName <<
")";
165 log <<
" streaming multiprocessors: " << std::setw(13) << properties.multiProcessorCount <<
'\n';
166 log <<
" ROCm cores: " << std::setw(28) <<
"not yet implemented" <<
'\n';
171 static constexpr const char* computeModeDescription[] = {
173 "exclusive (single thread)",
175 "exclusive (single process)",
178 log <<
" compute mode:" << std::right << std::setw(27)
179 << computeModeDescription[
std::min(properties.computeMode,
180 static_cast<int>(std::size(computeModeDescription)) - 1)]
186 hipCheck(hipSetDeviceFlags(hipDeviceScheduleAuto | hipDeviceMapHost));
191 size_t freeMemory = 0;
192 size_t totalMemory = 0;
193 hipCheck(hipMemGetInfo(&freeMemory, &totalMemory));
194 log <<
" memory: " << std::setw(6) << freeMemory / (1 << 20) <<
" MB free / " << std::setw(6)
195 << totalMemory / (1 << 20) <<
" MB total\n";
196 log <<
" constant memory: " << std::setw(8) << properties.totalConstMem / (1 << 10) <<
" kB\n";
197 log <<
" L2 cache size: " << std::setw(8) << properties.l2CacheSize / (1 << 10) <<
" kB\n";
202 log <<
"Other capabilities\n";
203 log <<
" " << (properties.canMapHostMemory ?
"can" :
"cannot")
204 <<
" map host memory into the ROCm address space for use with hipHostAlloc()/hipHostGetDevicePointer()\n";
205 log <<
" " << (properties.pageableMemoryAccess ?
"supports" :
"does not support")
206 <<
" coherently accessing pageable memory without calling hipHostRegister() on it\n";
207 log <<
" " << (properties.pageableMemoryAccessUsesHostPageTables ?
"can" :
"cannot")
208 <<
" access pageable memory via the host's page tables\n";
209 log <<
" " << (properties.managedMemory ?
"supports" :
"does not support")
210 <<
" allocating managed memory on this system\n";
211 log <<
" " << (properties.concurrentManagedAccess ?
"can" :
"cannot")
212 <<
" coherently access managed memory concurrently with the host\n";
214 <<
"the host " << (properties.directManagedMemAccessFromHost ?
"can" :
"cannot")
215 <<
" directly access managed memory on the device without migration\n";
216 log <<
" " << (properties.cooperativeLaunch ?
"supports" :
"does not support")
217 <<
" launching cooperative kernels via hipLaunchCooperativeKernel()\n";
218 log <<
" " << (properties.cooperativeMultiDeviceLaunch ?
"supports" :
"does not support")
219 <<
" launching cooperative kernels via hipLaunchCooperativeKernelMultiDevice()\n";
226 log <<
"ROCm flags\n";
229 switch (
flags & hipDeviceScheduleMask) {
230 case hipDeviceScheduleAuto:
231 log <<
" thread policy: default\n";
233 case hipDeviceScheduleSpin:
234 log <<
" thread policy: spin\n";
236 case hipDeviceScheduleYield:
237 log <<
" thread policy: yield\n";
239 case hipDeviceScheduleBlockingSync:
240 log <<
" thread policy: blocking sync\n";
243 log <<
" thread policy: undefined\n";
245 if (
flags & hipDeviceMapHost) {
246 log <<
" pinned host memory allocations: enabled\n";
248 log <<
" pinned host memory allocations: disabled\n";
250 if (
flags & hipDeviceLmemResizeToMax) {
251 log <<
" kernel host memory reuse: enabled\n";
253 log <<
" kernel host memory reuse: disabled\n";
261 #if HIP_VERSION >= 50400000 263 if (stackSize >= 0) {
264 setHipLimit(hipLimitStackSize,
"hipLimitStackSize", stackSize);
268 if (mallocHeapSize >= 0) {
269 setHipLimit(hipLimitMallocHeapSize,
"hipLimitMallocHeapSize", mallocHeapSize);
275 log <<
"ROCm limits\n";
276 #if HIP_VERSION >= 50400000 278 log <<
" stack size: " << std::setw(10) <<
value / (1 << 10) <<
" kB\n";
281 log <<
" malloc heap size: " << std::setw(10) <<
value / (1 << 20) <<
" MB\n";
287 std::vector<std::string> modelsV(
models.begin(),
models.end());
298 log <<
'\n' <<
"ROCmService fully initialized";
318 desc.addUntracked<
bool>(
"enabled",
true);
319 desc.addUntracked<
bool>(
"verbose",
false);
321 #if HIP_VERSION >= 50400000 323 limits.addUntracked<
int>(
"hipLimitStackSize", -1)->setComment(
"Stack size in bytes of each GPU thread.");
324 limits.addUntracked<
int>(
"hipLimitMallocHeapSize", -1)
325 ->setComment(
"Size in bytes of the heap used by the malloc() and free() device system calls.");
328 "See the documentation of hipDeviceSetLimit for more information.\nSetting any of these options to -1 keeps " 329 "the default value.");
332 descriptions.
add(
"ROCmService",
desc);
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
bool isProcessWideService(TFileService const *)
#define rsmiCheck(ARG,...)
static std::string to_string(const XMLCh *ch)
ROCmService(edm::ParameterSet const &config)
Constructor.
std::vector< std::pair< int, int > > computeCapabilities_
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
int numberOfDevices() const final
bool enabled() const final
void setHipLimit(hipLimit_t limit, const char *name, size_t request)
Log< level::Info, false > LogInfo
#define hipCheck(ARG,...)
void add(std::string const &label, ParameterSetDescription const &psetDescription)
std::string decodeVersion(int version)
Log< level::Warning, false > LogWarning
std::pair< int, int > computeCapability(int device) const final