8 #include <hip/hip_runtime.h> 9 #include <rocm_version.h> 10 #include <rocm_smi/rocm_smi.h> 36 if (device < 0 or device >=
size) {
37 throw std::out_of_range(
"Invalid device index" +
std::to_string(device) +
": the valid range is from 0 to " +
51 #if HIP_VERSION >= 50400000 57 if (hipErrorUnsupportedLimit ==
result) {
58 edm::LogWarning(
"ROCmService") <<
"ROCm device " << device <<
": unsupported limit \"" <<
name <<
"\"";
64 if (hipSuccess !=
result) {
65 edm::LogWarning(
"ROCmService") <<
"ROCm device " << device <<
": failed to set limit \"" <<
name <<
"\" to " 66 << request <<
", current value is " <<
value;
67 }
else if (
value != request) {
69 <<
" instead of requested " << request;
72 edm::LogWarning(
"ROCmService") <<
"ROCm versions below 5.4.0 do not support setting device limits.";
84 if (not
config.getUntrackedParameter<
bool>(
"enabled")) {
85 edm::LogInfo(
"ROCmService") <<
"ROCmService disabled by configuration";
90 if (hipSuccess !=
status) {
91 edm::LogWarning(
"ROCmService") <<
"Failed to initialize the ROCm runtime.\n" 92 <<
"Disabling the ROCmService.";
98 char systemDriverVersion[256];
100 rsmiCheck(rsmi_version_str_get(RSMI_SW_COMP_DRIVER, systemDriverVersion,
sizeof(systemDriverVersion) - 1));
105 int driverVersion = 0;
106 hipCheck(hipDriverGetVersion(&driverVersion));
110 int runtimeVersion = 0;
111 hipCheck(hipRuntimeGetVersion(&runtimeVersion));
115 log <<
"AMD kernel driver: " << systemDriverVersion <<
'\n';
116 log <<
"ROCm driver API: " <<
decodeVersion(driverVersion) <<
" (compiled with ROCm " <<
117 #ifdef ROCM_BUILD_INFO 121 ROCM_VERSION_MAJOR <<
'.' << ROCM_VERSION_MINOR <<
'.' << ROCM_VERSION_PATCH
124 log <<
"ROCm runtime API: " <<
decodeVersion(runtimeVersion) <<
" (compiled with HIP " << HIP_VERSION_MAJOR <<
'.' 125 << HIP_VERSION_MINOR <<
'.' << HIP_VERSION_PATCH <<
")\n";
126 log <<
"ROCm runtime successfully initialised, found " <<
numberOfDevices_ <<
" compute devices.\n";
128 log <<
"ROCm runtime version " <<
decodeVersion(runtimeVersion) <<
", driver version " 129 <<
decodeVersion(driverVersion) <<
", AMD driver version " << systemDriverVersion;
132 #if HIP_VERSION >= 50400000 134 auto stackSize =
limits.getUntrackedParameter<
int>(
"hipLimitStackSize");
135 auto mallocHeapSize =
limits.getUntrackedParameter<
int>(
"hipLimitMallocHeapSize");
138 std::set<std::string>
models;
143 hipDeviceProp_t properties;
144 hipCheck(hipGetDeviceProperties(&properties,
i));
145 log <<
'\n' <<
"ROCm device " <<
i <<
": " << properties.name;
154 log <<
" compute capability: " << properties.gcnArchName;
156 log <<
" (" << properties.gcnArchName <<
")";
160 log <<
" streaming multiprocessors: " << std::setw(13) << properties.multiProcessorCount <<
'\n';
161 log <<
" ROCm cores: " << std::setw(28) <<
"not yet implemented" <<
'\n';
166 static constexpr
const char* computeModeDescription[] = {
168 "exclusive (single thread)",
170 "exclusive (single process)",
173 log <<
" compute mode:" << std::right << std::setw(27)
174 << computeModeDescription[
std::min(properties.computeMode,
175 static_cast<int>(std::size(computeModeDescription)) - 1)]
181 hipCheck(hipSetDeviceFlags(hipDeviceScheduleAuto | hipDeviceMapHost));
186 size_t freeMemory = 0;
187 size_t totalMemory = 0;
188 hipCheck(hipMemGetInfo(&freeMemory, &totalMemory));
189 log <<
" memory: " << std::setw(6) << freeMemory / (1 << 20) <<
" MB free / " << std::setw(6)
190 << totalMemory / (1 << 20) <<
" MB total\n";
191 log <<
" constant memory: " << std::setw(8) << properties.totalConstMem / (1 << 10) <<
" kB\n";
192 log <<
" L2 cache size: " << std::setw(8) << properties.l2CacheSize / (1 << 10) <<
" kB\n";
197 log <<
"Other capabilities\n";
198 log <<
" " << (properties.canMapHostMemory ?
"can" :
"cannot")
199 <<
" map host memory into the ROCm address space for use with hipHostAlloc()/hipHostGetDevicePointer()\n";
200 log <<
" " << (properties.pageableMemoryAccess ?
"supports" :
"does not support")
201 <<
" coherently accessing pageable memory without calling hipHostRegister() on it\n";
202 log <<
" " << (properties.pageableMemoryAccessUsesHostPageTables ?
"can" :
"cannot")
203 <<
" access pageable memory via the host's page tables\n";
204 log <<
" " << (properties.managedMemory ?
"supports" :
"does not support")
205 <<
" allocating managed memory on this system\n";
206 log <<
" " << (properties.concurrentManagedAccess ?
"can" :
"cannot")
207 <<
" coherently access managed memory concurrently with the host\n";
209 <<
"the host " << (properties.directManagedMemAccessFromHost ?
"can" :
"cannot")
210 <<
" directly access managed memory on the device without migration\n";
211 log <<
" " << (properties.cooperativeLaunch ?
"supports" :
"does not support")
212 <<
" launching cooperative kernels via hipLaunchCooperativeKernel()\n";
213 log <<
" " << (properties.cooperativeMultiDeviceLaunch ?
"supports" :
"does not support")
214 <<
" launching cooperative kernels via hipLaunchCooperativeKernelMultiDevice()\n";
221 log <<
"ROCm flags\n";
224 switch (
flags & hipDeviceScheduleMask) {
225 case hipDeviceScheduleAuto:
226 log <<
" thread policy: default\n";
228 case hipDeviceScheduleSpin:
229 log <<
" thread policy: spin\n";
231 case hipDeviceScheduleYield:
232 log <<
" thread policy: yield\n";
234 case hipDeviceScheduleBlockingSync:
235 log <<
" thread policy: blocking sync\n";
238 log <<
" thread policy: undefined\n";
240 if (
flags & hipDeviceMapHost) {
241 log <<
" pinned host memory allocations: enabled\n";
243 log <<
" pinned host memory allocations: disabled\n";
245 if (
flags & hipDeviceLmemResizeToMax) {
246 log <<
" kernel host memory reuse: enabled\n";
248 log <<
" kernel host memory reuse: disabled\n";
256 #if HIP_VERSION >= 50400000 258 if (stackSize >= 0) {
259 setHipLimit(hipLimitStackSize,
"hipLimitStackSize", stackSize);
263 if (mallocHeapSize >= 0) {
264 setHipLimit(hipLimitMallocHeapSize,
"hipLimitMallocHeapSize", mallocHeapSize);
270 log <<
"ROCm limits\n";
271 #if HIP_VERSION >= 50400000 273 log <<
" stack size: " << std::setw(10) <<
value / (1 << 10) <<
" kB\n";
276 log <<
" malloc heap size: " << std::setw(10) <<
value / (1 << 20) <<
" MB\n";
282 std::vector<std::string> modelsV(
models.begin(),
models.end());
293 log <<
'\n' <<
"ROCmService fully initialized";
313 desc.addUntracked<
bool>(
"enabled",
true);
314 desc.addUntracked<
bool>(
"verbose",
false);
316 #if HIP_VERSION >= 50400000 318 limits.addUntracked<
int>(
"hipLimitStackSize", -1)->setComment(
"Stack size in bytes of each GPU thread.");
319 limits.addUntracked<
int>(
"hipLimitMallocHeapSize", -1)
320 ->setComment(
"Size in bytes of the heap used by the malloc() and free() device system calls.");
323 "See the documentation of hipDeviceSetLimit for more information.\nSetting any of these options to -1 keeps " 324 "the default value.");
327 descriptions.
add(
"ROCmService",
desc);
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
bool isProcessWideService(TFileService const *)
#define rsmiCheck(ARG,...)
static std::string to_string(const XMLCh *ch)
ROCmService(edm::ParameterSet const &config)
Constructor.
std::vector< std::pair< int, int > > computeCapabilities_
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
int numberOfDevices() const final
bool enabled() const final
void setHipLimit(hipLimit_t limit, const char *name, size_t request)
Log< level::Info, false > LogInfo
#define hipCheck(ARG,...)
void add(std::string const &label, ParameterSetDescription const &psetDescription)
std::string decodeVersion(int version)
Log< level::Warning, false > LogWarning
std::pair< int, int > computeCapability(int device) const final