CMS 3D CMS Logo

ROCmMonitoringService.cc
Go to the documentation of this file.
1 #include <iostream>
2 
3 #include <hip/hip_runtime.h>
4 
16 
17 namespace edm {
18  class StreamContext;
19 }
20 
22 public:
24  ~ROCmMonitoringService() = default;
25 
26  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
27 
31  void postEvent(edm::StreamContext const& sc);
32 
33 private:
35 };
36 
38  // make sure that ROCm is initialised, and that the ROCmService destructor is called after this service's destructor
40  if (not service or not service->enabled())
41  return;
42 
43  numberOfDevices_ = service->numberOfDevices();
44 
45  if (config.getUntrackedParameter<bool>("memoryConstruction")) {
47  }
48  if (config.getUntrackedParameter<bool>("memoryBeginStream")) {
50  }
51  if (config.getUntrackedParameter<bool>("memoryPerModule")) {
53  }
54  if (config.getUntrackedParameter<bool>("memoryPerEvent")) {
56  }
57 }
58 
61 
62  desc.addUntracked<bool>("memoryConstruction", false)
63  ->setComment("Print memory information for each device after the construction of each module");
64  desc.addUntracked<bool>("memoryBeginStream", true)
65  ->setComment("Print memory information for each device after the beginStream() of each module");
66  desc.addUntracked<bool>("memoryPerModule", true)
67  ->setComment("Print memory information for each device after the event of each module");
68  desc.addUntracked<bool>("memoryPerEvent", true)
69  ->setComment("Print memory information for each device after each event");
70 
71  descriptions.add("ROCmMonitoringService", desc);
72  descriptions.setComment(
73  "The memory information is the global state of the device. This gets confusing if there are multiple processes "
74  "running on the same device. Probably the information retrieval should be re-thought?");
75 }
76 
77 // activity handlers
78 namespace {
79  template <typename T>
80  void dumpUsedMemory(T& log, int num) {
81  int old = 0;
82  hipCheck(hipGetDevice(&old));
83  constexpr auto mbytes = 1 << 20;
84  for (int i = 0; i < num; ++i) {
85  size_t freeMemory, totalMemory;
86  hipCheck(hipSetDevice(i));
87  hipCheck(hipMemGetInfo(&freeMemory, &totalMemory));
88  log << "\n"
89  << i << ": " << (totalMemory - freeMemory) / mbytes << " MB used / " << totalMemory / mbytes << " MB total";
90  }
91  hipCheck(hipSetDevice(old));
92  }
93 } // namespace
94 
96  auto log = edm::LogPrint("ROCmMonitoringService");
97  log << "ROCm device memory after construction of " << desc.moduleLabel() << " (" << desc.moduleName() << ")";
98  dumpUsedMemory(log, numberOfDevices_);
99 }
100 
102  auto log = edm::LogPrint("ROCmMonitoringService");
103  log << "ROCm device memory after beginStream() of " << mcc.moduleDescription()->moduleLabel() << " ("
104  << mcc.moduleDescription()->moduleName() << ")";
105  dumpUsedMemory(log, numberOfDevices_);
106 }
107 
109  auto log = edm::LogPrint("ROCmMonitoringService");
110  log << "ROCm device memory after processing an event by " << mcc.moduleDescription()->moduleLabel() << " ("
111  << mcc.moduleDescription()->moduleName() << ")";
112  dumpUsedMemory(log, numberOfDevices_);
113 }
114 
116  auto log = edm::LogPrint("ROCmMonitoringService");
117  log << "ROCm device memory after event";
118  dumpUsedMemory(log, numberOfDevices_);
119 }
120 
void watchPostModuleConstruction(PostModuleConstruction::slot_type const &iSlot)
void postModuleEvent(edm::StreamContext const &sc, edm::ModuleCallingContext const &mcc)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
void watchPostEvent(PostEvent::slot_type const &iSlot)
void watchPostModuleEvent(PostModuleEvent::slot_type const &iSlot)
std::string const & moduleName() const
Definition: config.py:1
ModuleDescription const * moduleDescription() const noexcept
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
void postModuleBeginStream(edm::StreamContext const &, edm::ModuleCallingContext const &mcc)
Log< level::Warning, true > LogPrint
#define DEFINE_FWK_SERVICE(type)
Definition: ServiceMaker.h:97
void setComment(std::string const &value)
ROCmMonitoringService(edm::ParameterSet const &iConfig, edm::ActivityRegistry &iRegistry)
void watchPostModuleBeginStream(PostModuleBeginStream::slot_type const &iSlot)
#define hipCheck(ARG,...)
Definition: hipCheck.h:52
void add(std::string const &label, ParameterSetDescription const &psetDescription)
HLT enums.
void postEvent(edm::StreamContext const &sc)
void postModuleConstruction(edm::ModuleDescription const &desc)
long double T
std::string const & moduleLabel() const
~ROCmMonitoringService()=default