CMS 3D CMS Logo

CUDAMonitoringService.cc
Go to the documentation of this file.
1 #include <iostream>
2 
3 #include <cuda.h>
4 
16 
// Forward declaration of edm::StreamContext, which postEvent() takes by const reference.
17 namespace edm {
18  class StreamContext;
19 }
20 
22 public:
24  ~CUDAMonitoringService() = default;
25 
26  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
27 
30  void postEvent(edm::StreamContext const& sc);
31 
32 private:
34 };
35 
37  // make sure that CUDA is initialised, and that the CUDAService destructor is called after this service's destructor
38  edm::Service<CUDAService> cudaService;
39  if (!cudaService->enabled())
40  return;
41  numberOfDevices_ = cudaService->numberOfDevices();
42 
43  if (config.getUntrackedParameter<bool>("memoryConstruction")) {
45  }
46  if (config.getUntrackedParameter<bool>("memoryBeginStream")) {
48  }
49  if (config.getUntrackedParameter<bool>("memoryPerEvent")) {
51  }
52 }
53 
56 
57  desc.addUntracked<bool>("memoryConstruction", false)
58  ->setComment("Print memory information for each device after the construction of each module");
59  desc.addUntracked<bool>("memoryBeginStream", true)
60  ->setComment("Print memory information for each device after the beginStream() of each module");
61  desc.addUntracked<bool>("memoryPerEvent", true)
62  ->setComment("Print memory information for each device after each event");
63 
64  descriptions.add("CUDAMonitoringService", desc);
65  descriptions.setComment(
66  "The memory information is the global state of the device. This gets confusing if there are multiple processes "
67  "running on the same device. Probably the information retrieval should be re-thought?");
68 }
69 
70 // activity handlers
71 namespace {
72  template <typename T>
73  void dumpUsedMemory(T& log, int num) {
74  int old = 0;
75  cudaCheck(cudaGetDevice(&old));
76  for (int i = 0; i < num; ++i) {
77  size_t freeMemory, totalMemory;
78  cudaCheck(cudaSetDevice(i));
79  cudaCheck(cudaMemGetInfo(&freeMemory, &totalMemory));
80  log << "\n"
81  << i << ": " << (totalMemory - freeMemory) / (1 << 20) << " MB used / " << totalMemory / (1 << 20)
82  << " MB total";
83  }
84  cudaCheck(cudaSetDevice(old));
85  }
86 } // namespace
87 
89  auto log = edm::LogPrint("CUDAMonitoringService");
90  log << "CUDA device memory after construction of " << desc.moduleLabel() << " (" << desc.moduleName() << ")";
91  dumpUsedMemory(log, numberOfDevices_);
92 }
93 
95  auto log = edm::LogPrint("CUDAMonitoringService");
96  log << "CUDA device memory after beginStream() of " << mcc.moduleDescription()->moduleLabel() << " ("
97  << mcc.moduleDescription()->moduleName() << ")";
98  dumpUsedMemory(log, numberOfDevices_);
99 }
100 
102  auto log = edm::LogPrint("CUDAMonitoringService");
103  log << "CUDA device memory after event";
104  dumpUsedMemory(log, numberOfDevices_);
105 }
106 
ConfigurationDescriptions.h
CUDAMonitoringService::~CUDAMonitoringService
~CUDAMonitoringService()=default
edm::ModuleDescription::moduleLabel
std::string const & moduleLabel() const
Definition: ModuleDescription.h:43
ModuleCallingContext.h
mps_fire.i
i
Definition: mps_fire.py:355
MessageLogger.h
edm
HLT enums.
Definition: AlignableModifier.h:19
edm::ModuleDescription::moduleName
std::string const & moduleName() const
Definition: ModuleDescription.h:42
edm::ParameterSetDescription
Definition: ParameterSetDescription.h:52
DEFINE_FWK_SERVICE
#define DEFINE_FWK_SERVICE(type)
Definition: ServiceMaker.h:105
CUDAMonitoringService
Definition: CUDAMonitoringService.cc:21
edm::ModuleCallingContext::moduleDescription
ModuleDescription const * moduleDescription() const
Definition: ModuleCallingContext.h:50
CUDAService::numberOfDevices
int numberOfDevices() const
Definition: CUDAService.h:24
edm::ModuleDescription
Definition: ModuleDescription.h:21
edm::ActivityRegistry::watchPostModuleBeginStream
void watchPostModuleBeginStream(PostModuleBeginStream::slot_type const &iSlot)
Definition: ActivityRegistry.h:251
ModuleDescription.h
ActivityRegistry.h
config
Definition: config.py:1
CUDAMonitoringService::CUDAMonitoringService
CUDAMonitoringService(edm::ParameterSet const &iConfig, edm::ActivityRegistry &iRegistry)
Definition: CUDAMonitoringService.cc:36
edm::ConfigurationDescriptions::add
void add(std::string const &label, ParameterSetDescription const &psetDescription)
Definition: ConfigurationDescriptions.cc:57
edm::StreamContext
Definition: StreamContext.h:31
Service.h
edm::ActivityRegistry
Definition: ActivityRegistry.h:132
ParameterSetDescription.h
edm::ActivityRegistry::watchPostModuleConstruction
void watchPostModuleConstruction(PostModuleConstruction::slot_type const &iSlot)
Definition: ActivityRegistry.h:618
CUDAMonitoringService::numberOfDevices_
int numberOfDevices_
Definition: CUDAMonitoringService.cc:33
ServiceMaker.h
edm::ConfigurationDescriptions
Definition: ConfigurationDescriptions.h:28
edm::ParameterSetDescription::addUntracked
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
Definition: ParameterSetDescription.h:100
edm::ActivityRegistry::watchPostEvent
void watchPostEvent(PostEvent::slot_type const &iSlot)
Definition: ActivityRegistry.h:406
edm::ParameterSet
Definition: ParameterSet.h:36
edm::ConfigurationDescriptions::setComment
void setComment(std::string const &value)
Definition: ConfigurationDescriptions.cc:48
edm::Service
Definition: Service.h:30
cudaCheck.h
edm::LogPrint
Definition: MessageLogger.h:342
CUDAMonitoringService::postEvent
void postEvent(edm::StreamContext const &sc)
Definition: CUDAMonitoringService.cc:101
CUDAMonitoringService::postModuleConstruction
void postModuleConstruction(edm::ModuleDescription const &desc)
Definition: CUDAMonitoringService.cc:88
EgammaValidation_cff.num
num
Definition: EgammaValidation_cff.py:34
CUDAService.h
CUDAService::enabled
bool enabled() const
Definition: CUDAService.h:22
CUDAMonitoringService::postModuleBeginStream
void postModuleBeginStream(edm::StreamContext const &, edm::ModuleCallingContext const &mcc)
Definition: CUDAMonitoringService.cc:94
cudaCheck
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:62
T
long double T
Definition: Basic3DVectorLD.h:48
dqm-mbProfile.log
log
Definition: dqm-mbProfile.py:17
CUDAMonitoringService::fillDescriptions
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
Definition: CUDAMonitoringService.cc:54
ParameterSet.h
edm::ModuleCallingContext
Definition: ModuleCallingContext.h:29