
CUDAMonitoringService.cc
#include <iostream>

#include <cuda.h>

#include "DataFormats/Provenance/interface/ModuleDescription.h"
#include "FWCore/MessageLogger/interface/MessageLogger.h"
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ServiceRegistry/interface/ActivityRegistry.h"
#include "FWCore/ServiceRegistry/interface/ModuleCallingContext.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "FWCore/ServiceRegistry/interface/ServiceMaker.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/deviceAllocatorStatus.h"

namespace edm {
  class StreamContext;
}

class CUDAMonitoringService {
public:
  CUDAMonitoringService(edm::ParameterSet const& iConfig, edm::ActivityRegistry& iRegistry);
  ~CUDAMonitoringService() = default;

  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);

  void postModuleConstruction(edm::ModuleDescription const& desc);
  void postModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const& mcc);
  void postModuleEvent(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc);
  void postEvent(edm::StreamContext const& sc);

private:
  int numberOfDevices_ = 0;
};

CUDAMonitoringService::CUDAMonitoringService(edm::ParameterSet const& config, edm::ActivityRegistry& registry) {
  // make sure that CUDA is initialised, and that the CUDAService destructor is called after this service's destructor
  edm::Service<CUDAService> cuda;
  if (not cuda or not cuda->enabled())
    return;

  numberOfDevices_ = cuda->numberOfDevices();

  // register only for the signals requested in the configuration
  if (config.getUntrackedParameter<bool>("memoryConstruction")) {
    registry.watchPostModuleConstruction(this, &CUDAMonitoringService::postModuleConstruction);
  }
  if (config.getUntrackedParameter<bool>("memoryBeginStream")) {
    registry.watchPostModuleBeginStream(this, &CUDAMonitoringService::postModuleBeginStream);
  }
  if (config.getUntrackedParameter<bool>("memoryPerModule")) {
    registry.watchPostModuleEvent(this, &CUDAMonitoringService::postModuleEvent);
  }
  if (config.getUntrackedParameter<bool>("memoryPerEvent")) {
    registry.watchPostEvent(this, &CUDAMonitoringService::postEvent);
  }
}

void CUDAMonitoringService::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
  edm::ParameterSetDescription desc;

  desc.addUntracked<bool>("memoryConstruction", false)
      ->setComment("Print memory information for each device after the construction of each module");
  desc.addUntracked<bool>("memoryBeginStream", true)
      ->setComment("Print memory information for each device after the beginStream() of each module");
  desc.addUntracked<bool>("memoryPerModule", true)
      ->setComment("Print memory information for each device after the event of each module");
  desc.addUntracked<bool>("memoryPerEvent", true)
      ->setComment("Print memory information for each device after each event");

  descriptions.add("CUDAMonitoringService", desc);
  descriptions.setComment(
      "The memory information is the global state of the device. This gets confusing if there are multiple processes "
      "running on the same device. Probably the information retrieval should be re-thought?");
}

// activity handlers
namespace {
  template <typename T>
  void dumpUsedMemory(T& log, int num) {
    auto const cachingDeviceAllocatorStatus = cms::cuda::deviceAllocatorStatus();
    int old = 0;
    cudaCheck(cudaGetDevice(&old));
    constexpr auto mbytes = 1 << 20;
    for (int i = 0; i < num; ++i) {
      size_t freeMemory, totalMemory;
      cudaCheck(cudaSetDevice(i));
      cudaCheck(cudaMemGetInfo(&freeMemory, &totalMemory));
      log << "\n"
          << i << ": " << (totalMemory - freeMemory) / mbytes << " MB used / " << totalMemory / mbytes << " MB total";
      auto found = cachingDeviceAllocatorStatus.find(i);
      if (found != cachingDeviceAllocatorStatus.end()) {
        auto const& cached = found->second;
        log << "; CachingDeviceAllocator " << cached.live / mbytes << " MB live "
            << "(" << cached.liveRequested / mbytes << " MB requested) " << cached.free / mbytes << " MB free "
            << (cached.live + cached.free) / mbytes << " MB total cached";
      }
    }
    cudaCheck(cudaSetDevice(old));
  }
}  // namespace

void CUDAMonitoringService::postModuleConstruction(edm::ModuleDescription const& desc) {
  auto log = edm::LogPrint("CUDAMonitoringService");
  log << "CUDA device memory after construction of " << desc.moduleLabel() << " (" << desc.moduleName() << ")";
  dumpUsedMemory(log, numberOfDevices_);
}

void CUDAMonitoringService::postModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const& mcc) {
  auto log = edm::LogPrint("CUDAMonitoringService");
  log << "CUDA device memory after beginStream() of " << mcc.moduleDescription()->moduleLabel() << " ("
      << mcc.moduleDescription()->moduleName() << ")";
  dumpUsedMemory(log, numberOfDevices_);
}

void CUDAMonitoringService::postModuleEvent(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
  auto log = edm::LogPrint("CUDAMonitoringService");
  log << "CUDA device memory after processing an event by " << mcc.moduleDescription()->moduleLabel() << " ("
      << mcc.moduleDescription()->moduleName() << ")";
  dumpUsedMemory(log, numberOfDevices_);
}

void CUDAMonitoringService::postEvent(edm::StreamContext const& sc) {
  auto log = edm::LogPrint("CUDAMonitoringService");
  log << "CUDA device memory after event";
  dumpUsedMemory(log, numberOfDevices_);
}

DEFINE_FWK_SERVICE(CUDAMonitoringService);
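
For reference, below is a minimal sketch of how this service might be enabled in a cmsRun configuration. The process name, source, and maxEvents settings are placeholders and not part of the file above; the sketch also assumes that the CUDAService is loaded and enabled elsewhere in the configuration, since the constructor returns early otherwise. The four untracked flags mirror the defaults declared in fillDescriptions().

import FWCore.ParameterSet.Config as cms

process = cms.Process("MONITOR")            # placeholder process name
process.source = cms.Source("EmptySource")  # placeholder source
process.maxEvents = cms.untracked.PSet(input = cms.untracked.int32(10))

# Enable the monitoring service; the values below match the defaults in fillDescriptions().
process.CUDAMonitoringService = cms.Service(
    "CUDAMonitoringService",
    memoryConstruction = cms.untracked.bool(False),
    memoryBeginStream = cms.untracked.bool(True),
    memoryPerModule = cms.untracked.bool(True),
    memoryPerEvent = cms.untracked.bool(True),
)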