HeterogeneousCore
CUDAServices
plugins
CUDAMonitoringService.cc
Go to the documentation of this file.
1
#include <iostream>
2
3
#include <cuda.h>
4
5
#include "
DataFormats/Provenance/interface/ModuleDescription.h
"
6
#include "
FWCore/MessageLogger/interface/MessageLogger.h
"
7
#include "
FWCore/ParameterSet/interface/ConfigurationDescriptions.h
"
8
#include "
FWCore/ParameterSet/interface/ParameterSet.h
"
9
#include "
FWCore/ParameterSet/interface/ParameterSetDescription.h
"
10
#include "
FWCore/ServiceRegistry/interface/ActivityRegistry.h
"
11
#include "
FWCore/ServiceRegistry/interface/ModuleCallingContext.h
"
12
#include "
FWCore/ServiceRegistry/interface/Service.h
"
13
#include "
FWCore/ServiceRegistry/interface/ServiceMaker.h
"
14
#include "
HeterogeneousCore/CUDAServices/interface/CUDAService.h
"
15
#include "
HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h
"
16
17
// Forward declaration only: this file refers to edm::StreamContext solely as a
// const reference in callback signatures, so the full definition is not needed.
namespace edm {
  class StreamContext;
}
20
21
class
CUDAMonitoringService
{
22
public
:
23
CUDAMonitoringService
(
edm::ParameterSet
const
& iConfig,
edm::ActivityRegistry
& iRegistry);
24
~CUDAMonitoringService
() =
default
;
25
26
static
void
fillDescriptions
(
edm::ConfigurationDescriptions
& descriptions);
27
28
void
postModuleConstruction
(
edm::ModuleDescription
const
& desc);
29
void
postModuleBeginStream
(
edm::StreamContext
const
&,
edm::ModuleCallingContext
const
& mcc);
30
void
postEvent
(
edm::StreamContext
const
& sc);
31
32
private
:
33
int
numberOfDevices_
= 0;
34
};
35
36
/**
 * Set up the monitoring callbacks requested by the configuration.
 *
 * @param config    parameter set holding the untracked booleans
 *                  "memoryConstruction", "memoryBeginStream" and "memoryPerEvent"
 * @param registry  activity registry on which the callbacks are registered
 *
 * If the CUDAService reports that it is not enabled, nothing is registered and
 * numberOfDevices_ keeps its initial value.
 */
CUDAMonitoringService::CUDAMonitoringService(edm::ParameterSet const& config, edm::ActivityRegistry& registry) {
  // Accessing the CUDAService here makes sure that CUDA is initialised, and that
  // the CUDAService destructor is called after this service's destructor.
  edm::Service<CUDAService> cuda;
  if (cuda->enabled()) {
    numberOfDevices_ = cuda->numberOfDevices();

    // Each flag is read and acted upon in turn, one signal per flag.
    const bool afterConstruction = config.getUntrackedParameter<bool>("memoryConstruction");
    if (afterConstruction) {
      registry.watchPostModuleConstruction(this, &CUDAMonitoringService::postModuleConstruction);
    }

    const bool afterBeginStream = config.getUntrackedParameter<bool>("memoryBeginStream");
    if (afterBeginStream) {
      registry.watchPostModuleBeginStream(this, &CUDAMonitoringService::postModuleBeginStream);
    }

    const bool afterEachEvent = config.getUntrackedParameter<bool>("memoryPerEvent");
    if (afterEachEvent) {
      registry.watchPostEvent(this, &CUDAMonitoringService::postEvent);
    }
  }
}
53
54
/**
 * Describe the configuration of this service.
 *
 * Declares three untracked boolean parameters with their defaults
 * (memoryConstruction = false, memoryBeginStream = true, memoryPerEvent = true)
 * and registers the description under the label "CUDAMonitoringService".
 *
 * @param descriptions  collection that receives the description and its comment
 */
void CUDAMonitoringService::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
  edm::ParameterSetDescription psd;

  auto* construction = psd.addUntracked<bool>("memoryConstruction", false);
  construction->setComment("Print memory information for each device after the construction of each module");

  auto* beginStream = psd.addUntracked<bool>("memoryBeginStream", true);
  beginStream->setComment("Print memory information for each device after the beginStream() of each module");

  auto* perEvent = psd.addUntracked<bool>("memoryPerEvent", true);
  perEvent->setComment("Print memory information for each device after each event");

  descriptions.add("CUDAMonitoringService", psd);
  // Caveat attached to the description as a whole.
  descriptions.setComment(
      "The memory information is the global state of the device. This gets confusing if there are multiple processes "
      "running on the same device. Probably the information retrieval should be re-thought?");
}
69
70
// activity handlers
71
namespace
{
72
template
<
typename
T>
73
void
dumpUsedMemory(
T
&
log
,
int
num
) {
74
int
old = 0;
75
cudaCheck
(cudaGetDevice(&old));
76
for
(
int
i
= 0;
i
<
num
; ++
i
) {
77
size_t
freeMemory, totalMemory;
78
cudaCheck
(cudaSetDevice(
i
));
79
cudaCheck
(cudaMemGetInfo(&freeMemory, &totalMemory));
80
log
<<
"\n"
81
<<
i
<<
": "
<< (totalMemory - freeMemory) / (1 << 20) <<
" MB used / "
<< totalMemory / (1 << 20)
82
<<
" MB total"
;
83
}
84
cudaCheck
(cudaSetDevice(old));
85
}
86
}
// namespace
87
88
/**
 * Callback for the "post module construction" signal.
 *
 * Logs a header naming the module (label and name), followed by the memory
 * report for numberOfDevices_ devices.
 *
 * @param desc  description of the module that has just been constructed
 */
void CUDAMonitoringService::postModuleConstruction(edm::ModuleDescription const& desc) {
  edm::LogPrint report("CUDAMonitoringService");
  report << "CUDA device memory after construction of ";
  report << desc.moduleLabel();
  report << " (";
  report << desc.moduleName();
  report << ")";
  dumpUsedMemory(report, numberOfDevices_);
}
93
94
/**
 * Callback for the "post module beginStream" signal.
 *
 * Logs a header naming the module (label and name) taken from the calling
 * context, followed by the memory report for numberOfDevices_ devices.
 * The stream context argument is not used.
 *
 * @param mcc  calling context of the module whose beginStream() has just run
 */
void CUDAMonitoringService::postModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const& mcc) {
  // Fetch the description once; it provides both the label and the name.
  auto const* module = mcc.moduleDescription();

  edm::LogPrint report("CUDAMonitoringService");
  report << "CUDA device memory after beginStream() of ";
  report << module->moduleLabel();
  report << " (";
  report << module->moduleName();
  report << ")";
  dumpUsedMemory(report, numberOfDevices_);
}
100
101
/**
 * Callback for the "post event" signal.
 *
 * Logs a fixed header followed by the memory report for numberOfDevices_
 * devices.
 *
 * @param sc  stream context of the event; not used by this function
 */
void CUDAMonitoringService::postEvent(edm::StreamContext const& sc) {
  edm::LogPrint report("CUDAMonitoringService");
  report << "CUDA device memory after event";
  dumpUsedMemory(report, numberOfDevices_);
}
106
107
// Register CUDAMonitoringService through the macro provided by ServiceMaker.h.
DEFINE_FWK_SERVICE(CUDAMonitoringService);
ConfigurationDescriptions.h
CUDAMonitoringService::~CUDAMonitoringService
~CUDAMonitoringService()=default
edm::ModuleDescription::moduleLabel
std::string const & moduleLabel() const
Definition:
ModuleDescription.h:43
ModuleCallingContext.h
mps_fire.i
i
Definition:
mps_fire.py:355
MessageLogger.h
edm
HLT enums.
Definition:
AlignableModifier.h:19
edm::ModuleDescription::moduleName
std::string const & moduleName() const
Definition:
ModuleDescription.h:42
edm::ParameterSetDescription
Definition:
ParameterSetDescription.h:52
DEFINE_FWK_SERVICE
#define DEFINE_FWK_SERVICE(type)
Definition:
ServiceMaker.h:105
CUDAMonitoringService
Definition:
CUDAMonitoringService.cc:21
edm::ModuleCallingContext::moduleDescription
ModuleDescription const * moduleDescription() const
Definition:
ModuleCallingContext.h:50
CUDAService::numberOfDevices
int numberOfDevices() const
Definition:
CUDAService.h:24
edm::ModuleDescription
Definition:
ModuleDescription.h:21
edm::ActivityRegistry::watchPostModuleBeginStream
void watchPostModuleBeginStream(PostModuleBeginStream::slot_type const &iSlot)
Definition:
ActivityRegistry.h:251
ModuleDescription.h
ActivityRegistry.h
config
Definition:
config.py:1
CUDAMonitoringService::CUDAMonitoringService
CUDAMonitoringService(edm::ParameterSet const &iConfig, edm::ActivityRegistry &iRegistry)
Definition:
CUDAMonitoringService.cc:36
edm::ConfigurationDescriptions::add
void add(std::string const &label, ParameterSetDescription const &psetDescription)
Definition:
ConfigurationDescriptions.cc:57
edm::StreamContext
Definition:
StreamContext.h:31
Service.h
edm::ActivityRegistry
Definition:
ActivityRegistry.h:132
ParameterSetDescription.h
edm::ActivityRegistry::watchPostModuleConstruction
void watchPostModuleConstruction(PostModuleConstruction::slot_type const &iSlot)
Definition:
ActivityRegistry.h:618
CUDAMonitoringService::numberOfDevices_
int numberOfDevices_
Definition:
CUDAMonitoringService.cc:33
ServiceMaker.h
edm::ConfigurationDescriptions
Definition:
ConfigurationDescriptions.h:28
edm::ParameterSetDescription::addUntracked
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
Definition:
ParameterSetDescription.h:100
edm::ActivityRegistry::watchPostEvent
void watchPostEvent(PostEvent::slot_type const &iSlot)
Definition:
ActivityRegistry.h:406
edm::ParameterSet
Definition:
ParameterSet.h:36
edm::ConfigurationDescriptions::setComment
void setComment(std::string const &value)
Definition:
ConfigurationDescriptions.cc:48
edm::Service
Definition:
Service.h:30
cudaCheck.h
edm::LogPrint
Definition:
MessageLogger.h:342
CUDAMonitoringService::postEvent
void postEvent(edm::StreamContext const &sc)
Definition:
CUDAMonitoringService.cc:101
CUDAMonitoringService::postModuleConstruction
void postModuleConstruction(edm::ModuleDescription const &desc)
Definition:
CUDAMonitoringService.cc:88
EgammaValidation_cff.num
num
Definition:
EgammaValidation_cff.py:34
CUDAService.h
CUDAService::enabled
bool enabled() const
Definition:
CUDAService.h:22
CUDAMonitoringService::postModuleBeginStream
void postModuleBeginStream(edm::StreamContext const &, edm::ModuleCallingContext const &mcc)
Definition:
CUDAMonitoringService.cc:94
cudaCheck
#define cudaCheck(ARG,...)
Definition:
cudaCheck.h:62
T
long double T
Definition:
Basic3DVectorLD.h:48
dqm-mbProfile.log
log
Definition:
dqm-mbProfile.py:17
CUDAMonitoringService::fillDescriptions
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
Definition:
CUDAMonitoringService.cc:54
ParameterSet.h
edm::ModuleCallingContext
Definition:
ModuleCallingContext.h:29
Generated for CMSSW Reference Manual by
1.8.16