Main Page
Namespaces
Namespace List
Namespace Members
All
_
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
Functions
_
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
Variables
_
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
Typedefs
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
Enumerations
a
b
c
d
e
f
g
h
i
j
k
l
m
o
p
q
r
s
t
u
v
w
z
Enumerator
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
Classes
Class List
Class Index
Class Hierarchy
Class Members
All
:
_
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
~
Functions
_
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
~
Variables
_
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
Typedefs
_
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
Enumerations
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
Enumerator
_
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
Properties
_
a
d
e
f
l
m
o
p
s
t
u
v
Related Functions
:
_
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
Package Documentation
•
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Properties
Friends
Macros
Modules
Pages
HeterogeneousCore
CUDAServices
plugins
CUDAMonitoringService.cc
Go to the documentation of this file.
1
#include <iostream>
2
3
#include <cuda.h>
4
5
#include "
DataFormats/Provenance/interface/ModuleDescription.h
"
6
#include "
FWCore/MessageLogger/interface/MessageLogger.h
"
7
#include "
FWCore/ParameterSet/interface/ConfigurationDescriptions.h
"
8
#include "
FWCore/ParameterSet/interface/ParameterSet.h
"
9
#include "
FWCore/ParameterSet/interface/ParameterSetDescription.h
"
10
#include "
FWCore/ServiceRegistry/interface/ActivityRegistry.h
"
11
#include "
FWCore/ServiceRegistry/interface/ModuleCallingContext.h
"
12
#include "
FWCore/ServiceRegistry/interface/Service.h
"
13
#include "
FWCore/ServiceRegistry/interface/ServiceMaker.h
"
14
#include "
HeterogeneousCore/CUDAServices/interface/CUDAService.h
"
15
#include "
HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h
"
16
#include "
HeterogeneousCore/CUDAUtilities/interface/deviceAllocatorStatus.h
"
17
18
// StreamContext is only passed by const reference in this file, so a forward
// declaration is used instead of including its header.
namespace edm { class StreamContext; }
21
22
class
CUDAMonitoringService
{
23
public
:
24
CUDAMonitoringService
(
edm::ParameterSet
const
& iConfig,
edm::ActivityRegistry
& iRegistry);
25
~CUDAMonitoringService
() =
default
;
26
27
static
void
fillDescriptions
(
edm::ConfigurationDescriptions
& descriptions);
28
29
void
postModuleConstruction
(
edm::ModuleDescription
const
&
desc
);
30
void
postModuleBeginStream
(
edm::StreamContext
const
&,
edm::ModuleCallingContext
const
& mcc);
31
void
postModuleEvent
(
edm::StreamContext
const
& sc,
edm::ModuleCallingContext
const
& mcc);
32
void
postEvent
(
edm::StreamContext
const
& sc);
33
34
private
:
35
int
numberOfDevices_
= 0;
36
};
37
38
/// Reads the monitoring switches from the configuration and subscribes the
/// matching callbacks to the activity registry.
///
/// If the CUDAService reports that it is not enabled, nothing is subscribed
/// and numberOfDevices_ keeps its initial value.
CUDAMonitoringService::CUDAMonitoringService(edm::ParameterSet const& config, edm::ActivityRegistry& registry) {
  // make sure that CUDA is initialised, and that the CUDAService destructor is called after this service's destructor
  edm::Service<CUDAService> cuda;
  bool const cudaAvailable = cuda->enabled();
  if (not cudaAvailable) {
    return;
  }
  numberOfDevices_ = cuda->numberOfDevices();

  // Reads one untracked boolean switch from the configuration.
  auto const requested = [&config](char const* name) { return config.getUntrackedParameter<bool>(name); };

  // Each switch is read right before its callback is registered, in this order.
  if (requested("memoryConstruction"))
    registry.watchPostModuleConstruction(this, &CUDAMonitoringService::postModuleConstruction);

  if (requested("memoryBeginStream"))
    registry.watchPostModuleBeginStream(this, &CUDAMonitoringService::postModuleBeginStream);

  if (requested("memoryPerModule"))
    registry.watchPostModuleEvent(this, &CUDAMonitoringService::postModuleEvent);

  if (requested("memoryPerEvent"))
    registry.watchPostEvent(this, &CUDAMonitoringService::postEvent);
}
58
59
/// Declares the four untracked boolean switches of the service, with their
/// defaults and comments, and registers the description under the label
/// "CUDAMonitoringService".
void CUDAMonitoringService::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
  edm::ParameterSetDescription psetDescription;

  // "memoryConstruction" is the only switch that defaults to false.
  auto* afterConstruction = psetDescription.addUntracked<bool>("memoryConstruction", false);
  afterConstruction->setComment("Print memory information for each device after the construction of each module");

  auto* afterBeginStream = psetDescription.addUntracked<bool>("memoryBeginStream", true);
  afterBeginStream->setComment("Print memory information for each device after the beginStream() of each module");

  auto* afterModuleEvent = psetDescription.addUntracked<bool>("memoryPerModule", true);
  afterModuleEvent->setComment("Print memory information for each device after the event of each module");

  auto* afterEvent = psetDescription.addUntracked<bool>("memoryPerEvent", true);
  afterEvent->setComment("Print memory information for each device after each event");

  descriptions.add("CUDAMonitoringService", psetDescription);

  // Known caveat of the reported numbers, attached to the description itself.
  descriptions.setComment(
      "The memory information is the global state of the device. This gets confusing if there are multiple processes "
      "running on the same device. Probably the information retrieval should be re-thought?");
}
76
77
// activity handlers
78
namespace
{
79
template
<
typename
T>
80
void
dumpUsedMemory(
T
&
log
,
int
num
) {
81
auto
const
cachingDeviceAllocatorStatus =
cms::cuda::deviceAllocatorStatus
();
82
int
old = 0;
83
cudaCheck
(cudaGetDevice(&old));
84
constexpr
auto
mbytes = 1 << 20;
85
for
(
int
i
= 0;
i
<
num
; ++
i
) {
86
size_t
freeMemory, totalMemory;
87
cudaCheck
(cudaSetDevice(
i
));
88
cudaCheck
(cudaMemGetInfo(&freeMemory, &totalMemory));
89
log
<<
"\n"
90
<<
i
<<
": "
<< (totalMemory - freeMemory) / mbytes <<
" MB used / "
<< totalMemory / mbytes <<
" MB total"
;
91
auto
found
= cachingDeviceAllocatorStatus.find(
i
);
92
if
(
found
!= cachingDeviceAllocatorStatus.end()) {
93
auto
const
& cached =
found
->second;
94
log
<<
"; CachingDeviceAllocator "
<< cached.live / mbytes <<
" MB live "
95
<<
"("
<< cached.liveRequested / mbytes <<
" MB requested) "
<< cached.free / mbytes <<
" MB free "
96
<< (cached.live + cached.free) / mbytes <<
" MB total cached"
;
97
}
98
}
99
cudaCheck
(cudaSetDevice(old));
100
}
101
}
// namespace
102
103
/// Callback for the post-module-construction signal: logs the label and name
/// of the module, followed by the memory usage of each device.
void CUDAMonitoringService::postModuleConstruction(edm::ModuleDescription const& desc) {
  edm::LogPrint message("CUDAMonitoringService");

  // Header line naming the module that was just constructed.
  message << "CUDA device memory after construction of ";
  message << desc.moduleLabel();
  message << " (" << desc.moduleName() << ")";

  dumpUsedMemory(message, numberOfDevices_);
}
108
109
/// Callback for the post-module-beginStream signal: logs the label and name
/// of the module, followed by the memory usage of each device. The stream
/// context is not used.
void CUDAMonitoringService::postModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const& mcc) {
  edm::LogPrint message("CUDAMonitoringService");

  // Header line naming the module whose beginStream() just ran.
  message << "CUDA device memory after beginStream() of ";
  message << mcc.moduleDescription()->moduleLabel();
  message << " (";
  message << mcc.moduleDescription()->moduleName();
  message << ")";

  dumpUsedMemory(message, numberOfDevices_);
}
115
116
/// Callback for the post-module-event signal: logs the label and name of the
/// module, followed by the memory usage of each device. The stream context is
/// not used.
void CUDAMonitoringService::postModuleEvent(edm::StreamContext const&, edm::ModuleCallingContext const& mcc) {
  edm::LogPrint message("CUDAMonitoringService");

  // Header line naming the module that just processed the event.
  message << "CUDA device memory after processing an event by ";
  message << mcc.moduleDescription()->moduleLabel();
  message << " (";
  message << mcc.moduleDescription()->moduleName();
  message << ")";

  dumpUsedMemory(message, numberOfDevices_);
}
122
123
/// Callback for the post-event signal: logs a fixed header followed by the
/// memory usage of each device.
///
/// The stream context is not used, so the parameter is left unnamed, as in
/// the other handlers of this class; this avoids an unused-parameter warning.
void CUDAMonitoringService::postEvent(edm::StreamContext const&) {
  auto log = edm::LogPrint("CUDAMonitoringService");
  log << "CUDA device memory after event";
  dumpUsedMemory(log, numberOfDevices_);
}
128
129
// Pass CUDAMonitoringService to the framework's service-definition macro
// (DEFINE_FWK_SERVICE, from ServiceMaker.h).
DEFINE_FWK_SERVICE
(
CUDAMonitoringService
);
ConfigurationDescriptions.h
CUDAMonitoringService::~CUDAMonitoringService
~CUDAMonitoringService()=default
edm::ModuleDescription::moduleLabel
std::string const & moduleLabel() const
Definition:
ModuleDescription.h:43
ModuleCallingContext.h
mps_fire.i
i
Definition:
mps_fire.py:428
MessageLogger.h
edm
HLT enums.
Definition:
AlignableModifier.h:19
edm::LogPrint
Log< level::Warning, true > LogPrint
Definition:
MessageLogger.h:130
edm::ModuleDescription::moduleName
std::string const & moduleName() const
Definition:
ModuleDescription.h:42
edm::ParameterSetDescription
Definition:
ParameterSetDescription.h:52
DEFINE_FWK_SERVICE
#define DEFINE_FWK_SERVICE(type)
Definition:
ServiceMaker.h:96
CUDAMonitoringService
Definition:
CUDAMonitoringService.cc:22
edm::ModuleCallingContext::moduleDescription
ModuleDescription const * moduleDescription() const
Definition:
ModuleCallingContext.h:50
newFWLiteAna.found
found
Definition:
newFWLiteAna.py:118
CUDAService::numberOfDevices
int numberOfDevices() const
Definition:
CUDAService.h:24
CUDAMonitoringService::postModuleEvent
void postModuleEvent(edm::StreamContext const &sc, edm::ModuleCallingContext const &mcc)
Definition:
CUDAMonitoringService.cc:116
edm::ModuleDescription
Definition:
ModuleDescription.h:21
edm::ActivityRegistry::watchPostModuleBeginStream
void watchPostModuleBeginStream(PostModuleBeginStream::slot_type const &iSlot)
Definition:
ActivityRegistry.h:269
ModuleDescription.h
ActivityRegistry.h
config
Definition:
config.py:1
CUDAMonitoringService::CUDAMonitoringService
CUDAMonitoringService(edm::ParameterSet const &iConfig, edm::ActivityRegistry &iRegistry)
Definition:
CUDAMonitoringService.cc:38
edm::ConfigurationDescriptions::add
void add(std::string const &label, ParameterSetDescription const &psetDescription)
Definition:
ConfigurationDescriptions.cc:57
edm::StreamContext
Definition:
StreamContext.h:31
Service.h
edm::ActivityRegistry
Definition:
ActivityRegistry.h:134
cms::cuda::deviceAllocatorStatus
allocator::GpuCachedBytes deviceAllocatorStatus()
Definition:
deviceAllocatorStatus.cc:6
ParameterSetDescription.h
edm::ActivityRegistry::watchPostModuleConstruction
void watchPostModuleConstruction(PostModuleConstruction::slot_type const &iSlot)
Definition:
ActivityRegistry.h:722
CUDAMonitoringService::numberOfDevices_
int numberOfDevices_
Definition:
CUDAMonitoringService.cc:35
ServiceMaker.h
edm::ConfigurationDescriptions
Definition:
ConfigurationDescriptions.h:28
edm::ActivityRegistry::watchPostEvent
void watchPostEvent(PostEvent::slot_type const &iSlot)
Definition:
ActivityRegistry.h:480
edm::ParameterSet
Definition:
ParameterSet.h:47
edm::ConfigurationDescriptions::setComment
void setComment(std::string const &value)
Definition:
ConfigurationDescriptions.cc:48
edm::Service
Definition:
Service.h:30
cudaCheck.h
CUDAMonitoringService::postEvent
void postEvent(edm::StreamContext const &sc)
Definition:
CUDAMonitoringService.cc:123
deviceAllocatorStatus.h
CUDAMonitoringService::postModuleConstruction
void postModuleConstruction(edm::ModuleDescription const &desc)
Definition:
CUDAMonitoringService.cc:103
EgammaValidation_cff.num
num
Definition:
EgammaValidation_cff.py:33
CUDAService.h
CUDAService::enabled
bool enabled() const
Definition:
CUDAService.h:22
CUDAMonitoringService::postModuleBeginStream
void postModuleBeginStream(edm::StreamContext const &, edm::ModuleCallingContext const &mcc)
Definition:
CUDAMonitoringService.cc:109
submitPVResolutionJobs.desc
string desc
Definition:
submitPVResolutionJobs.py:251
cudaCheck
#define cudaCheck(ARG,...)
Definition:
cudaCheck.h:69
edm::ActivityRegistry::watchPostModuleEvent
void watchPostModuleEvent(PostModuleEvent::slot_type const &iSlot)
Definition:
ActivityRegistry.h:799
T
long double T
Definition:
Basic3DVectorLD.h:48
dqm-mbProfile.log
log
Definition:
dqm-mbProfile.py:17
CUDAMonitoringService::fillDescriptions
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
Definition:
CUDAMonitoringService.cc:59
ParameterSet.h
edm::ModuleCallingContext
Definition:
ModuleCallingContext.h:29
Generated for CMSSW Reference Manual by
1.8.16