12 #include <tbb/concurrent_vector.h> 13 #include <tbb/enumerable_thread_specific.h> 15 #include <boost/format.hpp> 17 #include <cuda_profiler_api.h> 18 #include <nvToolsExt.h> 48 int nvtxDomainRangePush(nvtxDomainHandle_t domain,
const char* message) {
49 nvtxEventAttributes_t eventAttrib = { 0 };
50 eventAttrib.version = NVTX_VERSION;
51 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
52 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
53 eventAttrib.message.ascii = message;
54 return nvtxDomainRangePushEx(domain, &eventAttrib);
58 int nvtxDomainRangePushColor(nvtxDomainHandle_t domain, const
char* message, uint32_t
color) {
59 nvtxEventAttributes_t eventAttrib = { 0 };
60 eventAttrib.version = NVTX_VERSION;
61 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
62 eventAttrib.colorType = NVTX_COLOR_ARGB;
63 eventAttrib.color =
color;
64 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
65 eventAttrib.message.ascii = message;
66 return nvtxDomainRangePushEx(domain, &eventAttrib);
70 nvtxRangeId_t nvtxDomainRangeStart(nvtxDomainHandle_t domain, const
char* message) {
71 nvtxEventAttributes_t eventAttrib = { 0 };
72 eventAttrib.version = NVTX_VERSION;
73 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
74 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
75 eventAttrib.message.ascii = message;
76 return nvtxDomainRangeStartEx(domain, &eventAttrib);
79 nvtxRangeId_t nvtxDomainRangeStartColor(nvtxDomainHandle_t domain,
const char* message, uint32_t
color) {
80 nvtxEventAttributes_t eventAttrib = { 0 };
81 eventAttrib.version = NVTX_VERSION;
82 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
83 eventAttrib.colorType = NVTX_COLOR_ARGB;
84 eventAttrib.color =
color;
85 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
86 eventAttrib.message.ascii = message;
87 return nvtxDomainRangeStartEx(domain, &eventAttrib);
90 void nvtxDomainMark(nvtxDomainHandle_t domain,
const char* message) {
91 nvtxEventAttributes_t eventAttrib = { 0 };
92 eventAttrib.version = NVTX_VERSION;
93 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
94 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
95 eventAttrib.message.ascii = message;
96 nvtxDomainMarkEx(domain, &eventAttrib);
100 void nvtxDomainMarkColor(nvtxDomainHandle_t domain, const
char* message, uint32_t
color) {
101 nvtxEventAttributes_t eventAttrib = { 0 };
102 eventAttrib.version = NVTX_VERSION;
103 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
104 eventAttrib.colorType = NVTX_COLOR_ARGB;
105 eventAttrib.color =
color;
106 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
107 eventAttrib.message.ascii = message;
108 nvtxDomainMarkEx(domain, &eventAttrib);
112 nvtxBlack = 0x00000000,
113 nvtxRed = 0x00ff0000,
114 nvtxDarkGreen = 0x00009900,
115 nvtxGreen = 0x0000ff00,
116 nvtxLightGreen = 0x00ccffcc,
117 nvtxBlue = 0x000000ff,
118 nvtxAmber = 0x00ffbf00,
119 nvtxLightAmber = 0x00fff2cc,
120 nvtxWhite = 0x00ffffff
123 constexpr nvtxRangeId_t nvtxInvalidRangeId = 0xfffffffffffffffful;
280 return (std::binary_search(highlightModules_.begin(), highlightModules_.end(),
label));
284 return highlight(label) ? nvtxAmber : nvtxGreen;
288 return highlight(label) ? nvtxLightAmber : nvtxLightGreen;
296 std::atomic<bool> globalFirstEventDone_ =
false;
309 global = nvtxDomainCreate(
"EDM Global");
314 nvtxDomainDestroy(global);
315 for (
unsigned int sid = 0; sid < stream.size(); ++sid) {
316 nvtxDomainDestroy(stream[sid]);
321 stream.resize(streams);
322 for (
unsigned int sid = 0; sid < streams; ++sid) {
323 stream[sid] = nvtxDomainCreate((
boost::format(
"EDM Stream %d") % sid).
str().c_str());
334 return domains_.local().global;
338 return domains_.local().stream.at(sid);
344 highlightModules_(config.getUntrackedParameter<
std::vector<
std::
string>>(
"highlightModules")),
345 showModulePrefetching_(config.getUntrackedParameter<
bool>(
"showModulePrefetching")),
346 skipFirstEvent_(config.getUntrackedParameter<
bool>(
"skipFirstEvent")),
347 concurrentStreams_(0),
512 desc.
addUntracked<std::vector<std::string>>(
"highlightModules", {})->setComment(
"");
513 desc.
addUntracked<
bool>(
"showModulePrefetching",
false)->setComment(
"Show the stack of dependencies that requested to run a module.");
514 desc.
addUntracked<
bool>(
"skipFirstEvent",
false)->setComment(
"Start profiling after the first event has completed.\nWith multiple streams, ignore transitions belonging to events started in parallel to the first event.\nRequires running nvprof with the '--profile-from-start off' option.");
515 descriptions.
add(
"NVProfilerService", desc);
516 descriptions.
setComment(R
"(This Service provides CMSSW-aware annotations to nvprof/nvvm. 518 Notes on nvprof options: 519 - the option '--profile-from-start off' should be used if skipFirstEvent is True. 520 - the option '--cpu-profiling on' currently results in cmsRun being stuck at the beginning of the job. 521 - the option '--cpu-thread-tracing on' is not compatible with jemalloc, and should only be used with cmsRunGlibC.)"); 526 std::stringstream
out;
535 domain.allocate_streams(concurrentStreams_);
537 event_.resize(concurrentStreams_);
541 std::vector<std::atomic<bool>>
tmp(concurrentStreams_);
542 for (
auto & element: tmp)
543 std::atomic_init(&element,
false);
620 nvtxDomainRangePush(
global_domain(), (
"open file "s + lfn).c_str());
634 nvtxDomainRangePush(
global_domain(), (
"close file "s + lfn).c_str());
651 auto const &
msg =
label +
" begin stream";
673 auto const &
msg =
label +
" end stream";
781 nvtxDomainRangePush(
stream_domain(sid),
"stream begin lumi");
820 event_[sid] = nvtxInvalidRangeId;
825 bool expected =
false;
854 auto const &
msg =
label +
" prefetching";
873 auto mid = desc.
id();
876 auto const &
msg =
label +
" construction";
884 auto mid = desc.
id();
893 auto mid = desc.
id();
895 auto const &
msg =
label +
" begin job";
903 auto mid = desc.
id();
912 auto mid = desc.
id();
914 auto const &
msg =
label +
" end job";
922 auto mid = desc.
id();
934 auto const &
msg =
label +
" acquire";
1029 auto const &
msg =
label +
" stream begin run";
1051 auto const &
msg =
label +
" stream end run";
1073 auto const &
msg =
label +
" stream begin lumi";
1095 auto const &
msg =
label +
" stream end lumi";
1116 auto const &
msg =
label +
" global begin run";
1135 auto const &
msg =
label +
" global end run";
1154 auto const &
msg =
label +
" global begin lumi";
1173 auto const &
msg =
label +
" global end lumi";
1190 auto mid = desc.
id();
1193 auto const &
msg =
label +
" construction";
1201 auto mid = desc.
id();
void watchPostModuleGlobalEndLumi(PostModuleGlobalEndLumi::slot_type const &iSlot)
void watchPostModuleConstruction(PostModuleConstruction::slot_type const &iSlot)
std::string const & pathName() const
unsigned int maxNumberOfThreads() const
void watchPreModuleGlobalBeginRun(PreModuleGlobalBeginRun::slot_type const &iSlot)
void watchPreEvent(PreEvent::slot_type const &iSlot)
void postStreamEndLumi(edm::StreamContext const &)
uint32_t labelColorLight(std::string const &label) const
void preOpenFile(std::string const &, bool)
void preModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preGlobalBeginRun(edm::GlobalContext const &)
void postGlobalEndRun(edm::GlobalContext const &)
void preGlobalEndLumi(edm::GlobalContext const &)
void postOpenFile(std::string const &, bool)
void postModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPrePathEvent(PrePathEvent::slot_type const &iSlot)
void watchPreallocate(Preallocate::slot_type const &iSlot)
void postModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
tbb::enumerable_thread_specific< Domains > domains_
void watchPreModuleEventAcquire(PreModuleEventAcquire::slot_type const &iSlot)
void watchPostEndJob(PostEndJob::slot_type const &iSlot)
void preModuleEndStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleEventAcquire(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPostModuleEndStream(PostModuleEndStream::slot_type const &iSlot)
void postCloseFile(std::string const &, bool)
void watchPreModuleEvent(PreModuleEvent::slot_type const &iSlot)
void postModuleEventAcquire(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreModuleConstruction(PreModuleConstruction::slot_type const &iSlot)
void watchPreGlobalEndLumi(PreGlobalEndLumi::slot_type const &iSlot)
nvtxDomainHandle_t global
void watchPostEvent(PostEvent::slot_type const &iSlot)
void preStreamBeginLumi(edm::StreamContext const &)
void preModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
Domains(NVProfilerService *service)
void watchPreStreamEndRun(PreStreamEndRun::slot_type const &iSlot)
void watchPreSourceConstruction(PreSourceConstruction::slot_type const &iSlot)
void preSourceRun(edm::RunIndex)
void watchPostSourceConstruction(PostSourceConstruction::slot_type const &iSlot)
void watchPostStreamEndLumi(PostStreamEndLumi::slot_type const &iSlot)
void watchPreGlobalBeginLumi(PreGlobalBeginLumi::slot_type const &iSlot)
void preModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postSourceConstruction(edm::ModuleDescription const &)
void preModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreEventReadFromSource(PreEventReadFromSource::slot_type const &iSlot)
void watchPostPathEvent(PostPathEvent::slot_type const &iSlot)
float __attribute__((vector_size(8))) cms_float32x2_t
void watchPostModuleEvent(PostModuleEvent::slot_type const &iSlot)
void watchPostModuleGlobalBeginLumi(PostModuleGlobalBeginLumi::slot_type const &iSlot)
void preCloseFile(std::string const &, bool)
void watchPostModuleStreamEndLumi(PostModuleStreamEndLumi::slot_type const &iSlot)
void watchPostGlobalBeginLumi(PostGlobalBeginLumi::slot_type const &iSlot)
void watchPostModuleStreamBeginRun(PostModuleStreamBeginRun::slot_type const &iSlot)
void watchPostSourceEvent(PostSourceEvent::slot_type const &iSlot)
nvtxDomainHandle_t global_domain()
void postStreamEndRun(edm::StreamContext const &)
std::vector< nvtxDomainHandle_t > stream
std::vector< std::vector< nvtxRangeId_t > > stream_modules_
void postModuleEndJob(edm::ModuleDescription const &)
void postGlobalEndLumi(edm::GlobalContext const &)
void preModuleEndJob(edm::ModuleDescription const &)
void preModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPreModuleBeginStream(PreModuleBeginStream::slot_type const &iSlot)
void preGlobalBeginLumi(edm::GlobalContext const &)
void postSourceLumi(edm::LuminosityBlockIndex)
void watchPreStreamEndLumi(PreStreamEndLumi::slot_type const &iSlot)
std::string const & moduleLabel() const
void watchPreModuleGlobalEndRun(PreModuleGlobalEndRun::slot_type const &iSlot)
void watchPreModuleEventPrefetching(PreModuleEventPrefetching::slot_type const &iSlot)
void preModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
bool highlight(std::string const &label) const
void postModuleConstruction(edm::ModuleDescription const &)
std::vector< ModuleDescription const * > const & allModules() const
void postModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preGlobalEndRun(edm::GlobalContext const &)
void postSourceEvent(edm::StreamID)
void watchPostModuleEventPrefetching(PostModuleEventPrefetching::slot_type const &iSlot)
void preStreamEndRun(edm::StreamContext const &)
unsigned int maxNumberOfStreams() const
void postModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPreOpenFile(PreOpenFile::slot_type const &iSlot)
void watchPostGlobalBeginRun(PostGlobalBeginRun::slot_type const &iSlot)
void watchPostCloseFile(PostCloseFile::slot_type const &iSlot)
void watchPreGlobalEndRun(PreGlobalEndRun::slot_type const &iSlot)
void preModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPostSourceRun(PostSourceRun::slot_type const &iSlot)
void watchPostStreamBeginLumi(PostStreamBeginLumi::slot_type const &iSlot)
void preSourceConstruction(edm::ModuleDescription const &)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
void watchPreSourceLumi(PreSourceLumi::slot_type const &iSlot)
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
void postModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPreModuleEventDelayedGet(PreModuleEventDelayedGet::slot_type const &iSlot)
ModuleDescription const * moduleDescription() const
void preModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preBeginJob(edm::PathsAndConsumesOfModulesBase const &, edm::ProcessContext const &)
void watchPostModuleEventAcquire(PostModuleEventAcquire::slot_type const &iSlot)
void watchPreModuleEndJob(PreModuleEndJob::slot_type const &iSlot)
void postPathEvent(edm::StreamContext const &, edm::PathContext const &, edm::HLTPathStatus const &)
void postStreamBeginLumi(edm::StreamContext const &)
void preStreamEndLumi(edm::StreamContext const &)
uint32_t labelColor(std::string const &label) const
void watchPostGlobalEndLumi(PostGlobalEndLumi::slot_type const &iSlot)
format
Some error handling for the usage.
void watchPreSourceRun(PreSourceRun::slot_type const &iSlot)
void preModuleBeginJob(edm::ModuleDescription const &)
#define DEFINE_FWK_SERVICE(type)
void watchPreModuleBeginJob(PreModuleBeginJob::slot_type const &iSlot)
void postGlobalBeginLumi(edm::GlobalContext const &)
void watchPostStreamEndRun(PostStreamEndRun::slot_type const &iSlot)
tbb::concurrent_vector< nvtxRangeId_t > global_modules_
std::vector< nvtxRangeId_t > event_
void postEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleBeginStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPostModuleStreamEndRun(PostModuleStreamEndRun::slot_type const &iSlot)
void watchPreModuleGlobalBeginLumi(PreModuleGlobalBeginLumi::slot_type const &iSlot)
void postModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preModuleBeginStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
StreamID const & streamID() const
void preSourceEvent(edm::StreamID)
void watchPreGlobalBeginRun(PreGlobalBeginRun::slot_type const &iSlot)
void watchPreModuleStreamBeginLumi(PreModuleStreamBeginLumi::slot_type const &iSlot)
void setComment(std::string const &value)
void watchPostModuleBeginStream(PostModuleBeginStream::slot_type const &iSlot)
void allocate_streams(unsigned int streams)
void preModuleConstruction(edm::ModuleDescription const &)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
unsigned int maxNumberOfConcurrentLuminosityBlocks() const
void watchPostSourceLumi(PostSourceLumi::slot_type const &iSlot)
NVProfilerService(const edm::ParameterSet &, edm::ActivityRegistry &)
unsigned int concurrentStreams_
void watchPreCloseFile(PreCloseFile::slot_type const &iSlot)
void watchPostModuleEventDelayedGet(PostModuleEventDelayedGet::slot_type const &iSlot)
void watchPostModuleGlobalEndRun(PostModuleGlobalEndRun::slot_type const &iSlot)
void watchPostModuleStreamBeginLumi(PostModuleStreamBeginLumi::slot_type const &iSlot)
void preStreamBeginRun(edm::StreamContext const &)
void postModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreModuleStreamEndLumi(PreModuleStreamEndLumi::slot_type const &iSlot)
void watchPreModuleStreamBeginRun(PreModuleStreamBeginRun::slot_type const &iSlot)
void watchPostGlobalEndRun(PostGlobalEndRun::slot_type const &iSlot)
void watchPreStreamBeginLumi(PreStreamBeginLumi::slot_type const &iSlot)
void add(std::string const &label, ParameterSetDescription const &psetDescription)
void watchPostOpenFile(PostOpenFile::slot_type const &iSlot)
void watchPreModuleEndStream(PreModuleEndStream::slot_type const &iSlot)
void postGlobalBeginRun(edm::GlobalContext const &)
void watchPreBeginJob(PreBeginJob::slot_type const &iSlot)
convenience function for attaching to signal
void postModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postEvent(edm::StreamContext const &)
void watchPostStreamBeginRun(PostStreamBeginRun::slot_type const &iSlot)
std::vector< std::vector< double > > tmp
void postSourceRun(edm::RunIndex)
void watchPreStreamBeginRun(PreStreamBeginRun::slot_type const &iSlot)
void preallocate(edm::service::SystemBounds const &)
void watchPreModuleStreamEndRun(PreModuleStreamEndRun::slot_type const &iSlot)
void postModuleEndStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceLumi(edm::LuminosityBlockIndex)
void watchPostModuleBeginJob(PostModuleBeginJob::slot_type const &iSlot)
void watchPostEventReadFromSource(PostEventReadFromSource::slot_type const &iSlot)
void watchPostModuleGlobalBeginRun(PostModuleGlobalBeginRun::slot_type const &iSlot)
const bool showModulePrefetching_
void preEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
unsigned int maxNumberOfConcurrentRuns() const
nvtxDomainHandle_t stream_domain(unsigned int sid)
void preEvent(edm::StreamContext const &)
void preModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
std::atomic< bool > globalFirstEventDone_
void watchPreSourceEvent(PreSourceEvent::slot_type const &iSlot)
void postStreamBeginRun(edm::StreamContext const &)
std::vector< std::atomic< bool > > streamFirstEventDone_
void prePathEvent(edm::StreamContext const &, edm::PathContext const &)
void postModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPostModuleEndJob(PostModuleEndJob::slot_type const &iSlot)
std::vector< std::string > highlightModules_
void watchPreModuleGlobalEndLumi(PreModuleGlobalEndLumi::slot_type const &iSlot)
void postModuleBeginJob(edm::ModuleDescription const &)
void watchPostBeginJob(PostBeginJob::slot_type const &iSlot)
convenience function for attaching to signal