|
|
Go to the documentation of this file.
12 #include <tbb/concurrent_vector.h>
13 #include <tbb/enumerable_thread_specific.h>
15 #include <fmt/printf.h>
17 #include <cuda_profiler_api.h>
18 #include <nvToolsExt.h>
46 using namespace std::string_literals;
49 int nvtxDomainRangePush(nvtxDomainHandle_t domain,
const char* message) {
50 nvtxEventAttributes_t eventAttrib = {};
51 eventAttrib.version = NVTX_VERSION;
52 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
53 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
54 eventAttrib.message.ascii = message;
55 return nvtxDomainRangePushEx(domain, &eventAttrib);
58 __attribute__((unused))
int nvtxDomainRangePushColor(nvtxDomainHandle_t domain,
const char* message, uint32_t color) {
59 nvtxEventAttributes_t eventAttrib = {};
60 eventAttrib.version = NVTX_VERSION;
61 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
62 eventAttrib.colorType = NVTX_COLOR_ARGB;
63 eventAttrib.color = color;
64 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
65 eventAttrib.message.ascii = message;
66 return nvtxDomainRangePushEx(domain, &eventAttrib);
69 __attribute__((unused)) nvtxRangeId_t nvtxDomainRangeStart(nvtxDomainHandle_t domain,
const char* message) {
70 nvtxEventAttributes_t eventAttrib = {};
71 eventAttrib.version = NVTX_VERSION;
72 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
73 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
74 eventAttrib.message.ascii = message;
75 return nvtxDomainRangeStartEx(domain, &eventAttrib);
78 nvtxRangeId_t nvtxDomainRangeStartColor(nvtxDomainHandle_t domain,
const char* message, uint32_t color) {
79 nvtxEventAttributes_t eventAttrib = {};
80 eventAttrib.version = NVTX_VERSION;
81 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
82 eventAttrib.colorType = NVTX_COLOR_ARGB;
83 eventAttrib.color = color;
84 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
85 eventAttrib.message.ascii = message;
86 return nvtxDomainRangeStartEx(domain, &eventAttrib);
89 void nvtxDomainMark(nvtxDomainHandle_t domain,
const char* message) {
90 nvtxEventAttributes_t eventAttrib = {};
91 eventAttrib.version = NVTX_VERSION;
92 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
93 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
94 eventAttrib.message.ascii = message;
95 nvtxDomainMarkEx(domain, &eventAttrib);
98 __attribute__((unused))
void nvtxDomainMarkColor(nvtxDomainHandle_t domain,
const char* message, uint32_t color) {
99 nvtxEventAttributes_t eventAttrib = {};
100 eventAttrib.version = NVTX_VERSION;
101 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
102 eventAttrib.colorType = NVTX_COLOR_ARGB;
103 eventAttrib.color = color;
104 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
105 eventAttrib.message.ascii = message;
106 nvtxDomainMarkEx(domain, &eventAttrib);
110 nvtxBlack = 0x00000000,
111 nvtxRed = 0x00ff0000,
112 nvtxDarkGreen = 0x00009900,
113 nvtxGreen = 0x0000ff00,
114 nvtxLightGreen = 0x00ccffcc,
115 nvtxBlue = 0x000000ff,
116 nvtxAmber = 0x00ffbf00,
117 nvtxLightAmber = 0x00fff2cc,
118 nvtxWhite = 0x00ffffff
// Sentinel range id: the literal has all 64 bits set. Elsewhere in this
// listing it is assigned to event_[sid]; presumably it marks "no range
// currently open" for that stream (confirm against the full source).
121 constexpr nvtxRangeId_t nvtxInvalidRangeId = 0xfffffffffffffffful;
278 return (std::binary_search(highlightModules_.begin(), highlightModules_.end(),
label));
284 return highlight(
label) ? nvtxLightAmber : nvtxLightGreen;
291 std::atomic<bool> globalFirstEventDone_ =
false;
304 showModulePrefetching_(
config.getUntrackedParameter<
bool>(
"showModulePrefetching")),
305 skipFirstEvent_(
config.getUntrackedParameter<
bool>(
"skipFirstEvent")) {
477 desc.addUntracked<std::vector<std::string>>(
"highlightModules", {})->setComment(
"");
478 desc.addUntracked<
bool>(
"showModulePrefetching",
false)
479 ->setComment(
"Show the stack of dependencies that requested to run a module.");
480 desc.addUntracked<
bool>(
"skipFirstEvent",
false)
482 "Start profiling after the first event has completed.\nWith multiple streams, ignore transitions belonging "
483 "to events started in parallel to the first event.\nRequires running nvprof with the '--profile-from-start "
485 descriptions.
add(
"NVProfilerService",
desc);
486 descriptions.
setComment(R
"(This Service provides CMSSW-aware annotations to nvprof/nvvm.
488 Notes on nvprof options:
489 - the option '--profile-from-start off' should be used if skipFirstEvent is True.
490 - the option '--cpu-profiling on' currently results in cmsRun being stuck at the beginning of the job.
491 - the option '--cpu-thread-tracing on' is not compatible with jemalloc, and should only be used with cmsRunGlibC.)");
495 std::stringstream
out;
504 for (
unsigned int sid = 0; sid < concurrentStreams; ++sid) {
505 stream_domain_[sid] = nvtxDomainCreate(fmt::sprintf(
"EDM Stream %d", sid).c_str());
508 event_.resize(concurrentStreams);
512 std::vector<std::atomic<bool>>
tmp(concurrentStreams);
513 for (
auto& element :
tmp)
514 std::atomic_init(&element,
false);
609 auto const&
msg =
label +
" begin stream";
629 auto const&
msg =
label +
" end stream";
757 event_[sid] = nvtxInvalidRangeId;
762 bool expected =
false;
790 auto const&
msg =
label +
" prefetching";
807 auto mid =
desc.id();
810 auto const&
msg =
label +
" construction";
817 auto mid =
desc.id();
825 auto mid =
desc.id();
827 auto const&
msg =
label +
" begin job";
834 auto mid =
desc.id();
842 auto mid =
desc.id();
844 auto const&
msg =
label +
" end job";
851 auto mid =
desc.id();
862 auto const&
msg =
label +
" acquire";
949 auto const&
msg =
label +
" stream begin run";
969 auto const&
msg =
label +
" stream end run";
989 auto const&
msg =
label +
" stream begin lumi";
1009 auto const&
msg =
label +
" stream end lumi";
1028 auto const&
msg =
label +
" global begin run";
1045 auto const&
msg =
label +
" global end run";
1062 auto const&
msg =
label +
" global begin lumi";
1079 auto const&
msg =
label +
" global end lumi";
1094 auto mid =
desc.id();
1097 auto const&
msg =
label +
" construction";
1104 auto mid =
desc.id();
void postEvent(edm::StreamContext const &)
void postStreamEndRun(edm::StreamContext const &)
void postModuleEndJob(edm::ModuleDescription const &)
void postModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postSourceLumi(edm::LuminosityBlockIndex)
void watchPreModuleEndStream(PreModuleEndStream::slot_type const &iSlot)
void watchPreModuleBeginStream(PreModuleBeginStream::slot_type const &iSlot)
std::string const & moduleLabel() const
void preModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preStreamBeginLumi(edm::StreamContext const &)
void watchPostStreamEndLumi(PostStreamEndLumi::slot_type const &iSlot)
void watchPostModuleGlobalEndRun(PostModuleGlobalEndRun::slot_type const &iSlot)
void preSourceRun(edm::RunIndex)
void postSourceRun(edm::RunIndex)
void preModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPostSourceRun(PostSourceRun::slot_type const &iSlot)
void watchPreSourceLumi(PreSourceLumi::slot_type const &iSlot)
void watchPostGlobalEndRun(PostGlobalEndRun::slot_type const &iSlot)
void watchPostEventReadFromSource(PostEventReadFromSource::slot_type const &iSlot)
void watchPostModuleStreamBeginLumi(PostModuleStreamBeginLumi::slot_type const &iSlot)
void watchPreModuleEndJob(PreModuleEndJob::slot_type const &iSlot)
void watchPostGlobalEndLumi(PostGlobalEndLumi::slot_type const &iSlot)
#define DEFINE_FWK_SERVICE(type)
uint32_t labelColor(std::string const &label) const
void preModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPostOpenFile(PostOpenFile::slot_type const &iSlot)
void preGlobalEndRun(edm::GlobalContext const &)
void preModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPostModuleGlobalBeginRun(PostModuleGlobalBeginRun::slot_type const &iSlot)
void preStreamEndRun(edm::StreamContext const &)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
void postModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void postModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPostModuleStreamEndRun(PostModuleStreamEndRun::slot_type const &iSlot)
void preBeginJob(edm::PathsAndConsumesOfModulesBase const &, edm::ProcessContext const &)
ModuleDescription const * moduleDescription() const
void watchPreModuleStreamBeginRun(PreModuleStreamBeginRun::slot_type const &iSlot)
void watchPreStreamBeginRun(PreStreamBeginRun::slot_type const &iSlot)
bool highlight(std::string const &label) const
unsigned int maxNumberOfConcurrentLuminosityBlocks() const
void postModuleEventAcquire(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preGlobalEndLumi(edm::GlobalContext const &)
void preModuleBeginJob(edm::ModuleDescription const &)
void postStreamEndLumi(edm::StreamContext const &)
void watchPostModuleBeginStream(PostModuleBeginStream::slot_type const &iSlot)
void watchPostSourceEvent(PostSourceEvent::slot_type const &iSlot)
void watchPreCloseFile(PreCloseFile::slot_type const &iSlot)
void watchPostModuleEndJob(PostModuleEndJob::slot_type const &iSlot)
void watchPostCloseFile(PostCloseFile::slot_type const &iSlot)
void postOpenFile(std::string const &, bool)
void watchPreModuleGlobalBeginLumi(PreModuleGlobalBeginLumi::slot_type const &iSlot)
void postModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPreModuleStreamEndLumi(PreModuleStreamEndLumi::slot_type const &iSlot)
void preModuleBeginStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPostEndJob(PostEndJob::slot_type const &iSlot)
void watchPreStreamBeginLumi(PreStreamBeginLumi::slot_type const &iSlot)
void watchPreModuleEventPrefetching(PreModuleEventPrefetching::slot_type const &iSlot)
void add(std::string const &label, ParameterSetDescription const &psetDescription)
void preSourceEvent(edm::StreamID)
void postSourceConstruction(edm::ModuleDescription const &)
void postPathEvent(edm::StreamContext const &, edm::PathContext const &, edm::HLTPathStatus const &)
nvtxDomainHandle_t global_domain_
void watchPreBeginJob(PreBeginJob::slot_type const &iSlot)
convenience function for attaching to signal
void watchPostModuleEndStream(PostModuleEndStream::slot_type const &iSlot)
void preModuleEventAcquire(edm::StreamContext const &, edm::ModuleCallingContext const &)
unsigned int maxNumberOfConcurrentRuns() const
void preStreamEndLumi(edm::StreamContext const &)
void preModuleConstruction(edm::ModuleDescription const &)
void watchPostModuleEventDelayedGet(PostModuleEventDelayedGet::slot_type const &iSlot)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
void watchPreEvent(PreEvent::slot_type const &iSlot)
void watchPostStreamBeginRun(PostStreamBeginRun::slot_type const &iSlot)
void postGlobalBeginLumi(edm::GlobalContext const &)
void postModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preGlobalBeginLumi(edm::GlobalContext const &)
void watchPostBeginJob(PostBeginJob::slot_type const &iSlot)
convenience function for attaching to signal
std::vector< nvtxRangeId_t > event_
void watchPreModuleStreamBeginLumi(PreModuleStreamBeginLumi::slot_type const &iSlot)
void postEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPostModuleBeginJob(PostModuleBeginJob::slot_type const &iSlot)
void watchPostModuleConstruction(PostModuleConstruction::slot_type const &iSlot)
void watchPreGlobalEndRun(PreGlobalEndRun::slot_type const &iSlot)
void watchPreEventReadFromSource(PreEventReadFromSource::slot_type const &iSlot)
void postModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPreSourceConstruction(PreSourceConstruction::slot_type const &iSlot)
void watchPreSourceRun(PreSourceRun::slot_type const &iSlot)
void postModuleConstruction(edm::ModuleDescription const &)
void preCloseFile(std::string const &, bool)
void watchPostSourceLumi(PostSourceLumi::slot_type const &iSlot)
std::vector< std::vector< nvtxRangeId_t > > stream_modules_
void postGlobalEndLumi(edm::GlobalContext const &)
void watchPreModuleGlobalEndLumi(PreModuleGlobalEndLumi::slot_type const &iSlot)
void watchPreModuleStreamEndRun(PreModuleStreamEndRun::slot_type const &iSlot)
void preModuleEndJob(edm::ModuleDescription const &)
void watchPostEvent(PostEvent::slot_type const &iSlot)
void preStreamBeginRun(edm::StreamContext const &)
unsigned int maxNumberOfThreads() const
void preallocate(edm::service::SystemBounds const &)
void watchPostModuleGlobalEndLumi(PostModuleGlobalEndLumi::slot_type const &iSlot)
void preModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
StreamID const & streamID() const
void preModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreStreamEndLumi(PreStreamEndLumi::slot_type const &iSlot)
float __attribute__((vector_size(8))) cms_float32x2_t
void setComment(std::string const &value)
void postGlobalBeginRun(edm::GlobalContext const &)
void preModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
std::atomic< bool > globalFirstEventDone_
void postModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreModuleConstruction(PreModuleConstruction::slot_type const &iSlot)
void watchPostGlobalBeginLumi(PostGlobalBeginLumi::slot_type const &iSlot)
const bool skipFirstEvent_
void postSourceEvent(edm::StreamID)
std::vector< std::atomic< bool > > streamFirstEventDone_
unsigned int maxNumberOfStreams() const
void prePathEvent(edm::StreamContext const &, edm::PathContext const &)
void postModuleEndStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPostPathEvent(PostPathEvent::slot_type const &iSlot)
void watchPreModuleGlobalEndRun(PreModuleGlobalEndRun::slot_type const &iSlot)
void preSourceLumi(edm::LuminosityBlockIndex)
void watchPostModuleStreamEndLumi(PostModuleStreamEndLumi::slot_type const &iSlot)
std::string const & pathName() const
const bool showModulePrefetching_
void preEvent(edm::StreamContext const &)
void preModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preSourceConstruction(edm::ModuleDescription const &)
std::vector< std::string > highlightModules_
void watchPostModuleEventPrefetching(PostModuleEventPrefetching::slot_type const &iSlot)
void watchPreGlobalEndLumi(PreGlobalEndLumi::slot_type const &iSlot)
void watchPreallocate(Preallocate::slot_type const &iSlot)
void postStreamBeginRun(edm::StreamContext const &)
void watchPostStreamBeginLumi(PostStreamBeginLumi::slot_type const &iSlot)
void watchPreGlobalBeginLumi(PreGlobalBeginLumi::slot_type const &iSlot)
void watchPreModuleBeginJob(PreModuleBeginJob::slot_type const &iSlot)
void watchPreOpenFile(PreOpenFile::slot_type const &iSlot)
void watchPostModuleEvent(PostModuleEvent::slot_type const &iSlot)
void watchPreStreamEndRun(PreStreamEndRun::slot_type const &iSlot)
void watchPreModuleGlobalBeginRun(PreModuleGlobalBeginRun::slot_type const &iSlot)
void watchPostGlobalBeginRun(PostGlobalBeginRun::slot_type const &iSlot)
std::vector< nvtxDomainHandle_t > stream_domain_
void preGlobalBeginRun(edm::GlobalContext const &)
void postModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void postStreamBeginLumi(edm::StreamContext const &)
void postGlobalEndRun(edm::GlobalContext const &)
void watchPostModuleEventAcquire(PostModuleEventAcquire::slot_type const &iSlot)
void watchPreSourceEvent(PreSourceEvent::slot_type const &iSlot)
void postModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
NVProfilerService(const edm::ParameterSet &, edm::ActivityRegistry &)
The Signals That Services Can Subscribe To: This is based on ActivityRegistry and is current per […]. Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application. Each possible callback has some defined arguments, which we here list in angle brackets, e.g. < void, edm::EventID const &, edm::Timestamp const & >. We also list in braces which AR_WATCH_USING_METHOD_ is used for those, or […]
void postModuleBeginJob(edm::ModuleDescription const &)
tbb::concurrent_vector< nvtxRangeId_t > global_modules_
void watchPostModuleGlobalBeginLumi(PostModuleGlobalBeginLumi::slot_type const &iSlot)
void preModuleEndStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleBeginStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreGlobalBeginRun(PreGlobalBeginRun::slot_type const &iSlot)
void watchPrePathEvent(PrePathEvent::slot_type const &iSlot)
void preOpenFile(std::string const &, bool)
void watchPreModuleEventAcquire(PreModuleEventAcquire::slot_type const &iSlot)
std::vector< ModuleDescription const * > const & allModules() const
void preModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreModuleEventDelayedGet(PreModuleEventDelayedGet::slot_type const &iSlot)
void watchPostSourceConstruction(PostSourceConstruction::slot_type const &iSlot)
void watchPostModuleStreamBeginRun(PostModuleStreamBeginRun::slot_type const &iSlot)
void watchPostStreamEndRun(PostStreamEndRun::slot_type const &iSlot)
void watchPreModuleEvent(PreModuleEvent::slot_type const &iSlot)
uint32_t labelColorLight(std::string const &label) const
void postCloseFile(std::string const &, bool)