12 #include <oneapi/tbb/concurrent_vector.h> 14 #include <fmt/printf.h> 16 #include <cuda_profiler_api.h> 17 #include <nvToolsExt.h> 49 int nvtxDomainRangePush(nvtxDomainHandle_t domain,
const char* message) {
// Convenience wrapper around nvtxDomainRangePushEx(): builds an event
// attribute structure that carries only an ASCII message and forwards it,
// together with `domain`, to the extended API.
//   domain  - NVTX domain handle passed through unchanged
//   message - C string stored by pointer in the attributes (not copied here)
// Returns the value produced by nvtxDomainRangePushEx().
// NOTE(review): the bare numerals at the start of each line look like line
// numbers carried over from a rendered listing, and the closing brace of this
// function is not present in this text - confirm against the original file.
// Start from an all-zero structure so every field not set below stays zero.
50 nvtxEventAttributes_t eventAttrib = {};
// Version and size are filled from the NVTX header constants.
51 eventAttrib.version = NVTX_VERSION;
52 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
// Declare the message as ASCII and attach the caller's string.
53 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
54 eventAttrib.message.ascii = message;
55 return nvtxDomainRangePushEx(domain, &eventAttrib);
58 __attribute__((unused))
int nvtxDomainRangePushColor(nvtxDomainHandle_t domain,
const char* message, uint32_t color) {
59 nvtxEventAttributes_t eventAttrib = {};
60 eventAttrib.version = NVTX_VERSION;
61 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
62 eventAttrib.colorType = NVTX_COLOR_ARGB;
63 eventAttrib.color = color;
64 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
65 eventAttrib.message.ascii = message;
66 return nvtxDomainRangePushEx(domain, &eventAttrib);
69 __attribute__((unused)) nvtxRangeId_t nvtxDomainRangeStart(nvtxDomainHandle_t domain,
const char* message) {
70 nvtxEventAttributes_t eventAttrib = {};
71 eventAttrib.version = NVTX_VERSION;
72 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
73 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
74 eventAttrib.message.ascii = message;
75 return nvtxDomainRangeStartEx(domain, &eventAttrib);
78 nvtxRangeId_t nvtxDomainRangeStartColor(nvtxDomainHandle_t domain,
const char* message, uint32_t color) {
79 nvtxEventAttributes_t eventAttrib = {};
80 eventAttrib.version = NVTX_VERSION;
81 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
82 eventAttrib.colorType = NVTX_COLOR_ARGB;
83 eventAttrib.color = color;
84 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
85 eventAttrib.message.ascii = message;
86 return nvtxDomainRangeStartEx(domain, &eventAttrib);
89 void nvtxDomainMark(nvtxDomainHandle_t domain,
const char* message) {
90 nvtxEventAttributes_t eventAttrib = {};
91 eventAttrib.version = NVTX_VERSION;
92 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
93 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
94 eventAttrib.message.ascii = message;
95 nvtxDomainMarkEx(domain, &eventAttrib);
98 __attribute__((unused))
void nvtxDomainMarkColor(nvtxDomainHandle_t domain,
const char* message, uint32_t color) {
99 nvtxEventAttributes_t eventAttrib = {};
100 eventAttrib.version = NVTX_VERSION;
101 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
102 eventAttrib.colorType = NVTX_COLOR_ARGB;
103 eventAttrib.color = color;
104 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
105 eventAttrib.message.ascii = message;
106 nvtxDomainMarkEx(domain, &eventAttrib);
110 nvtxBlack = 0x00000000,
111 nvtxRed = 0x00ff0000,
112 nvtxDarkGreen = 0x00009900,
113 nvtxGreen = 0x0000ff00,
114 nvtxLightGreen = 0x00ccffcc,
115 nvtxBlue = 0x000000ff,
116 nvtxAmber = 0x00ffbf00,
117 nvtxLightAmber = 0x00fff2cc,
118 nvtxWhite = 0x00ffffff
121 constexpr nvtxRangeId_t nvtxInvalidRangeId = 0xfffffffffffffffful;
282 return (std::binary_search(highlightModules_.begin(), highlightModules_.end(),
label));
288 return highlight(
label) ? nvtxLightAmber : nvtxLightGreen;
298 std::atomic<bool> globalFirstEventDone_ =
false;
311 showModulePrefetching_(
config.getUntrackedParameter<
bool>(
"showModulePrefetching")),
312 skipFirstEvent_(
config.getUntrackedParameter<
bool>(
"skipFirstEvent")) {
488 desc.addUntracked<std::vector<std::string>>(
"highlightModules", {})->setComment(
"");
489 desc.addUntracked<
bool>(
"showModulePrefetching",
false)
490 ->setComment(
"Show the stack of dependencies that requested to run a module.");
491 desc.addUntracked<
bool>(
"skipFirstEvent",
false)
493 "Start profiling after the first event has completed.\nWith multiple streams, ignore transitions belonging " 494 "to events started in parallel to the first event.\nRequires running nvprof with the '--profile-from-start " 496 descriptions.
add(
"NVProfilerService",
desc);
497 descriptions.
setComment(R
"(This Service provides CMSSW-aware annotations to nvprof/nvvm. 499 Notes on nvprof options: 500 - the option '--profile-from-start off' should be used if skipFirstEvent is True. 501 - the option '--cpu-profiling on' currently results in cmsRun being stuck at the beginning of the job. 502 - the option '--cpu-thread-tracing on' is not compatible with jemalloc, and should only be used with cmsRunGlibC.)"); 506 std::stringstream
out;
507 out <<
"preallocate: " <<
bounds.maxNumberOfConcurrentRuns() <<
" concurrent runs, " 508 <<
bounds.maxNumberOfConcurrentLuminosityBlocks() <<
" luminosity sections, " <<
bounds.maxNumberOfStreams()
509 <<
" streams\nrunning on " <<
bounds.maxNumberOfThreads() <<
" threads";
512 auto concurrentStreams =
bounds.maxNumberOfStreams();
515 for (
unsigned int sid = 0; sid < concurrentStreams; ++sid) {
516 stream_domain_[sid] = nvtxDomainCreate(fmt::sprintf(
"EDM Stream %d", sid).c_str());
519 event_.resize(concurrentStreams);
523 std::vector<std::atomic<bool>>
tmp(concurrentStreams);
524 for (
auto& element :
tmp)
525 std::atomic_init(&element,
false);
621 auto const&
msg =
label +
" begin stream";
641 auto const&
msg =
label +
" end stream";
769 event_[sid] = nvtxInvalidRangeId;
774 bool expected =
false;
802 auto const&
msg =
label +
" prefetching";
819 auto mid =
desc.id();
822 auto const&
msg =
label +
" construction";
829 auto mid =
desc.id();
837 auto mid =
desc.id();
840 auto const&
msg =
label +
" destruction";
847 auto mid =
desc.id();
855 auto mid =
desc.id();
857 auto const&
msg =
label +
" begin job";
864 auto mid =
desc.id();
872 auto mid =
desc.id();
874 auto const&
msg =
label +
" end job";
881 auto mid =
desc.id();
892 auto const&
msg =
label +
" acquire";
979 auto const&
msg =
label +
" stream begin run";
999 auto const&
msg =
label +
" stream end run";
1019 auto const&
msg =
label +
" stream begin lumi";
1039 auto const&
msg =
label +
" stream end lumi";
1058 auto const&
msg =
label +
" global begin run";
1075 auto const&
msg =
label +
" global end run";
1092 auto const&
msg =
label +
" global begin lumi";
1109 auto const&
msg =
label +
" global end lumi";
1126 auto mid =
desc.id();
1129 auto const&
msg =
label +
" construction";
1136 auto mid =
desc.id();
void watchPostModuleGlobalEndLumi(PostModuleGlobalEndLumi::slot_type const &iSlot)
void watchPostModuleConstruction(PostModuleConstruction::slot_type const &iSlot)
void watchPreModuleGlobalBeginRun(PreModuleGlobalBeginRun::slot_type const &iSlot)
void watchPreEvent(PreEvent::slot_type const &iSlot)
void postStreamEndLumi(edm::StreamContext const &)
ModuleDescription const * moduleDescription() const
void preModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preGlobalBeginRun(edm::GlobalContext const &)
void postGlobalEndRun(edm::GlobalContext const &)
void preGlobalEndLumi(edm::GlobalContext const &)
void postModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPrePathEvent(PrePathEvent::slot_type const &iSlot)
void watchPreallocate(Preallocate::slot_type const &iSlot)
void postModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreModuleEventAcquire(PreModuleEventAcquire::slot_type const &iSlot)
void watchPostEndJob(PostEndJob::slot_type const &iSlot)
void preModuleEndStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleEventAcquire(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPostModuleEndStream(PostModuleEndStream::slot_type const &iSlot)
void watchPreModuleEvent(PreModuleEvent::slot_type const &iSlot)
void postModuleEventAcquire(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreModuleConstruction(PreModuleConstruction::slot_type const &iSlot)
void watchPreGlobalEndLumi(PreGlobalEndLumi::slot_type const &iSlot)
void watchPostEvent(PostEvent::slot_type const &iSlot)
void preStreamBeginLumi(edm::StreamContext const &)
void preModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreStreamEndRun(PreStreamEndRun::slot_type const &iSlot)
void watchPreSourceConstruction(PreSourceConstruction::slot_type const &iSlot)
void preSourceRun(edm::RunIndex)
void watchPostSourceConstruction(PostSourceConstruction::slot_type const &iSlot)
void watchPostStreamEndLumi(PostStreamEndLumi::slot_type const &iSlot)
void watchPreGlobalBeginLumi(PreGlobalBeginLumi::slot_type const &iSlot)
void preModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postSourceConstruction(edm::ModuleDescription const &)
void preModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreEventReadFromSource(PreEventReadFromSource::slot_type const &iSlot)
void watchPreModuleDestruction(PreModuleDestruction::slot_type const &iSlot)
void watchPostPathEvent(PostPathEvent::slot_type const &iSlot)
void watchPostModuleEvent(PostModuleEvent::slot_type const &iSlot)
void watchPostModuleGlobalBeginLumi(PostModuleGlobalBeginLumi::slot_type const &iSlot)
void watchPostModuleStreamEndLumi(PostModuleStreamEndLumi::slot_type const &iSlot)
void watchPostGlobalBeginLumi(PostGlobalBeginLumi::slot_type const &iSlot)
void watchPostModuleStreamBeginRun(PostModuleStreamBeginRun::slot_type const &iSlot)
void watchPostSourceEvent(PostSourceEvent::slot_type const &iSlot)
uint32_t labelColorLight(std::string const &label) const
void postStreamEndRun(edm::StreamContext const &)
std::vector< std::vector< nvtxRangeId_t > > stream_modules_
void postModuleEndJob(edm::ModuleDescription const &)
void postGlobalEndLumi(edm::GlobalContext const &)
void preModuleEndJob(edm::ModuleDescription const &)
void preModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPreModuleBeginStream(PreModuleBeginStream::slot_type const &iSlot)
void preGlobalBeginLumi(edm::GlobalContext const &)
void postSourceLumi(edm::LuminosityBlockIndex)
void watchPreStreamEndLumi(PreStreamEndLumi::slot_type const &iSlot)
void watchPreModuleGlobalEndRun(PreModuleGlobalEndRun::slot_type const &iSlot)
void watchPreModuleEventPrefetching(PreModuleEventPrefetching::slot_type const &iSlot)
void preModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postCloseFile(std::string const &)
void postModuleConstruction(edm::ModuleDescription const &)
ProcessCallGraph callgraph_
void postModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preGlobalEndRun(edm::GlobalContext const &)
void postSourceEvent(edm::StreamID)
void watchPostModuleEventPrefetching(PostModuleEventPrefetching::slot_type const &iSlot)
void preStreamEndRun(edm::StreamContext const &)
void postModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPreOpenFile(PreOpenFile::slot_type const &iSlot)
void watchPostGlobalBeginRun(PostGlobalBeginRun::slot_type const &iSlot)
void watchPostCloseFile(PostCloseFile::slot_type const &iSlot)
void postModuleDestruction(edm::ModuleDescription const &)
void watchPreGlobalEndRun(PreGlobalEndRun::slot_type const &iSlot)
bool highlight(std::string const &label) const
void preModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
uint32_t labelColor(std::string const &label) const
void watchPostSourceRun(PostSourceRun::slot_type const &iSlot)
void watchPostStreamBeginLumi(PostStreamBeginLumi::slot_type const &iSlot)
void preSourceConstruction(edm::ModuleDescription const &)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
void watchPreSourceLumi(PreSourceLumi::slot_type const &iSlot)
The Signals That Services Can Subscribe To: This is based on ActivityRegistry and is current per […]. Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application. Each possible callback has some defined arguments, which we here list in angle brackets, e.g. < void, edm::EventID const &, edm::Timestamp const & >. We also list in braces which AR_WATCH_USING_METHOD_ is used for those callbacks or […]
float __attribute__((vector_size(8))) cms_float32x2_t
void postModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPreModuleEventDelayedGet(PreModuleEventDelayedGet::slot_type const &iSlot)
StreamID const & streamID() const
void preBeginJob(edm::PathsAndConsumesOfModulesBase const &, edm::ProcessContext const &)
void preModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preBeginJob(edm::PathsAndConsumesOfModulesBase const &, edm::ProcessContext const &)
void watchPostModuleEventAcquire(PostModuleEventAcquire::slot_type const &iSlot)
void watchPreModuleEndJob(PreModuleEndJob::slot_type const &iSlot)
void preCloseFile(std::string const &)
void postPathEvent(edm::StreamContext const &, edm::PathContext const &, edm::HLTPathStatus const &)
void postStreamBeginLumi(edm::StreamContext const &)
void preSourceConstruction(edm::ModuleDescription const &)
void preStreamEndLumi(edm::StreamContext const &)
void watchPostGlobalEndLumi(PostGlobalEndLumi::slot_type const &iSlot)
void watchPreSourceRun(PreSourceRun::slot_type const &iSlot)
void preModuleBeginJob(edm::ModuleDescription const &)
void watchPreModuleBeginJob(PreModuleBeginJob::slot_type const &iSlot)
void postGlobalBeginLumi(edm::GlobalContext const &)
void watchPostStreamEndRun(PostStreamEndRun::slot_type const &iSlot)
tbb::concurrent_vector< nvtxRangeId_t > global_modules_
std::vector< nvtxRangeId_t > event_
void postEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
std::vector< nvtxDomainHandle_t > stream_domain_
void postModuleBeginStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPostModuleStreamEndRun(PostModuleStreamEndRun::slot_type const &iSlot)
void watchPreModuleGlobalBeginLumi(PreModuleGlobalBeginLumi::slot_type const &iSlot)
void postModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preModuleBeginStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceEvent(edm::StreamID)
void watchPreGlobalBeginRun(PreGlobalBeginRun::slot_type const &iSlot)
void watchPreModuleStreamBeginLumi(PreModuleStreamBeginLumi::slot_type const &iSlot)
nvtxDomainHandle_t global_domain_
void preOpenFile(std::string const &)
#define DEFINE_FWK_SERVICE(type)
void setComment(std::string const &value)
void watchPostModuleBeginStream(PostModuleBeginStream::slot_type const &iSlot)
unsigned int size() const
void preModuleConstruction(edm::ModuleDescription const &)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
void watchPostSourceLumi(PostSourceLumi::slot_type const &iSlot)
NVProfilerService(const edm::ParameterSet &, edm::ActivityRegistry &)
void watchPreCloseFile(PreCloseFile::slot_type const &iSlot)
void watchPostModuleEventDelayedGet(PostModuleEventDelayedGet::slot_type const &iSlot)
void watchPostModuleGlobalEndRun(PostModuleGlobalEndRun::slot_type const &iSlot)
void watchPostModuleStreamBeginLumi(PostModuleStreamBeginLumi::slot_type const &iSlot)
void preStreamBeginRun(edm::StreamContext const &)
void postModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreModuleStreamEndLumi(PreModuleStreamEndLumi::slot_type const &iSlot)
void watchPreModuleStreamBeginRun(PreModuleStreamBeginRun::slot_type const &iSlot)
void watchPostGlobalEndRun(PostGlobalEndRun::slot_type const &iSlot)
void watchPreStreamBeginLumi(PreStreamBeginLumi::slot_type const &iSlot)
void add(std::string const &label, ParameterSetDescription const &psetDescription)
void watchPostOpenFile(PostOpenFile::slot_type const &iSlot)
void watchPreModuleEndStream(PreModuleEndStream::slot_type const &iSlot)
void postGlobalBeginRun(edm::GlobalContext const &)
void watchPreBeginJob(PreBeginJob::slot_type const &iSlot)
convenience function for attaching to signal
void postModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postEvent(edm::StreamContext const &)
const bool skipFirstEvent_
void watchPostStreamBeginRun(PostStreamBeginRun::slot_type const &iSlot)
void postSourceRun(edm::RunIndex)
void watchPreStreamBeginRun(PreStreamBeginRun::slot_type const &iSlot)
void watchPostModuleDestruction(PostModuleDestruction::slot_type const &iSlot)
void postOpenFile(std::string const &)
void preallocate(edm::service::SystemBounds const &)
void watchPreModuleStreamEndRun(PreModuleStreamEndRun::slot_type const &iSlot)
void postModuleEndStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceLumi(edm::LuminosityBlockIndex)
void watchPostModuleBeginJob(PostModuleBeginJob::slot_type const &iSlot)
void watchPostEventReadFromSource(PostEventReadFromSource::slot_type const &iSlot)
void watchPostModuleGlobalBeginRun(PostModuleGlobalBeginRun::slot_type const &iSlot)
const bool showModulePrefetching_
void preEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preEvent(edm::StreamContext const &)
std::string const & pathName() const
void preModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
std::atomic< bool > globalFirstEventDone_
void watchPreSourceEvent(PreSourceEvent::slot_type const &iSlot)
void postStreamBeginRun(edm::StreamContext const &)
std::vector< std::atomic< bool > > streamFirstEventDone_
void prePathEvent(edm::StreamContext const &, edm::PathContext const &)
std::string const & moduleLabel() const
void postModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPostModuleEndJob(PostModuleEndJob::slot_type const &iSlot)
std::vector< std::string > highlightModules_
void preModuleDestruction(edm::ModuleDescription const &)
void watchPreModuleGlobalEndLumi(PreModuleGlobalEndLumi::slot_type const &iSlot)
void postModuleBeginJob(edm::ModuleDescription const &)
void watchPostBeginJob(PostBeginJob::slot_type const &iSlot)
convenience function for attaching to signal