// The dependency headers (TBB concurrent_vector, fmt printf, the CUDA profiler API and NVTX) share the next
// line with the start of nvtxDomainRangePush.
//
// nvtxDomainRangePush: fills a zero-initialised nvtxEventAttributes_t with `message` as an ASCII message and
// hands it to nvtxDomainRangePushEx for `domain`. The return value is whatever nvtxDomainRangePushEx returns.
// No colour is set, so colorType and color keep their zero-initialised values.
12 #include <oneapi/tbb/concurrent_vector.h> 14 #include <fmt/printf.h> 16 #include <cuda_profiler_api.h> 17 #include <nvToolsExt.h> 48 int nvtxDomainRangePush(nvtxDomainHandle_t domain,
const char* message) {
// `= {}` clears every attribute field that is not assigned explicitly below.
49 nvtxEventAttributes_t eventAttrib = {};
// version and size are taken from the NVTX header macros.
50 eventAttrib.version = NVTX_VERSION;
51 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
// Only the pointer is stored here; the string itself is not copied by this function.
52 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
53 eventAttrib.message.ascii = message;
54 return nvtxDomainRangePushEx(domain, &eventAttrib);
57 __attribute__((unused))
int nvtxDomainRangePushColor(nvtxDomainHandle_t domain,
const char* message, uint32_t color) {
58 nvtxEventAttributes_t eventAttrib = {};
59 eventAttrib.version = NVTX_VERSION;
60 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
61 eventAttrib.colorType = NVTX_COLOR_ARGB;
62 eventAttrib.color = color;
63 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
64 eventAttrib.message.ascii = message;
65 return nvtxDomainRangePushEx(domain, &eventAttrib);
68 __attribute__((unused)) nvtxRangeId_t nvtxDomainRangeStart(nvtxDomainHandle_t domain,
const char* message) {
69 nvtxEventAttributes_t eventAttrib = {};
70 eventAttrib.version = NVTX_VERSION;
71 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
72 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
73 eventAttrib.message.ascii = message;
74 return nvtxDomainRangeStartEx(domain, &eventAttrib);
77 nvtxRangeId_t nvtxDomainRangeStartColor(nvtxDomainHandle_t domain,
const char* message, uint32_t color) {
78 nvtxEventAttributes_t eventAttrib = {};
79 eventAttrib.version = NVTX_VERSION;
80 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
81 eventAttrib.colorType = NVTX_COLOR_ARGB;
82 eventAttrib.color = color;
83 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
84 eventAttrib.message.ascii = message;
85 return nvtxDomainRangeStartEx(domain, &eventAttrib);
88 void nvtxDomainMark(nvtxDomainHandle_t domain,
const char* message) {
89 nvtxEventAttributes_t eventAttrib = {};
90 eventAttrib.version = NVTX_VERSION;
91 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
92 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
93 eventAttrib.message.ascii = message;
94 nvtxDomainMarkEx(domain, &eventAttrib);
97 __attribute__((unused))
void nvtxDomainMarkColor(nvtxDomainHandle_t domain,
const char* message, uint32_t color) {
98 nvtxEventAttributes_t eventAttrib = {};
99 eventAttrib.version = NVTX_VERSION;
100 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
101 eventAttrib.colorType = NVTX_COLOR_ARGB;
102 eventAttrib.color = color;
103 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
104 eventAttrib.message.ascii = message;
105 nvtxDomainMarkEx(domain, &eventAttrib);
109 nvtxBlack = 0x00000000,
110 nvtxRed = 0x00ff0000,
111 nvtxDarkGreen = 0x00009900,
112 nvtxGreen = 0x0000ff00,
113 nvtxLightGreen = 0x00ccffcc,
114 nvtxBlue = 0x000000ff,
115 nvtxAmber = 0x00ffbf00,
116 nvtxLightAmber = 0x00fff2cc,
117 nvtxWhite = 0x00ffffff
120 constexpr nvtxRangeId_t nvtxInvalidRangeId = 0xfffffffffffffffful;
281 return (std::binary_search(highlightModules_.begin(), highlightModules_.end(),
label));
287 return highlight(
label) ? nvtxLightAmber : nvtxLightGreen;
294 std::atomic<bool> globalFirstEventDone_ =
false;
307 showModulePrefetching_(
config.getUntrackedParameter<
bool>(
"showModulePrefetching")),
308 skipFirstEvent_(
config.getUntrackedParameter<
bool>(
"skipFirstEvent")) {
484 desc.addUntracked<std::vector<std::string>>(
"highlightModules", {})->setComment(
"");
485 desc.addUntracked<
bool>(
"showModulePrefetching",
false)
486 ->setComment(
"Show the stack of dependencies that requested to run a module.");
487 desc.addUntracked<
bool>(
"skipFirstEvent",
false)
489 "Start profiling after the first event has completed.\nWith multiple streams, ignore transitions belonging " 490 "to events started in parallel to the first event.\nRequires running nvprof with the '--profile-from-start " 492 descriptions.
add(
"NVProfilerService",
desc);
493 descriptions.
setComment(R
"(This Service provides CMSSW-aware annotations to nvprof/nvvm. 495 Notes on nvprof options: 496 - the option '--profile-from-start off' should be used if skipFirstEvent is True. 497 - the option '--cpu-profiling on' currently results in cmsRun being stuck at the beginning of the job. 498 - the option '--cpu-thread-tracing on' is not compatible with jemalloc, and should only be used with cmsRunGlibC.)"); 502 std::stringstream
out;
503 out <<
"preallocate: " <<
bounds.maxNumberOfConcurrentRuns() <<
" concurrent runs, " 504 <<
bounds.maxNumberOfConcurrentLuminosityBlocks() <<
" luminosity sections, " <<
bounds.maxNumberOfStreams()
505 <<
" streams\nrunning on" <<
bounds.maxNumberOfThreads() <<
" threads";
508 auto concurrentStreams =
bounds.maxNumberOfStreams();
511 for (
unsigned int sid = 0; sid < concurrentStreams; ++sid) {
512 stream_domain_[sid] = nvtxDomainCreate(fmt::sprintf(
"EDM Stream %d", sid).c_str());
515 event_.resize(concurrentStreams);
519 std::vector<std::atomic<bool>>
tmp(concurrentStreams);
520 for (
auto& element :
tmp)
521 std::atomic_init(&element,
false);
616 auto const&
msg =
label +
" begin stream";
636 auto const&
msg =
label +
" end stream";
764 event_[sid] = nvtxInvalidRangeId;
769 bool expected =
false;
797 auto const&
msg =
label +
" prefetching";
814 auto mid =
desc.id();
817 auto const&
msg =
label +
" construction";
824 auto mid =
desc.id();
832 auto mid =
desc.id();
835 auto const&
msg =
label +
" destruction";
842 auto mid =
desc.id();
850 auto mid =
desc.id();
852 auto const&
msg =
label +
" begin job";
859 auto mid =
desc.id();
867 auto mid =
desc.id();
869 auto const&
msg =
label +
" end job";
876 auto mid =
desc.id();
887 auto const&
msg =
label +
" acquire";
974 auto const&
msg =
label +
" stream begin run";
994 auto const&
msg =
label +
" stream end run";
1014 auto const&
msg =
label +
" stream begin lumi";
1034 auto const&
msg =
label +
" stream end lumi";
1053 auto const&
msg =
label +
" global begin run";
1070 auto const&
msg =
label +
" global end run";
1087 auto const&
msg =
label +
" global begin lumi";
1104 auto const&
msg =
label +
" global end lumi";
1119 auto mid =
desc.id();
1122 auto const&
msg =
label +
" construction";
1129 auto mid =
desc.id();
void watchPostModuleGlobalEndLumi(PostModuleGlobalEndLumi::slot_type const &iSlot)
void watchPostModuleConstruction(PostModuleConstruction::slot_type const &iSlot)
void watchPreModuleGlobalBeginRun(PreModuleGlobalBeginRun::slot_type const &iSlot)
void watchPreEvent(PreEvent::slot_type const &iSlot)
void postStreamEndLumi(edm::StreamContext const &)
ModuleDescription const * moduleDescription() const
void preModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preGlobalBeginRun(edm::GlobalContext const &)
void postGlobalEndRun(edm::GlobalContext const &)
void preGlobalEndLumi(edm::GlobalContext const &)
void postModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPrePathEvent(PrePathEvent::slot_type const &iSlot)
void watchPreallocate(Preallocate::slot_type const &iSlot)
std::vector< ModuleDescription const * > const & allModules() const
void postModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreModuleEventAcquire(PreModuleEventAcquire::slot_type const &iSlot)
void watchPostEndJob(PostEndJob::slot_type const &iSlot)
void preModuleEndStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleEventAcquire(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPostModuleEndStream(PostModuleEndStream::slot_type const &iSlot)
void watchPreModuleEvent(PreModuleEvent::slot_type const &iSlot)
void postModuleEventAcquire(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreModuleConstruction(PreModuleConstruction::slot_type const &iSlot)
void watchPreGlobalEndLumi(PreGlobalEndLumi::slot_type const &iSlot)
void watchPostEvent(PostEvent::slot_type const &iSlot)
void preStreamBeginLumi(edm::StreamContext const &)
void preModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreStreamEndRun(PreStreamEndRun::slot_type const &iSlot)
void watchPreSourceConstruction(PreSourceConstruction::slot_type const &iSlot)
void preSourceRun(edm::RunIndex)
void watchPostSourceConstruction(PostSourceConstruction::slot_type const &iSlot)
void watchPostStreamEndLumi(PostStreamEndLumi::slot_type const &iSlot)
void watchPreGlobalBeginLumi(PreGlobalBeginLumi::slot_type const &iSlot)
void preModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postSourceConstruction(edm::ModuleDescription const &)
void preModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreEventReadFromSource(PreEventReadFromSource::slot_type const &iSlot)
void watchPreModuleDestruction(PreModuleDestruction::slot_type const &iSlot)
void watchPostPathEvent(PostPathEvent::slot_type const &iSlot)
void watchPostModuleEvent(PostModuleEvent::slot_type const &iSlot)
void watchPostModuleGlobalBeginLumi(PostModuleGlobalBeginLumi::slot_type const &iSlot)
void watchPostModuleStreamEndLumi(PostModuleStreamEndLumi::slot_type const &iSlot)
void watchPostGlobalBeginLumi(PostGlobalBeginLumi::slot_type const &iSlot)
void watchPostModuleStreamBeginRun(PostModuleStreamBeginRun::slot_type const &iSlot)
void watchPostSourceEvent(PostSourceEvent::slot_type const &iSlot)
uint32_t labelColorLight(std::string const &label) const
void postStreamEndRun(edm::StreamContext const &)
std::vector< std::vector< nvtxRangeId_t > > stream_modules_
void postModuleEndJob(edm::ModuleDescription const &)
void postGlobalEndLumi(edm::GlobalContext const &)
void preModuleEndJob(edm::ModuleDescription const &)
void preModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPreModuleBeginStream(PreModuleBeginStream::slot_type const &iSlot)
void preGlobalBeginLumi(edm::GlobalContext const &)
void postSourceLumi(edm::LuminosityBlockIndex)
void watchPreStreamEndLumi(PreStreamEndLumi::slot_type const &iSlot)
void watchPreModuleGlobalEndRun(PreModuleGlobalEndRun::slot_type const &iSlot)
void watchPreModuleEventPrefetching(PreModuleEventPrefetching::slot_type const &iSlot)
void preModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postCloseFile(std::string const &)
void postModuleConstruction(edm::ModuleDescription const &)
void postModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preGlobalEndRun(edm::GlobalContext const &)
void postSourceEvent(edm::StreamID)
void watchPostModuleEventPrefetching(PostModuleEventPrefetching::slot_type const &iSlot)
void preStreamEndRun(edm::StreamContext const &)
void postModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPreOpenFile(PreOpenFile::slot_type const &iSlot)
void watchPostGlobalBeginRun(PostGlobalBeginRun::slot_type const &iSlot)
void watchPostCloseFile(PostCloseFile::slot_type const &iSlot)
void postModuleDestruction(edm::ModuleDescription const &)
void watchPreGlobalEndRun(PreGlobalEndRun::slot_type const &iSlot)
bool highlight(std::string const &label) const
void preModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
uint32_t labelColor(std::string const &label) const
void watchPostSourceRun(PostSourceRun::slot_type const &iSlot)
void watchPostStreamBeginLumi(PostStreamBeginLumi::slot_type const &iSlot)
void preSourceConstruction(edm::ModuleDescription const &)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
void watchPreSourceLumi(PreSourceLumi::slot_type const &iSlot)
The Signals That Services Can Subscribe To. This is based on ActivityRegistry and is current per … Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application. Each possible callback has some defined arguments, which we here list in angle brackets, e.g. < void, edm::EventID const &, edm::Timestamp const & >. We also list in braces which AR_WATCH_USING_METHOD_ is used for those or …
float __attribute__((vector_size(8))) cms_float32x2_t
void postModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPreModuleEventDelayedGet(PreModuleEventDelayedGet::slot_type const &iSlot)
StreamID const & streamID() const
void preModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preBeginJob(edm::PathsAndConsumesOfModulesBase const &, edm::ProcessContext const &)
void watchPostModuleEventAcquire(PostModuleEventAcquire::slot_type const &iSlot)
void watchPreModuleEndJob(PreModuleEndJob::slot_type const &iSlot)
void preCloseFile(std::string const &)
void postPathEvent(edm::StreamContext const &, edm::PathContext const &, edm::HLTPathStatus const &)
void postStreamBeginLumi(edm::StreamContext const &)
void preStreamEndLumi(edm::StreamContext const &)
void watchPostGlobalEndLumi(PostGlobalEndLumi::slot_type const &iSlot)
void watchPreSourceRun(PreSourceRun::slot_type const &iSlot)
void preModuleBeginJob(edm::ModuleDescription const &)
void watchPreModuleBeginJob(PreModuleBeginJob::slot_type const &iSlot)
void postGlobalBeginLumi(edm::GlobalContext const &)
void watchPostStreamEndRun(PostStreamEndRun::slot_type const &iSlot)
tbb::concurrent_vector< nvtxRangeId_t > global_modules_
std::vector< nvtxRangeId_t > event_
void postEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
std::vector< nvtxDomainHandle_t > stream_domain_
void postModuleBeginStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPostModuleStreamEndRun(PostModuleStreamEndRun::slot_type const &iSlot)
void watchPreModuleGlobalBeginLumi(PreModuleGlobalBeginLumi::slot_type const &iSlot)
void postModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preModuleBeginStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceEvent(edm::StreamID)
void watchPreGlobalBeginRun(PreGlobalBeginRun::slot_type const &iSlot)
void watchPreModuleStreamBeginLumi(PreModuleStreamBeginLumi::slot_type const &iSlot)
nvtxDomainHandle_t global_domain_
void preOpenFile(std::string const &)
#define DEFINE_FWK_SERVICE(type)
void setComment(std::string const &value)
void watchPostModuleBeginStream(PostModuleBeginStream::slot_type const &iSlot)
void preModuleConstruction(edm::ModuleDescription const &)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
void watchPostSourceLumi(PostSourceLumi::slot_type const &iSlot)
NVProfilerService(const edm::ParameterSet &, edm::ActivityRegistry &)
void watchPreCloseFile(PreCloseFile::slot_type const &iSlot)
void watchPostModuleEventDelayedGet(PostModuleEventDelayedGet::slot_type const &iSlot)
void watchPostModuleGlobalEndRun(PostModuleGlobalEndRun::slot_type const &iSlot)
void watchPostModuleStreamBeginLumi(PostModuleStreamBeginLumi::slot_type const &iSlot)
void preStreamBeginRun(edm::StreamContext const &)
void postModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreModuleStreamEndLumi(PreModuleStreamEndLumi::slot_type const &iSlot)
void watchPreModuleStreamBeginRun(PreModuleStreamBeginRun::slot_type const &iSlot)
void watchPostGlobalEndRun(PostGlobalEndRun::slot_type const &iSlot)
void watchPreStreamBeginLumi(PreStreamBeginLumi::slot_type const &iSlot)
void add(std::string const &label, ParameterSetDescription const &psetDescription)
void watchPostOpenFile(PostOpenFile::slot_type const &iSlot)
void watchPreModuleEndStream(PreModuleEndStream::slot_type const &iSlot)
void postGlobalBeginRun(edm::GlobalContext const &)
void watchPreBeginJob(PreBeginJob::slot_type const &iSlot)
convenience function for attaching to signal
void postModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postEvent(edm::StreamContext const &)
const bool skipFirstEvent_
void watchPostStreamBeginRun(PostStreamBeginRun::slot_type const &iSlot)
void postSourceRun(edm::RunIndex)
void watchPreStreamBeginRun(PreStreamBeginRun::slot_type const &iSlot)
void watchPostModuleDestruction(PostModuleDestruction::slot_type const &iSlot)
void postOpenFile(std::string const &)
void preallocate(edm::service::SystemBounds const &)
void watchPreModuleStreamEndRun(PreModuleStreamEndRun::slot_type const &iSlot)
void postModuleEndStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceLumi(edm::LuminosityBlockIndex)
void watchPostModuleBeginJob(PostModuleBeginJob::slot_type const &iSlot)
void watchPostEventReadFromSource(PostEventReadFromSource::slot_type const &iSlot)
void watchPostModuleGlobalBeginRun(PostModuleGlobalBeginRun::slot_type const &iSlot)
const bool showModulePrefetching_
void preEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preEvent(edm::StreamContext const &)
std::string const & pathName() const
void preModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
std::atomic< bool > globalFirstEventDone_
void watchPreSourceEvent(PreSourceEvent::slot_type const &iSlot)
void postStreamBeginRun(edm::StreamContext const &)
std::vector< std::atomic< bool > > streamFirstEventDone_
void prePathEvent(edm::StreamContext const &, edm::PathContext const &)
std::string const & moduleLabel() const
void postModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPostModuleEndJob(PostModuleEndJob::slot_type const &iSlot)
std::vector< std::string > highlightModules_
void preModuleDestruction(edm::ModuleDescription const &)
void watchPreModuleGlobalEndLumi(PreModuleGlobalEndLumi::slot_type const &iSlot)
void postModuleBeginJob(edm::ModuleDescription const &)
void watchPostBeginJob(PostBeginJob::slot_type const &iSlot)
convenience function for attaching to signal