7 #include <oneapi/tbb/concurrent_vector.h> 9 #include <fmt/printf.h> 11 #include <cuda_profiler_api.h> 12 #include <nvToolsExt.h> 44 int nvtxDomainRangePush(nvtxDomainHandle_t domain,
const char* message) {
// Convenience wrapper around nvtxDomainRangePushEx for a plain text range.
//   domain  - NVTX domain handle the range is pushed on.
//   message - ASCII label; only the pointer is stored in the attributes here,
//             the string is not copied by this wrapper.
// Returns the value returned by nvtxDomainRangePushEx, unchanged.
// NOTE(review): presumably that value is the NVTX nesting level - confirm
// against the NVTX documentation.
// The attribute struct is zero-initialised, so every field not set below
// (including the colour fields) stays at zero.
45 nvtxEventAttributes_t eventAttrib = {};
// version and size are filled from the NVTX-provided macros.
46 eventAttrib.version = NVTX_VERSION;
47 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
48 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
49 eventAttrib.message.ascii = message;
50 return nvtxDomainRangePushEx(domain, &eventAttrib);
53 __attribute__((unused))
int nvtxDomainRangePushColor(nvtxDomainHandle_t domain,
const char* message, uint32_t color) {
// Coloured variant of the range-push helper: forwards an ASCII message and a
// colour to nvtxDomainRangePushEx.
//   domain  - NVTX domain handle the range is pushed on.
//   message - ASCII label; only the pointer is stored, the string is not
//             copied by this wrapper.
//   color   - colour value, tagged below as NVTX_COLOR_ARGB.
// Returns the value returned by nvtxDomainRangePushEx, unchanged.
// The function carries __attribute__((unused)); NOTE(review): presumably to
// silence unused-function warnings when no caller exists - confirm.
54 nvtxEventAttributes_t eventAttrib = {};
// version and size are filled from the NVTX-provided macros.
55 eventAttrib.version = NVTX_VERSION;
56 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
57 eventAttrib.colorType = NVTX_COLOR_ARGB;
58 eventAttrib.color = color;
59 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
60 eventAttrib.message.ascii = message;
61 return nvtxDomainRangePushEx(domain, &eventAttrib);
64 __attribute__((unused)) nvtxRangeId_t nvtxDomainRangeStart(nvtxDomainHandle_t domain,
const char* message) {
// Convenience wrapper around nvtxDomainRangeStartEx for a plain text range.
//   domain  - NVTX domain handle the range is started on.
//   message - ASCII label; only the pointer is stored, the string is not
//             copied by this wrapper.
// Returns the nvtxRangeId_t produced by nvtxDomainRangeStartEx, unchanged.
// The function carries __attribute__((unused)); NOTE(review): presumably to
// silence unused-function warnings when no caller exists - confirm.
// Zero-initialisation leaves the colour fields (and anything else not set
// below) at zero.
65 nvtxEventAttributes_t eventAttrib = {};
// version and size are filled from the NVTX-provided macros.
66 eventAttrib.version = NVTX_VERSION;
67 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
68 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
69 eventAttrib.message.ascii = message;
70 return nvtxDomainRangeStartEx(domain, &eventAttrib);
73 nvtxRangeId_t nvtxDomainRangeStartColor(nvtxDomainHandle_t domain,
const char* message, uint32_t color) {
// Coloured variant of the range-start helper: forwards an ASCII message and
// a colour to nvtxDomainRangeStartEx.
//   domain  - NVTX domain handle the range is started on.
//   message - ASCII label; only the pointer is stored, the string is not
//             copied by this wrapper.
//   color   - colour value, tagged below as NVTX_COLOR_ARGB.
// Returns the nvtxRangeId_t produced by nvtxDomainRangeStartEx, unchanged.
74 nvtxEventAttributes_t eventAttrib = {};
// version and size are filled from the NVTX-provided macros.
75 eventAttrib.version = NVTX_VERSION;
76 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
77 eventAttrib.colorType = NVTX_COLOR_ARGB;
78 eventAttrib.color = color;
79 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
80 eventAttrib.message.ascii = message;
81 return nvtxDomainRangeStartEx(domain, &eventAttrib);
84 void nvtxDomainMark(nvtxDomainHandle_t domain,
const char* message) {
// Convenience wrapper around nvtxDomainMarkEx for a plain text marker.
//   domain  - NVTX domain handle the marker is emitted on.
//   message - ASCII label; only the pointer is stored, the string is not
//             copied by this wrapper.
// Returns nothing.
// Zero-initialisation leaves the colour fields (and anything else not set
// below) at zero.
85 nvtxEventAttributes_t eventAttrib = {};
// version and size are filled from the NVTX-provided macros.
86 eventAttrib.version = NVTX_VERSION;
87 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
88 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
89 eventAttrib.message.ascii = message;
90 nvtxDomainMarkEx(domain, &eventAttrib);
93 __attribute__((unused))
void nvtxDomainMarkColor(nvtxDomainHandle_t domain,
const char* message, uint32_t color) {
// Coloured variant of the marker helper: forwards an ASCII message and a
// colour to nvtxDomainMarkEx.
//   domain  - NVTX domain handle the marker is emitted on.
//   message - ASCII label; only the pointer is stored, the string is not
//             copied by this wrapper.
//   color   - colour value, tagged below as NVTX_COLOR_ARGB.
// Returns nothing.
// The function carries __attribute__((unused)); NOTE(review): presumably to
// silence unused-function warnings when no caller exists - confirm.
94 nvtxEventAttributes_t eventAttrib = {};
// version and size are filled from the NVTX-provided macros.
95 eventAttrib.version = NVTX_VERSION;
96 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
97 eventAttrib.colorType = NVTX_COLOR_ARGB;
98 eventAttrib.color = color;
99 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
100 eventAttrib.message.ascii = message;
101 nvtxDomainMarkEx(domain, &eventAttrib);
105 nvtxBlack = 0x00000000,
106 nvtxRed = 0x00ff0000,
107 nvtxDarkGreen = 0x00009900,
108 nvtxGreen = 0x0000ff00,
109 nvtxLightGreen = 0x00ccffcc,
110 nvtxBlue = 0x000000ff,
111 nvtxAmber = 0x00ffbf00,
112 nvtxLightAmber = 0x00fff2cc,
113 nvtxWhite = 0x00ffffff
// Sentinel nvtxRangeId_t with all 64 bits set. Elsewhere in this file it is
// assigned to event_[sid]. NOTE(review): presumably this marks "no range
// currently open" for that stream - confirm against the event handlers.
116 constexpr nvtxRangeId_t nvtxInvalidRangeId = 0xfffffffffffffffful;
277 return (std::binary_search(highlightModules_.begin(), highlightModules_.end(),
label));
283 return highlight(
label) ? nvtxLightAmber : nvtxLightGreen;
293 std::atomic<bool> globalFirstEventDone_ =
false;
306 showModulePrefetching_(
config.getUntrackedParameter<
bool>(
"showModulePrefetching")),
307 skipFirstEvent_(
config.getUntrackedParameter<
bool>(
"skipFirstEvent")) {
485 desc.addUntracked<std::vector<std::string>>(
"highlightModules", {})->setComment(
"");
486 desc.addUntracked<
bool>(
"showModulePrefetching",
false)
487 ->setComment(
"Show the stack of dependencies that requested to run a module.");
488 desc.addUntracked<
bool>(
"skipFirstEvent",
false)
490 "Start profiling after the first event has completed.\nWith multiple streams, ignore transitions belonging " 491 "to events started in parallel to the first event.\nRequires running nvprof with the '--profile-from-start " 493 descriptions.
add(
"NVProfilerService",
desc);
494 descriptions.
setComment(R
"(This Service provides CMSSW-aware annotations to nvprof/nvvm. 496 Notes on nvprof options: 497 - the option '--profile-from-start off' should be used if skipFirstEvent is True. 498 - the option '--cpu-profiling on' currently results in cmsRun being stuck at the beginning of the job. 499 - the option '--cpu-thread-tracing on' is not compatible with jemalloc, and should only be used with cmsRunGlibC.)"); 503 std::stringstream
out;
504 out <<
"preallocate: " <<
bounds.maxNumberOfConcurrentRuns() <<
" concurrent runs, " 505 <<
bounds.maxNumberOfConcurrentLuminosityBlocks() <<
" luminosity sections, " <<
bounds.maxNumberOfStreams()
506 <<
" streams\nrunning on " <<
bounds.maxNumberOfThreads() <<
" threads";
509 auto concurrentStreams =
bounds.maxNumberOfStreams();
512 for (
unsigned int sid = 0; sid < concurrentStreams; ++sid) {
513 stream_domain_[sid] = nvtxDomainCreate(fmt::sprintf(
"EDM Stream %d", sid).c_str());
516 event_.resize(concurrentStreams);
520 std::vector<std::atomic<bool>>
tmp(concurrentStreams);
521 for (
auto& element :
tmp)
522 std::atomic_init(&element,
false);
618 auto const&
msg =
label +
" begin stream";
638 auto const&
msg =
label +
" end stream";
766 event_[sid] = nvtxInvalidRangeId;
771 bool expected =
false;
799 auto const&
msg =
label +
" prefetching";
816 auto mid =
desc.id();
819 auto const&
msg =
label +
" construction";
826 auto mid =
desc.id();
834 auto mid =
desc.id();
837 auto const&
msg =
label +
" destruction";
844 auto mid =
desc.id();
852 auto mid =
desc.id();
854 auto const&
msg =
label +
" begin job";
861 auto mid =
desc.id();
869 auto mid =
desc.id();
871 auto const&
msg =
label +
" end job";
878 auto mid =
desc.id();
889 auto const&
msg =
label +
" acquire";
976 auto const&
msg =
label +
" stream begin run";
996 auto const&
msg =
label +
" stream end run";
1016 auto const&
msg =
label +
" stream begin lumi";
1036 auto const&
msg =
label +
" stream end lumi";
1055 auto const&
msg =
label +
" global begin run";
1072 auto const&
msg =
label +
" global end run";
1089 auto const&
msg =
label +
" global begin lumi";
1106 auto const&
msg =
label +
" global end lumi";
1123 auto mid =
desc.id();
1126 auto const&
msg =
label +
" construction";
1133 auto mid =
desc.id();
void watchPostModuleGlobalEndLumi(PostModuleGlobalEndLumi::slot_type const &iSlot)
void watchPostModuleConstruction(PostModuleConstruction::slot_type const &iSlot)
void watchPreModuleGlobalBeginRun(PreModuleGlobalBeginRun::slot_type const &iSlot)
void watchPreEvent(PreEvent::slot_type const &iSlot)
void postStreamEndLumi(edm::StreamContext const &)
ModuleDescription const * moduleDescription() const
void preModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preGlobalBeginRun(edm::GlobalContext const &)
void postGlobalEndRun(edm::GlobalContext const &)
void preGlobalEndLumi(edm::GlobalContext const &)
void postModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPrePathEvent(PrePathEvent::slot_type const &iSlot)
void watchPreallocate(Preallocate::slot_type const &iSlot)
void postModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreModuleEventAcquire(PreModuleEventAcquire::slot_type const &iSlot)
void watchPostEndJob(PostEndJob::slot_type const &iSlot)
void preModuleEndStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleEventAcquire(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPostModuleEndStream(PostModuleEndStream::slot_type const &iSlot)
void watchPreModuleEvent(PreModuleEvent::slot_type const &iSlot)
void postModuleEventAcquire(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreModuleConstruction(PreModuleConstruction::slot_type const &iSlot)
void watchPreGlobalEndLumi(PreGlobalEndLumi::slot_type const &iSlot)
void watchPostEvent(PostEvent::slot_type const &iSlot)
void preStreamBeginLumi(edm::StreamContext const &)
void preModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreStreamEndRun(PreStreamEndRun::slot_type const &iSlot)
void watchPreSourceConstruction(PreSourceConstruction::slot_type const &iSlot)
void preSourceRun(edm::RunIndex)
void watchPostSourceConstruction(PostSourceConstruction::slot_type const &iSlot)
void watchPostStreamEndLumi(PostStreamEndLumi::slot_type const &iSlot)
void watchPreGlobalBeginLumi(PreGlobalBeginLumi::slot_type const &iSlot)
void preModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postSourceConstruction(edm::ModuleDescription const &)
void preModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreEventReadFromSource(PreEventReadFromSource::slot_type const &iSlot)
void watchPreModuleDestruction(PreModuleDestruction::slot_type const &iSlot)
void watchPostPathEvent(PostPathEvent::slot_type const &iSlot)
void watchPostModuleEvent(PostModuleEvent::slot_type const &iSlot)
void watchPostModuleGlobalBeginLumi(PostModuleGlobalBeginLumi::slot_type const &iSlot)
void watchPostModuleStreamEndLumi(PostModuleStreamEndLumi::slot_type const &iSlot)
void watchPostGlobalBeginLumi(PostGlobalBeginLumi::slot_type const &iSlot)
void watchPostModuleStreamBeginRun(PostModuleStreamBeginRun::slot_type const &iSlot)
void watchPostSourceEvent(PostSourceEvent::slot_type const &iSlot)
uint32_t labelColorLight(std::string const &label) const
void postStreamEndRun(edm::StreamContext const &)
std::vector< std::vector< nvtxRangeId_t > > stream_modules_
void postModuleEndJob(edm::ModuleDescription const &)
void postGlobalEndLumi(edm::GlobalContext const &)
void preModuleEndJob(edm::ModuleDescription const &)
void preModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPreModuleBeginStream(PreModuleBeginStream::slot_type const &iSlot)
void preGlobalBeginLumi(edm::GlobalContext const &)
void postSourceLumi(edm::LuminosityBlockIndex)
void watchPreStreamEndLumi(PreStreamEndLumi::slot_type const &iSlot)
void watchPreModuleGlobalEndRun(PreModuleGlobalEndRun::slot_type const &iSlot)
void watchPreModuleEventPrefetching(PreModuleEventPrefetching::slot_type const &iSlot)
void preModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postCloseFile(std::string const &)
void postModuleConstruction(edm::ModuleDescription const &)
ProcessCallGraph callgraph_
void postModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preGlobalEndRun(edm::GlobalContext const &)
void postSourceEvent(edm::StreamID)
void watchPostModuleEventPrefetching(PostModuleEventPrefetching::slot_type const &iSlot)
void preStreamEndRun(edm::StreamContext const &)
void postModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPreOpenFile(PreOpenFile::slot_type const &iSlot)
void watchPostGlobalBeginRun(PostGlobalBeginRun::slot_type const &iSlot)
void watchPostCloseFile(PostCloseFile::slot_type const &iSlot)
void postModuleDestruction(edm::ModuleDescription const &)
void watchPreGlobalEndRun(PreGlobalEndRun::slot_type const &iSlot)
bool highlight(std::string const &label) const
void preModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
uint32_t labelColor(std::string const &label) const
void watchPostSourceRun(PostSourceRun::slot_type const &iSlot)
void watchPostStreamBeginLumi(PostStreamBeginLumi::slot_type const &iSlot)
void preSourceConstruction(edm::ModuleDescription const &)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
void watchPreSourceLumi(PreSourceLumi::slot_type const &iSlot)
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
float __attribute__((vector_size(8))) cms_float32x2_t
void postModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPreModuleEventDelayedGet(PreModuleEventDelayedGet::slot_type const &iSlot)
StreamID const & streamID() const
void preBeginJob(edm::PathsAndConsumesOfModulesBase const &, edm::ProcessContext const &)
void preModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preBeginJob(edm::PathsAndConsumesOfModulesBase const &, edm::ProcessContext const &)
void watchPostModuleEventAcquire(PostModuleEventAcquire::slot_type const &iSlot)
void watchPreModuleEndJob(PreModuleEndJob::slot_type const &iSlot)
void preCloseFile(std::string const &)
void postPathEvent(edm::StreamContext const &, edm::PathContext const &, edm::HLTPathStatus const &)
void postStreamBeginLumi(edm::StreamContext const &)
void preSourceConstruction(edm::ModuleDescription const &)
void preStreamEndLumi(edm::StreamContext const &)
void watchPostGlobalEndLumi(PostGlobalEndLumi::slot_type const &iSlot)
void watchPreSourceRun(PreSourceRun::slot_type const &iSlot)
void preModuleBeginJob(edm::ModuleDescription const &)
void watchPreModuleBeginJob(PreModuleBeginJob::slot_type const &iSlot)
void postGlobalBeginLumi(edm::GlobalContext const &)
void watchPostStreamEndRun(PostStreamEndRun::slot_type const &iSlot)
tbb::concurrent_vector< nvtxRangeId_t > global_modules_
std::vector< nvtxRangeId_t > event_
void postEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
std::vector< nvtxDomainHandle_t > stream_domain_
void postModuleBeginStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPostModuleStreamEndRun(PostModuleStreamEndRun::slot_type const &iSlot)
void watchPreModuleGlobalBeginLumi(PreModuleGlobalBeginLumi::slot_type const &iSlot)
void postModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preModuleBeginStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceEvent(edm::StreamID)
void watchPreGlobalBeginRun(PreGlobalBeginRun::slot_type const &iSlot)
void watchPreModuleStreamBeginLumi(PreModuleStreamBeginLumi::slot_type const &iSlot)
nvtxDomainHandle_t global_domain_
void preOpenFile(std::string const &)
#define DEFINE_FWK_SERVICE(type)
void setComment(std::string const &value)
void watchPostModuleBeginStream(PostModuleBeginStream::slot_type const &iSlot)
unsigned int size() const
void preModuleConstruction(edm::ModuleDescription const &)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
void watchPostSourceLumi(PostSourceLumi::slot_type const &iSlot)
NVProfilerService(const edm::ParameterSet &, edm::ActivityRegistry &)
void watchPreCloseFile(PreCloseFile::slot_type const &iSlot)
void watchPostModuleEventDelayedGet(PostModuleEventDelayedGet::slot_type const &iSlot)
void watchPostModuleGlobalEndRun(PostModuleGlobalEndRun::slot_type const &iSlot)
void watchPostModuleStreamBeginLumi(PostModuleStreamBeginLumi::slot_type const &iSlot)
void preStreamBeginRun(edm::StreamContext const &)
void postModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreModuleStreamEndLumi(PreModuleStreamEndLumi::slot_type const &iSlot)
void watchPreModuleStreamBeginRun(PreModuleStreamBeginRun::slot_type const &iSlot)
void watchPostGlobalEndRun(PostGlobalEndRun::slot_type const &iSlot)
void watchPreStreamBeginLumi(PreStreamBeginLumi::slot_type const &iSlot)
void add(std::string const &label, ParameterSetDescription const &psetDescription)
void watchPostOpenFile(PostOpenFile::slot_type const &iSlot)
void watchPreModuleEndStream(PreModuleEndStream::slot_type const &iSlot)
void postGlobalBeginRun(edm::GlobalContext const &)
void watchPreBeginJob(PreBeginJob::slot_type const &iSlot)
convenience function for attaching to signal
void postModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postEvent(edm::StreamContext const &)
const bool skipFirstEvent_
void watchPostStreamBeginRun(PostStreamBeginRun::slot_type const &iSlot)
void postSourceRun(edm::RunIndex)
void watchPreStreamBeginRun(PreStreamBeginRun::slot_type const &iSlot)
void watchPostModuleDestruction(PostModuleDestruction::slot_type const &iSlot)
void postOpenFile(std::string const &)
void preallocate(edm::service::SystemBounds const &)
void watchPreModuleStreamEndRun(PreModuleStreamEndRun::slot_type const &iSlot)
void postModuleEndStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceLumi(edm::LuminosityBlockIndex)
void watchPostModuleBeginJob(PostModuleBeginJob::slot_type const &iSlot)
void watchPostEventReadFromSource(PostEventReadFromSource::slot_type const &iSlot)
void watchPostModuleGlobalBeginRun(PostModuleGlobalBeginRun::slot_type const &iSlot)
const bool showModulePrefetching_
void preEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preEvent(edm::StreamContext const &)
std::string const & pathName() const
void preModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
std::atomic< bool > globalFirstEventDone_
void watchPreSourceEvent(PreSourceEvent::slot_type const &iSlot)
void postStreamBeginRun(edm::StreamContext const &)
std::vector< std::atomic< bool > > streamFirstEventDone_
void prePathEvent(edm::StreamContext const &, edm::PathContext const &)
std::string const & moduleLabel() const
void postModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPostModuleEndJob(PostModuleEndJob::slot_type const &iSlot)
std::vector< std::string > highlightModules_
void preModuleDestruction(edm::ModuleDescription const &)
void watchPreModuleGlobalEndLumi(PreModuleGlobalEndLumi::slot_type const &iSlot)
void postModuleBeginJob(edm::ModuleDescription const &)
void watchPostBeginJob(PostBeginJob::slot_type const &iSlot)
convenience function for attaching to signal