12 #include <tbb/enumerable_thread_specific.h> 13 #include <tbb/concurrent_vector.h> 15 #include <boost/format.hpp> 17 #include <cuda_profiler_api.h> 18 #include <nvToolsExt.h> 47 int nvtxDomainRangePush(nvtxDomainHandle_t domain,
const char*
message) {
48 nvtxEventAttributes_t eventAttrib = { 0 };
49 eventAttrib.version = NVTX_VERSION;
50 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
51 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
52 eventAttrib.message.ascii =
message;
53 return nvtxDomainRangePushEx(domain, &eventAttrib);
57 int nvtxDomainRangePushColor(nvtxDomainHandle_t domain, const
char*
message, uint32_t
color) {
58 nvtxEventAttributes_t eventAttrib = { 0 };
59 eventAttrib.version = NVTX_VERSION;
60 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
61 eventAttrib.colorType = NVTX_COLOR_ARGB;
62 eventAttrib.color =
color;
63 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
64 eventAttrib.message.ascii =
message;
65 return nvtxDomainRangePushEx(domain, &eventAttrib);
69 nvtxRangeId_t nvtxDomainRangeStart(nvtxDomainHandle_t domain, const
char*
message) {
70 nvtxEventAttributes_t eventAttrib = { 0 };
71 eventAttrib.version = NVTX_VERSION;
72 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
73 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
74 eventAttrib.message.ascii =
message;
75 return nvtxDomainRangeStartEx(domain, &eventAttrib);
78 nvtxRangeId_t nvtxDomainRangeStartColor(nvtxDomainHandle_t domain,
const char*
message, uint32_t
color) {
79 nvtxEventAttributes_t eventAttrib = { 0 };
80 eventAttrib.version = NVTX_VERSION;
81 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
82 eventAttrib.colorType = NVTX_COLOR_ARGB;
83 eventAttrib.color =
color;
84 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
85 eventAttrib.message.ascii =
message;
86 return nvtxDomainRangeStartEx(domain, &eventAttrib);
89 void nvtxDomainMark(nvtxDomainHandle_t domain,
const char*
message) {
90 nvtxEventAttributes_t eventAttrib = { 0 };
91 eventAttrib.version = NVTX_VERSION;
92 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
93 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
94 eventAttrib.message.ascii =
message;
95 nvtxDomainMarkEx(domain, &eventAttrib);
99 void nvtxDomainMarkColor(nvtxDomainHandle_t domain, const
char*
message, uint32_t
color) {
100 nvtxEventAttributes_t eventAttrib = { 0 };
101 eventAttrib.version = NVTX_VERSION;
102 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
103 eventAttrib.colorType = NVTX_COLOR_ARGB;
104 eventAttrib.color =
color;
105 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
106 eventAttrib.message.ascii =
message;
107 nvtxDomainMarkEx(domain, &eventAttrib);
111 nvtxBlack = 0x00000000,
112 nvtxRed = 0x00ff0000,
113 nvtxDarkGreen = 0x00009900,
114 nvtxGreen = 0x0000ff00,
115 nvtxLightGreen = 0x00ccffcc,
116 nvtxBlue = 0x000000ff,
117 nvtxAmber = 0x00ffbf00,
118 nvtxLightAmber = 0x00fff2cc,
119 nvtxWhite = 0x00ffffff
289 global = nvtxDomainCreate(
"EDM Global");
294 nvtxDomainDestroy(global);
295 for (
unsigned int sid = 0; sid < stream.size(); ++sid) {
296 nvtxDomainDestroy(stream[sid]);
301 stream.resize(streams);
302 for (
unsigned int sid = 0; sid < streams; ++sid) {
303 stream[sid] = nvtxDomainCreate((
boost::format(
"EDM Stream %d") % sid).
str().c_str());
314 return domains_.local().global;
318 return domains_.local().stream.at(sid);
324 highlightModules_(config.getUntrackedParameter<
std::vector<
std::
string>>(
"highlightModules")),
325 showModulePrefetching_(config.getUntrackedParameter<
bool>(
"showModulePrefetching")),
326 concurrentStreams_(0),
488 desc.
addUntracked<std::vector<std::string>>(
"highlightModules", {})->setComment(
"");
489 desc.
addUntracked<
bool>(
"showModulePrefetching",
false)->setComment(
"Show the stack of dependencies that requested to run a module.");
490 descriptions.
add(
"NVProfilerService", desc);
491 descriptions.
setComment(R
"(This Service provides CMSSW-aware annotations to nvprof/nvvm. 494 - the option '--cpu-profiling on' currently results in cmsRun being stuck at the beginning of the job. 495 - the option '--cpu-thread-tracing on' is not compatible with jemalloc, and should only be used with cmsRunGlibC.)"); 500 std::stringstream
out;
509 domain.allocate_streams(concurrentStreams_);
511 event_.resize(concurrentStreams_);
569 nvtxDomainRangePush(
global_domain(), (
"open file "s + lfn).c_str());
579 nvtxDomainRangePush(
global_domain(), (
"close file "s + lfn).c_str());
592 auto const &
msg =
label +
" begin stream";
611 auto const &
msg =
label +
" end stream";
692 nvtxDomainRangePush(
stream_domain(sid),
"stream begin lumi");
743 auto const &
msg =
label +
" prefetching";
762 auto mid = desc.
id();
765 auto const &
msg =
label +
" construction";
774 auto mid = desc.
id();
780 auto mid = desc.
id();
782 auto const &
msg =
label +
" begin job";
791 auto mid = desc.
id();
797 auto mid = desc.
id();
799 auto const &
msg =
label +
" end job";
808 auto mid = desc.
id();
881 auto const &
msg =
label +
" stream begin run";
900 auto const &
msg =
label +
" stream end run";
919 auto const &
msg =
label +
" stream begin lumi";
938 auto const &
msg =
label +
" stream end lumi";
956 auto const &
msg =
label +
" global begin run";
973 auto const &
msg =
label +
" global end run";
990 auto const &
msg =
label +
" global begin lumi";
1007 auto const &
msg =
label +
" global end lumi";
1022 auto mid = desc.
id();
1025 auto const &
msg =
label +
" construction";
1034 auto mid = desc.
id();
void watchPostModuleGlobalEndLumi(PostModuleGlobalEndLumi::slot_type const &iSlot)
void watchPostModuleConstruction(PostModuleConstruction::slot_type const &iSlot)
std::string const & pathName() const
unsigned int maxNumberOfThreads() const
void watchPreModuleGlobalBeginRun(PreModuleGlobalBeginRun::slot_type const &iSlot)
void watchPreEvent(PreEvent::slot_type const &iSlot)
void postStreamEndLumi(edm::StreamContext const &)
void preOpenFile(std::string const &, bool)
void preModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preGlobalBeginRun(edm::GlobalContext const &)
void postGlobalEndRun(edm::GlobalContext const &)
void preGlobalEndLumi(edm::GlobalContext const &)
void postOpenFile(std::string const &, bool)
void postModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPrePathEvent(PrePathEvent::slot_type const &iSlot)
void watchPreallocate(Preallocate::slot_type const &iSlot)
void postModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
tbb::enumerable_thread_specific< Domains > domains_
void watchPostEndJob(PostEndJob::slot_type const &iSlot)
void preModuleEndStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPostModuleEndStream(PostModuleEndStream::slot_type const &iSlot)
void postCloseFile(std::string const &, bool)
void watchPreModuleEvent(PreModuleEvent::slot_type const &iSlot)
void watchPreModuleConstruction(PreModuleConstruction::slot_type const &iSlot)
void watchPreGlobalEndLumi(PreGlobalEndLumi::slot_type const &iSlot)
nvtxDomainHandle_t global
void watchPostEvent(PostEvent::slot_type const &iSlot)
void preStreamBeginLumi(edm::StreamContext const &)
void preModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
Domains(NVProfilerService *service)
void watchPreStreamEndRun(PreStreamEndRun::slot_type const &iSlot)
void watchPreSourceConstruction(PreSourceConstruction::slot_type const &iSlot)
void preSourceRun(edm::RunIndex)
void watchPostSourceConstruction(PostSourceConstruction::slot_type const &iSlot)
void watchPostStreamEndLumi(PostStreamEndLumi::slot_type const &iSlot)
void watchPreGlobalBeginLumi(PreGlobalBeginLumi::slot_type const &iSlot)
void preModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postSourceConstruction(edm::ModuleDescription const &)
void preModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreEventReadFromSource(PreEventReadFromSource::slot_type const &iSlot)
void watchPostPathEvent(PostPathEvent::slot_type const &iSlot)
float __attribute__((vector_size(8))) cms_float32x2_t
void watchPostModuleEvent(PostModuleEvent::slot_type const &iSlot)
void watchPostModuleGlobalBeginLumi(PostModuleGlobalBeginLumi::slot_type const &iSlot)
void preCloseFile(std::string const &, bool)
void watchPostModuleStreamEndLumi(PostModuleStreamEndLumi::slot_type const &iSlot)
void watchPostGlobalBeginLumi(PostGlobalBeginLumi::slot_type const &iSlot)
void watchPostModuleStreamBeginRun(PostModuleStreamBeginRun::slot_type const &iSlot)
void watchPostSourceEvent(PostSourceEvent::slot_type const &iSlot)
nvtxDomainHandle_t global_domain()
void postStreamEndRun(edm::StreamContext const &)
std::vector< nvtxDomainHandle_t > stream
std::vector< std::vector< nvtxRangeId_t > > stream_modules_
void postModuleEndJob(edm::ModuleDescription const &)
void postGlobalEndLumi(edm::GlobalContext const &)
void preModuleEndJob(edm::ModuleDescription const &)
void preModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPreModuleBeginStream(PreModuleBeginStream::slot_type const &iSlot)
void preGlobalBeginLumi(edm::GlobalContext const &)
void postSourceLumi(edm::LuminosityBlockIndex)
void watchPreStreamEndLumi(PreStreamEndLumi::slot_type const &iSlot)
std::string const & moduleLabel() const
void watchPreModuleGlobalEndRun(PreModuleGlobalEndRun::slot_type const &iSlot)
void watchPreModuleEventPrefetching(PreModuleEventPrefetching::slot_type const &iSlot)
void preModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleConstruction(edm::ModuleDescription const &)
std::vector< ModuleDescription const * > const & allModules() const
void postModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preGlobalEndRun(edm::GlobalContext const &)
void postSourceEvent(edm::StreamID)
void watchPostModuleEventPrefetching(PostModuleEventPrefetching::slot_type const &iSlot)
bool showModulePrefetching_
void preStreamEndRun(edm::StreamContext const &)
unsigned int maxNumberOfStreams() const
void postModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPreOpenFile(PreOpenFile::slot_type const &iSlot)
void watchPostGlobalBeginRun(PostGlobalBeginRun::slot_type const &iSlot)
void watchPostCloseFile(PostCloseFile::slot_type const &iSlot)
void watchPreGlobalEndRun(PreGlobalEndRun::slot_type const &iSlot)
void preModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPostSourceRun(PostSourceRun::slot_type const &iSlot)
void watchPostStreamBeginLumi(PostStreamBeginLumi::slot_type const &iSlot)
void preSourceConstruction(edm::ModuleDescription const &)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
void watchPreSourceLumi(PreSourceLumi::slot_type const &iSlot)
void postModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPreModuleEventDelayedGet(PreModuleEventDelayedGet::slot_type const &iSlot)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
ModuleDescription const * moduleDescription() const
void preModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preBeginJob(edm::PathsAndConsumesOfModulesBase const &, edm::ProcessContext const &)
void watchPreModuleEndJob(PreModuleEndJob::slot_type const &iSlot)
void postPathEvent(edm::StreamContext const &, edm::PathContext const &, edm::HLTPathStatus const &)
void postStreamBeginLumi(edm::StreamContext const &)
void preStreamEndLumi(edm::StreamContext const &)
void watchPostGlobalEndLumi(PostGlobalEndLumi::slot_type const &iSlot)
format
Some error handling for the usage.
void watchPreSourceRun(PreSourceRun::slot_type const &iSlot)
void preModuleBeginJob(edm::ModuleDescription const &)
void watchPreModuleBeginJob(PreModuleBeginJob::slot_type const &iSlot)
void postGlobalBeginLumi(edm::GlobalContext const &)
void watchPostStreamEndRun(PostStreamEndRun::slot_type const &iSlot)
tbb::concurrent_vector< nvtxRangeId_t > global_modules_
std::vector< nvtxRangeId_t > event_
void postEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleBeginStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPostModuleStreamEndRun(PostModuleStreamEndRun::slot_type const &iSlot)
void watchPreModuleGlobalBeginLumi(PreModuleGlobalBeginLumi::slot_type const &iSlot)
void postModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preModuleBeginStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
StreamID const & streamID() const
void preSourceEvent(edm::StreamID)
void watchPreGlobalBeginRun(PreGlobalBeginRun::slot_type const &iSlot)
void watchPreModuleStreamBeginLumi(PreModuleStreamBeginLumi::slot_type const &iSlot)
#define DEFINE_FWK_SERVICE(type)
void setComment(std::string const &value)
void watchPostModuleBeginStream(PostModuleBeginStream::slot_type const &iSlot)
void allocate_streams(unsigned int streams)
void preModuleConstruction(edm::ModuleDescription const &)
unsigned int maxNumberOfConcurrentLuminosityBlocks() const
void watchPostSourceLumi(PostSourceLumi::slot_type const &iSlot)
NVProfilerService(const edm::ParameterSet &, edm::ActivityRegistry &)
unsigned int concurrentStreams_
void watchPreCloseFile(PreCloseFile::slot_type const &iSlot)
void watchPostModuleEventDelayedGet(PostModuleEventDelayedGet::slot_type const &iSlot)
void watchPostModuleGlobalEndRun(PostModuleGlobalEndRun::slot_type const &iSlot)
void watchPostModuleStreamBeginLumi(PostModuleStreamBeginLumi::slot_type const &iSlot)
void preStreamBeginRun(edm::StreamContext const &)
void postModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreModuleStreamEndLumi(PreModuleStreamEndLumi::slot_type const &iSlot)
void watchPreModuleStreamBeginRun(PreModuleStreamBeginRun::slot_type const &iSlot)
void watchPostGlobalEndRun(PostGlobalEndRun::slot_type const &iSlot)
void watchPreStreamBeginLumi(PreStreamBeginLumi::slot_type const &iSlot)
void add(std::string const &label, ParameterSetDescription const &psetDescription)
void watchPostOpenFile(PostOpenFile::slot_type const &iSlot)
void watchPreModuleEndStream(PreModuleEndStream::slot_type const &iSlot)
void postGlobalBeginRun(edm::GlobalContext const &)
void watchPreBeginJob(PreBeginJob::slot_type const &iSlot)
convenience function for attaching to signal
void postModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postEvent(edm::StreamContext const &)
void watchPostStreamBeginRun(PostStreamBeginRun::slot_type const &iSlot)
void postSourceRun(edm::RunIndex)
void watchPreStreamBeginRun(PreStreamBeginRun::slot_type const &iSlot)
void preallocate(edm::service::SystemBounds const &)
void watchPreModuleStreamEndRun(PreModuleStreamEndRun::slot_type const &iSlot)
void postModuleEndStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceLumi(edm::LuminosityBlockIndex)
void watchPostModuleBeginJob(PostModuleBeginJob::slot_type const &iSlot)
bool highlight(std::string const &)
void watchPostEventReadFromSource(PostEventReadFromSource::slot_type const &iSlot)
void watchPostModuleGlobalBeginRun(PostModuleGlobalBeginRun::slot_type const &iSlot)
void preEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
unsigned int maxNumberOfConcurrentRuns() const
nvtxDomainHandle_t stream_domain(unsigned int sid)
void preEvent(edm::StreamContext const &)
void preModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPreSourceEvent(PreSourceEvent::slot_type const &iSlot)
void postStreamBeginRun(edm::StreamContext const &)
void prePathEvent(edm::StreamContext const &, edm::PathContext const &)
void postModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPostModuleEndJob(PostModuleEndJob::slot_type const &iSlot)
std::vector< std::string > highlightModules_
void watchPreModuleGlobalEndLumi(PreModuleGlobalEndLumi::slot_type const &iSlot)
void postModuleBeginJob(edm::ModuleDescription const &)
void watchPostBeginJob(PostBeginJob::slot_type const &iSlot)
convenience function for attaching to signal