12 #include <tbb/enumerable_thread_specific.h> 13 #include <tbb/concurrent_vector.h> 15 #include <boost/format.hpp> 17 #include <cuda_profiler_api.h> 18 #include <nvToolsExt.h> 47 int nvtxDomainRangePush(nvtxDomainHandle_t domain,
const char*
message) {
48 nvtxEventAttributes_t eventAttrib = { 0 };
49 eventAttrib.version = NVTX_VERSION;
50 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
51 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
52 eventAttrib.message.ascii =
message;
53 return nvtxDomainRangePushEx(domain, &eventAttrib);
57 int nvtxDomainRangePushColor(nvtxDomainHandle_t domain, const
char*
message, uint32_t
color) {
58 nvtxEventAttributes_t eventAttrib = { 0 };
59 eventAttrib.version = NVTX_VERSION;
60 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
61 eventAttrib.colorType = NVTX_COLOR_ARGB;
62 eventAttrib.color =
color;
63 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
64 eventAttrib.message.ascii =
message;
65 return nvtxDomainRangePushEx(domain, &eventAttrib);
68 nvtxRangeId_t nvtxDomainRangeStart(nvtxDomainHandle_t domain,
const char*
message) {
69 nvtxEventAttributes_t eventAttrib = { 0 };
70 eventAttrib.version = NVTX_VERSION;
71 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
72 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
73 eventAttrib.message.ascii =
message;
74 return nvtxDomainRangeStartEx(domain, &eventAttrib);
77 nvtxRangeId_t nvtxDomainRangeStartColor(nvtxDomainHandle_t domain,
const char*
message, uint32_t
color) {
78 nvtxEventAttributes_t eventAttrib = { 0 };
79 eventAttrib.version = NVTX_VERSION;
80 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
81 eventAttrib.colorType = NVTX_COLOR_ARGB;
82 eventAttrib.color =
color;
83 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
84 eventAttrib.message.ascii =
message;
85 return nvtxDomainRangeStartEx(domain, &eventAttrib);
88 void nvtxDomainMark(nvtxDomainHandle_t domain,
const char*
message) {
89 nvtxEventAttributes_t eventAttrib = { 0 };
90 eventAttrib.version = NVTX_VERSION;
91 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
92 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
93 eventAttrib.message.ascii =
message;
94 nvtxDomainMarkEx(domain, &eventAttrib);
98 void nvtxDomainMarkColor(nvtxDomainHandle_t domain, const
char*
message, uint32_t
color) {
99 nvtxEventAttributes_t eventAttrib = { 0 };
100 eventAttrib.version = NVTX_VERSION;
101 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
102 eventAttrib.colorType = NVTX_COLOR_ARGB;
103 eventAttrib.color =
color;
104 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
105 eventAttrib.message.ascii =
message;
106 nvtxDomainMarkEx(domain, &eventAttrib);
110 nvtxBlack = 0x00000000,
111 nvtxRed = 0x00ff0000,
112 nvtxDarkGreen = 0x0000c000,
113 nvtxGreen = 0x0000ff00,
114 nvtxBlue = 0x000000ff,
115 nvtxAmber = 0x00ffbf00,
116 nvtxWhite = 0x00ffffff
194 void postSourceRun();
197 void preSourceLumi();
198 void postSourceLumi();
285 global = nvtxDomainCreate(
"EDM Global");
290 nvtxDomainDestroy(global);
291 for (
unsigned int sid = 0; sid < stream.size(); ++sid) {
292 nvtxDomainDestroy(stream[sid]);
297 stream.resize(streams);
298 for (
unsigned int sid = 0; sid < streams; ++sid) {
299 stream[sid] = nvtxDomainCreate((
boost::format(
"EDM Stream %d") % sid).
str().c_str());
310 return domains_.local().global;
314 return domains_.local().stream.at(sid);
320 highlightModules_(config.getUntrackedParameter<
std::vector<
std::
string>>(
"highlightModules")),
321 concurrentStreams_(0),
483 desc.
addUntracked<std::vector<std::string>>(
"highlightModules", {})->setComment(
"");
484 desc.
addUntracked<
bool>(
"showDelayedModules",
true)->setComment(
"");
485 descriptions.
add(
"NVProfilerService", desc);
486 descriptions.
setComment(
"This Service provides CMSSW-aware annotations to nvprof/nvvm.");
491 std::stringstream
out;
500 domain.allocate_streams(concurrentStreams_);
502 event_.resize(concurrentStreams_);
560 nvtxDomainRangePush(
global_domain(), (
"open file "s + lfn).c_str());
570 nvtxDomainRangePush(
global_domain(), (
"close file "s + lfn).c_str());
583 auto const &
msg =
label +
" begin stream";
602 auto const &
msg =
label +
" end stream";
683 nvtxDomainRangePush(
stream_domain(sid),
"stream begin lumi");
733 auto const &
msg =
label +
" prefetching";
749 auto mid = desc.
id();
752 auto const &
msg =
label +
" construction";
761 auto mid = desc.
id();
767 auto mid = desc.
id();
769 auto const &
msg =
label +
" begin job";
778 auto mid = desc.
id();
784 auto mid = desc.
id();
786 auto const &
msg =
label +
" end job";
795 auto mid = desc.
id();
868 auto const &
msg =
label +
" stream begin run";
887 auto const &
msg =
label +
" stream end run";
906 auto const &
msg =
label +
" stream begin lumi";
925 auto const &
msg =
label +
" stream end lumi";
943 auto const &
msg =
label +
" global begin run";
960 auto const &
msg =
label +
" global end run";
977 auto const &
msg =
label +
" global begin lumi";
994 auto const &
msg =
label +
" global end lumi";
1009 auto mid = desc.
id();
1012 auto const &
msg =
label +
" construction";
1021 auto mid = desc.
id();
void watchPostModuleGlobalEndLumi(PostModuleGlobalEndLumi::slot_type const &iSlot)
void watchPostModuleConstruction(PostModuleConstruction::slot_type const &iSlot)
std::string const & pathName() const
unsigned int maxNumberOfThreads() const
void watchPreModuleGlobalBeginRun(PreModuleGlobalBeginRun::slot_type const &iSlot)
void watchPreEvent(PreEvent::slot_type const &iSlot)
void postStreamEndLumi(edm::StreamContext const &)
void preOpenFile(std::string const &, bool)
void preModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preGlobalBeginRun(edm::GlobalContext const &)
void postGlobalEndRun(edm::GlobalContext const &)
void preGlobalEndLumi(edm::GlobalContext const &)
void postOpenFile(std::string const &, bool)
void postModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPrePathEvent(PrePathEvent::slot_type const &iSlot)
void watchPreallocate(Preallocate::slot_type const &iSlot)
void postModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
tbb::enumerable_thread_specific< Domains > domains_
void watchPostEndJob(PostEndJob::slot_type const &iSlot)
void preModuleEndStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPostModuleEndStream(PostModuleEndStream::slot_type const &iSlot)
void postCloseFile(std::string const &, bool)
void watchPreModuleEvent(PreModuleEvent::slot_type const &iSlot)
void watchPreModuleConstruction(PreModuleConstruction::slot_type const &iSlot)
void watchPreGlobalEndLumi(PreGlobalEndLumi::slot_type const &iSlot)
nvtxDomainHandle_t global
void watchPostEvent(PostEvent::slot_type const &iSlot)
void preStreamBeginLumi(edm::StreamContext const &)
void preModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
Domains(NVProfilerService *service)
void watchPreStreamEndRun(PreStreamEndRun::slot_type const &iSlot)
void watchPreSourceConstruction(PreSourceConstruction::slot_type const &iSlot)
void watchPostSourceConstruction(PostSourceConstruction::slot_type const &iSlot)
void watchPostStreamEndLumi(PostStreamEndLumi::slot_type const &iSlot)
void watchPreGlobalBeginLumi(PreGlobalBeginLumi::slot_type const &iSlot)
void preModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postSourceConstruction(edm::ModuleDescription const &)
void preModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreEventReadFromSource(PreEventReadFromSource::slot_type const &iSlot)
void watchPostPathEvent(PostPathEvent::slot_type const &iSlot)
float __attribute__((vector_size(8))) cms_float32x2_t
void watchPostModuleEvent(PostModuleEvent::slot_type const &iSlot)
void watchPostModuleGlobalBeginLumi(PostModuleGlobalBeginLumi::slot_type const &iSlot)
void preCloseFile(std::string const &, bool)
void watchPostModuleStreamEndLumi(PostModuleStreamEndLumi::slot_type const &iSlot)
void watchPostGlobalBeginLumi(PostGlobalBeginLumi::slot_type const &iSlot)
void watchPostModuleStreamBeginRun(PostModuleStreamBeginRun::slot_type const &iSlot)
void watchPostSourceEvent(PostSourceEvent::slot_type const &iSlot)
nvtxDomainHandle_t global_domain()
void postStreamEndRun(edm::StreamContext const &)
std::vector< nvtxDomainHandle_t > stream
std::vector< std::vector< nvtxRangeId_t > > stream_modules_
void postModuleEndJob(edm::ModuleDescription const &)
void postGlobalEndLumi(edm::GlobalContext const &)
void preModuleEndJob(edm::ModuleDescription const &)
void preModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPreModuleBeginStream(PreModuleBeginStream::slot_type const &iSlot)
void preGlobalBeginLumi(edm::GlobalContext const &)
void watchPreStreamEndLumi(PreStreamEndLumi::slot_type const &iSlot)
std::string const & moduleLabel() const
void watchPreModuleGlobalEndRun(PreModuleGlobalEndRun::slot_type const &iSlot)
void watchPreModuleEventPrefetching(PreModuleEventPrefetching::slot_type const &iSlot)
void preModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleConstruction(edm::ModuleDescription const &)
std::vector< ModuleDescription const * > const & allModules() const
void postModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preGlobalEndRun(edm::GlobalContext const &)
void postSourceEvent(edm::StreamID)
void watchPostModuleEventPrefetching(PostModuleEventPrefetching::slot_type const &iSlot)
void preStreamEndRun(edm::StreamContext const &)
unsigned int maxNumberOfStreams() const
void postModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPreOpenFile(PreOpenFile::slot_type const &iSlot)
void watchPostGlobalBeginRun(PostGlobalBeginRun::slot_type const &iSlot)
void watchPostCloseFile(PostCloseFile::slot_type const &iSlot)
void watchPreGlobalEndRun(PreGlobalEndRun::slot_type const &iSlot)
void preModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPostSourceRun(PostSourceRun::slot_type const &iSlot)
void watchPostStreamBeginLumi(PostStreamBeginLumi::slot_type const &iSlot)
void preSourceConstruction(edm::ModuleDescription const &)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
void watchPreSourceLumi(PreSourceLumi::slot_type const &iSlot)
void postModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPreModuleEventDelayedGet(PreModuleEventDelayedGet::slot_type const &iSlot)
ModuleDescription const * moduleDescription() const
void preModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preBeginJob(edm::PathsAndConsumesOfModulesBase const &, edm::ProcessContext const &)
void watchPreModuleEndJob(PreModuleEndJob::slot_type const &iSlot)
void postPathEvent(edm::StreamContext const &, edm::PathContext const &, edm::HLTPathStatus const &)
void postStreamBeginLumi(edm::StreamContext const &)
void preStreamEndLumi(edm::StreamContext const &)
void watchPostGlobalEndLumi(PostGlobalEndLumi::slot_type const &iSlot)
format
Some error handling for the usage.
void watchPreSourceRun(PreSourceRun::slot_type const &iSlot)
void preModuleBeginJob(edm::ModuleDescription const &)
void watchPreModuleBeginJob(PreModuleBeginJob::slot_type const &iSlot)
void postGlobalBeginLumi(edm::GlobalContext const &)
void watchPostStreamEndRun(PostStreamEndRun::slot_type const &iSlot)
tbb::concurrent_vector< nvtxRangeId_t > global_modules_
std::vector< nvtxRangeId_t > event_
void postEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleBeginStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPostModuleStreamEndRun(PostModuleStreamEndRun::slot_type const &iSlot)
void watchPreModuleGlobalBeginLumi(PreModuleGlobalBeginLumi::slot_type const &iSlot)
void postModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preModuleBeginStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
StreamID const & streamID() const
void preSourceEvent(edm::StreamID)
void watchPreGlobalBeginRun(PreGlobalBeginRun::slot_type const &iSlot)
void watchPreModuleStreamBeginLumi(PreModuleStreamBeginLumi::slot_type const &iSlot)
#define DEFINE_FWK_SERVICE(type)
void setComment(std::string const &value)
void watchPostModuleBeginStream(PostModuleBeginStream::slot_type const &iSlot)
void allocate_streams(unsigned int streams)
void preModuleConstruction(edm::ModuleDescription const &)
unsigned int maxNumberOfConcurrentLuminosityBlocks() const
void watchPostSourceLumi(PostSourceLumi::slot_type const &iSlot)
NVProfilerService(const edm::ParameterSet &, edm::ActivityRegistry &)
unsigned int concurrentStreams_
void watchPreCloseFile(PreCloseFile::slot_type const &iSlot)
void watchPostModuleEventDelayedGet(PostModuleEventDelayedGet::slot_type const &iSlot)
void watchPostModuleGlobalEndRun(PostModuleGlobalEndRun::slot_type const &iSlot)
void watchPostModuleStreamBeginLumi(PostModuleStreamBeginLumi::slot_type const &iSlot)
void preStreamBeginRun(edm::StreamContext const &)
void postModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPreModuleStreamEndLumi(PreModuleStreamEndLumi::slot_type const &iSlot)
void watchPreModuleStreamBeginRun(PreModuleStreamBeginRun::slot_type const &iSlot)
void watchPostGlobalEndRun(PostGlobalEndRun::slot_type const &iSlot)
void watchPreStreamBeginLumi(PreStreamBeginLumi::slot_type const &iSlot)
void add(std::string const &label, ParameterSetDescription const &psetDescription)
void watchPostOpenFile(PostOpenFile::slot_type const &iSlot)
void watchPreModuleEndStream(PreModuleEndStream::slot_type const &iSlot)
void postGlobalBeginRun(edm::GlobalContext const &)
void watchPreBeginJob(PreBeginJob::slot_type const &iSlot)
convenience function for attaching to signal
void postModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postEvent(edm::StreamContext const &)
void watchPostStreamBeginRun(PostStreamBeginRun::slot_type const &iSlot)
void watchPreStreamBeginRun(PreStreamBeginRun::slot_type const &iSlot)
void preallocate(edm::service::SystemBounds const &)
void watchPreModuleStreamEndRun(PreModuleStreamEndRun::slot_type const &iSlot)
void postModuleEndStream(edm::StreamContext const &, edm::ModuleCallingContext const &)
void watchPostModuleBeginJob(PostModuleBeginJob::slot_type const &iSlot)
bool highlight(std::string const &)
void watchPostEventReadFromSource(PostEventReadFromSource::slot_type const &iSlot)
void watchPostModuleGlobalBeginRun(PostModuleGlobalBeginRun::slot_type const &iSlot)
void preEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
unsigned int maxNumberOfConcurrentRuns() const
nvtxDomainHandle_t stream_domain(unsigned int sid)
void preEvent(edm::StreamContext const &)
void preModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPreSourceEvent(PreSourceEvent::slot_type const &iSlot)
void postStreamBeginRun(edm::StreamContext const &)
void prePathEvent(edm::StreamContext const &, edm::PathContext const &)
void postModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void watchPostModuleEndJob(PostModuleEndJob::slot_type const &iSlot)
std::vector< std::string > highlightModules_
void watchPreModuleGlobalEndLumi(PreModuleGlobalEndLumi::slot_type const &iSlot)
void postModuleBeginJob(edm::ModuleDescription const &)
void watchPostBeginJob(PostBeginJob::slot_type const &iSlot)
convenience function for attaching to signal