10 #include <unordered_map> 11 #include <unordered_set> 14 #include <boost/range/irange.hpp> 17 #include <fmt/printf.h> 20 #include <nlohmann/json.hpp> 51 template <
class Rep,
class Period>
52 double ms(std::chrono::duration<Rep, Period> duration) {
53 return std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(duration).
count();
57 template <
class Rep,
class Period>
58 double ms(boost::chrono::duration<Rep, Period> duration) {
59 return boost::chrono::duration_cast<boost::chrono::duration<double, boost::milli>>(duration).
count();
63 double ms(std::atomic<boost::chrono::nanoseconds::rep>
const& duration) {
64 return boost::chrono::duration_cast<boost::chrono::duration<double, boost::milli>>(
73 uint64_t kB(std::atomic<uint64_t>
const& bytes) {
return bytes.load() / 1024; }
97 time_thread +=
other.time_thread;
98 time_real +=
other.time_real;
99 allocated +=
other.allocated;
100 deallocated +=
other.deallocated;
107 allocated +=
other.allocated.load();
108 deallocated +=
other.deallocated.load();
130 : time_thread(0ul), time_real(0ul), allocated(0ul), deallocated(0ul) {}
146 time_thread =
other.time_thread.load();
147 time_real =
other.time_real.load();
148 allocated =
other.allocated.load();
149 deallocated =
other.deallocated.load();
154 time_thread +=
other.time_thread.load();
155 time_real +=
other.time_real.load();
156 allocated +=
other.allocated.load();
157 deallocated +=
other.deallocated.load();
162 time_thread +=
other.time_thread.count();
163 time_real +=
other.time_real.count();
164 allocated +=
other.allocated;
165 deallocated +=
other.deallocated;
176 return other + *
this;
192 has_acquire = has_acquire
or other.has_acquire;
213 active +=
other.active;
235 for (
auto&
path : endpaths)
243 for (
unsigned int i : boost::irange(0ul,
paths.size()))
246 for (
unsigned int i : boost::irange(0ul, endpaths.size()))
247 endpaths[
i] +=
other.endpaths[
i];
261 std::vector<GroupOfModules>
const& groups)
273 for (
auto&
module : highlight)
284 overhead +=
other.overhead;
285 eventsetup +=
other.eventsetup;
286 event +=
other.event;
287 assert(highlight.size() ==
other.highlight.size());
288 for (
unsigned int i : boost::irange(0ul, highlight.size()))
289 highlight[
i] +=
other.highlight[
i];
291 for (
unsigned int i : boost::irange(0ul,
modules.size()))
294 for (
unsigned int i : boost::irange(0ul,
processes.size()))
313 #ifdef DEBUG_THREAD_CONCURRENCY 314 id = std::this_thread::get_id();
315 #endif // DEBUG_THREAD_CONCURRENCY 323 #ifdef DEBUG_THREAD_CONCURRENCY 324 assert(std::this_thread::get_id() ==
id);
325 #endif // DEBUG_THREAD_CONCURRENCY 330 store.time_thread = new_time_thread - time_thread;
331 store.time_real = new_time_real - time_real;
332 store.allocated = new_allocated - allocated;
333 store.deallocated = new_deallocated - deallocated;
334 time_thread = new_time_thread;
335 time_real = new_time_real;
336 allocated = new_allocated;
337 deallocated = new_deallocated;
341 #ifdef DEBUG_THREAD_CONCURRENCY 342 assert(std::this_thread::get_id() ==
id);
343 #endif // DEBUG_THREAD_CONCURRENCY 348 store.time_thread += new_time_thread - time_thread;
349 store.time_real += new_time_real - time_real;
350 store.allocated += new_allocated - allocated;
351 store.deallocated += new_deallocated - deallocated;
352 time_thread = new_time_thread;
353 time_real = new_time_real;
354 allocated = new_allocated;
355 deallocated = new_deallocated;
359 #ifdef DEBUG_THREAD_CONCURRENCY 360 assert(std::this_thread::get_id() ==
id);
361 #endif // DEBUG_THREAD_CONCURRENCY 368 store.allocated += new_allocated - allocated;
369 store.deallocated += new_deallocated - deallocated;
370 time_thread = new_time_thread;
371 time_real = new_time_real;
372 allocated = new_allocated;
373 deallocated = new_deallocated;
382 unsigned int lumisections,
386 std::string y_title_ms = fmt::sprintf(
"events / %.1f ms",
ranges.time_resolution);
387 std::string y_title_kB = fmt::sprintf(
"events / %.1f kB",
ranges.memory_resolution);
391 booker.
book1D(
name +
" time_thread",
title +
" processing time (cpu)", time_bins, 0.,
ranges.time_range);
392 time_thread_->
setXTitle(
"processing time [ms]");
393 time_thread_->setYTitle(y_title_ms);
394 time_thread_->setStatOverflows(kTRUE);
396 time_real_ = booker.
book1D(
name +
" time_real",
title +
" processing time (real)", time_bins, 0.,
ranges.time_range);
397 time_real_->
setXTitle(
"processing time [ms]");
398 time_real_->setYTitle(y_title_ms);
399 time_real_->setStatOverflows(kTRUE);
402 allocated_ = booker.
book1D(
name +
" allocated",
title +
" allocated memory", mem_bins, 0.,
ranges.memory_range);
404 allocated_->setYTitle(y_title_kB);
405 allocated_->setStatOverflows(kTRUE);
408 booker.
book1D(
name +
" deallocated",
title +
" deallocated memory", mem_bins, 0.,
ranges.memory_range);
410 deallocated_->setYTitle(y_title_kB);
411 deallocated_->setStatOverflows(kTRUE);
418 title +
" processing time (cpu) vs. lumisection",
426 time_thread_byls_->
setXTitle(
"lumisection");
427 time_thread_byls_->setYTitle(
"processing time [ms]");
428 time_thread_byls_->setStatOverflows(kTRUE);
431 title +
" processing time (real) vs. lumisection",
439 time_real_byls_->
setXTitle(
"lumisection");
440 time_real_byls_->setYTitle(
"processing time [ms]");
441 time_real_byls_->setStatOverflows(kTRUE);
445 title +
" allocated memory vs. lumisection",
453 allocated_byls_->
setXTitle(
"lumisection");
454 allocated_byls_->setYTitle(
"memory [kB]");
455 allocated_byls_->setStatOverflows(kTRUE);
458 title +
" deallocated memory vs. lumisection",
466 deallocated_byls_->
setXTitle(
"lumisection");
467 deallocated_byls_->setYTitle(
"memory [kB]");
468 deallocated_byls_->setStatOverflows(kTRUE);
474 time_thread_->Fill(ms(
data.time_thread));
476 if (time_thread_byls_)
477 time_thread_byls_->Fill(lumisection, ms(
data.time_thread));
480 time_real_->Fill(ms(
data.time_real));
483 time_real_byls_->Fill(lumisection, ms(
data.time_real));
486 allocated_->Fill(kB(
data.allocated));
489 allocated_byls_->Fill(lumisection, kB(
data.allocated));
492 deallocated_->Fill(kB(
data.deallocated));
494 if (deallocated_byls_)
495 deallocated_byls_->Fill(lumisection, kB(
data.deallocated));
502 if (time_thread_byls_)
512 allocated_->Fill(kB(
data.allocated));
515 allocated_byls_->Fill(lumisection, kB(
data.allocated));
518 deallocated_->Fill(kB(
data.deallocated));
520 if (deallocated_byls_)
521 deallocated_byls_->Fill(lumisection, kB(
data.deallocated));
526 unsigned int lumisection) {
535 if (time_thread_byls_)
559 if (deallocated_byls_)
568 unsigned int lumisections,
573 total_.book(booker,
"path",
path.name_,
ranges, lumisections, byls);
576 unsigned int bins =
path.modules_and_dependencies_.size();
577 module_counter_ = booker.
book1DD(
"module_counter",
"module counter",
bins + 1, -0.5,
bins + 0.5);
579 module_counter_->setStatOverflows(kTRUE);
580 module_time_thread_total_ =
581 booker.
book1DD(
"module_time_thread_total",
"total module time (cpu)",
bins, -0.5,
bins - 0.5);
582 module_time_thread_total_->
setYTitle(
"processing time [ms]");
583 module_time_thread_total_->setStatOverflows(kTRUE);
584 module_time_real_total_ =
585 booker.
book1DD(
"module_time_real_total",
"total module time (real)",
bins, -0.5,
bins - 0.5);
586 module_time_real_total_->
setYTitle(
"processing time [ms]");
587 module_time_real_total_->setStatOverflows(kTRUE);
589 module_allocated_total_ =
590 booker.
book1DD(
"module_allocated_total",
"total allocated memory",
bins, -0.5,
bins - 0.5);
591 module_allocated_total_->
setYTitle(
"memory [kB]");
592 module_allocated_total_->setStatOverflows(kTRUE);
593 module_deallocated_total_ =
594 booker.
book1DD(
"module_deallocated_total",
"total deallocated memory",
bins, -0.5,
bins - 0.5);
595 module_deallocated_total_->
setYTitle(
"memory [kB]");
596 module_deallocated_total_->setStatOverflows(kTRUE);
598 for (
unsigned int bin : boost::irange(0u,
bins)) {
599 auto const&
module = job[
path.modules_and_dependencies_[
bin]];
601 module.scheduled_ ?
module.module_.moduleLabel() :
module.module_.moduleLabel() +
" (unscheduled)";
602 module_counter_->setBinLabel(
bin + 1,
label);
603 module_time_thread_total_->setBinLabel(
bin + 1,
label);
604 module_time_real_total_->setBinLabel(
bin + 1,
label);
606 module_allocated_total_->setBinLabel(
bin + 1,
label);
607 module_deallocated_total_->setBinLabel(
bin + 1,
label);
610 module_counter_->setBinLabel(
bins + 1,
"");
620 total_.fill(
path.total,
ls);
623 for (
unsigned int i = 0;
i <
path.last; ++
i) {
626 module_counter_->Fill(
i);
628 if (module_time_thread_total_)
629 module_time_thread_total_->Fill(
i, ms(
module.total.time_thread));
631 if (module_time_real_total_)
632 module_time_real_total_->Fill(
i, ms(
module.total.time_real));
634 if (module_allocated_total_)
635 module_allocated_total_->Fill(
i, kB(
module.total.allocated));
637 if (module_deallocated_total_)
638 module_deallocated_total_->Fill(
i, kB(
module.total.deallocated));
640 if (module_counter_ and
path.status)
641 module_counter_->Fill(
path.last);
652 unsigned int lumisections,
656 event_.book(booker,
"process " +
process.name_,
"process " +
process.name_, event_ranges, lumisections, byls);
659 for (
unsigned int id : boost::irange(0ul, paths_.size())) {
660 paths_[
id].book(booker,
"path ", job,
process.paths_[
id], path_ranges, lumisections, byls);
662 for (
unsigned int id : boost::irange(0ul, endpaths_.size())) {
663 endpaths_[
id].book(booker,
"endpath ", job,
process.endPaths_[
id], path_ranges, lumisections, byls);
677 for (
unsigned int id : boost::irange(0ul, paths_.size()))
681 for (
unsigned int id : boost::irange(0ul, endpaths_.size()))
686 : event_(), event_ex_(),
overhead_(), highlight_(groups.
size()), modules_(job.
size()), processes_() {
694 std::vector<GroupOfModules>
const& groups,
698 unsigned int lumisections,
706 event_.book(booker,
"event",
"Event", event_ranges, lumisections, byls);
708 event_ex_.book(booker,
"explicit",
"Event (explicit)", event_ranges, lumisections, byls);
710 overhead_.book(booker,
"overhead",
"Overhead", event_ranges, lumisections, byls);
712 modules_[job.
source().
id()].book(booker,
"source",
"Source", module_ranges, lumisections, byls);
715 lumi_.book(booker,
"lumi",
"LumiSection transitions", event_ranges, lumisections, byls);
717 run_.book(booker,
"run",
"Run transtions", event_ranges, lumisections,
false);
721 for (
unsigned int group : boost::irange(0ul, groups.size())) {
723 highlight_[
group].book(booker,
"highlight " +
label,
"Highlight " +
label, event_ranges, lumisections, byls);
727 for (
unsigned int pid : boost::irange(0ul, job.
processes().size())) {
729 processes_[pid].book(booker, job,
process, event_ranges, path_ranges, lumisections, bypath, byls);
733 for (
unsigned int id :
process.modules_) {
744 event_.fill(
data.total,
ls);
745 event_ex_.fill(
data.event,
ls);
749 for (
unsigned int group : boost::irange(0ul, highlight_.size()))
753 for (
unsigned int id : boost::irange(0ul, modules_.size()))
754 modules_[
id].fill(
data.modules[
id].total,
ls);
756 for (
unsigned int pid : boost::irange(0ul, processes_.size()))
797 config.getUntrackedParameter<
double>(
"dqmTimeResolution"),
798 config.getUntrackedParameter<
double>(
"dqmMemoryRange"),
799 config.getUntrackedParameter<
double>(
"dqmMemoryResolution")}),
800 dqm_path_ranges_({
config.getUntrackedParameter<
double>(
"dqmPathTimeRange"),
801 config.getUntrackedParameter<
double>(
"dqmPathTimeResolution"),
802 config.getUntrackedParameter<
double>(
"dqmPathMemoryRange"),
803 config.getUntrackedParameter<
double>(
"dqmPathMemoryResolution")}),
804 dqm_module_ranges_({
config.getUntrackedParameter<
double>(
"dqmModuleTimeRange"),
805 config.getUntrackedParameter<
double>(
"dqmModuleTimeResolution"),
806 config.getUntrackedParameter<
double>(
"dqmModuleMemoryRange"),
807 config.getUntrackedParameter<
double>(
"dqmModuleMemoryResolution")}),
808 dqm_lumisections_range_(
config.getUntrackedParameter<
unsigned int>(
"dqmLumiSectionsRange")),
811 highlight_module_psets_(
config.getUntrackedParameter<std::vector<edm::ParameterSet>>(
"highlightModules")),
812 highlight_modules_(highlight_module_psets_.size())
815 tbb::task_scheduler_observer::observe();
891 LogDebug(
"FastTimerService") <<
"The FastTimerService received is currently not monitoring the signal \"" << signal
897 if (unsupported_signals_.insert(signal).second)
898 edm::LogWarning(
"FastTimerService") <<
"The FastTimerService received the unsupported signal \"" << signal
900 <<
"Please report how to reproduce the issue to cms-hlt@cern.ch .";
904 ignoredSignal(__func__);
907 if (isFirstSubprocess(gc)) {
909 subprocess_global_run_check_[
index] = 0;
910 run_transition_[
index].reset();
911 run_summary_[
index].reset();
928 dqm_lumisections_range_,
929 enable_dqm_bymodule_,
932 enable_dqm_transitions_);
946 concurrent_lumis_ =
bounds.maxNumberOfConcurrentLuminosityBlocks();
947 concurrent_runs_ =
bounds.maxNumberOfConcurrentRuns();
948 concurrent_streams_ =
bounds.maxNumberOfStreams();
949 concurrent_threads_ =
bounds.maxNumberOfThreads();
951 if (enable_dqm_bynproc_)
952 dqm_path_ += fmt::sprintf(
953 "/Running on %s with %d streams on %d threads",
processor_model, concurrent_streams_, concurrent_threads_);
957 auto safe_for_dqm =
"/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+=_()# "s;
958 for (
auto&
c : dqm_path_)
959 if (safe_for_dqm.find(
c) == std::string::npos)
963 subprocess_event_check_ = std::make_unique<std::atomic<unsigned int>[]>(concurrent_streams_);
964 for (
unsigned int i = 0;
i < concurrent_streams_; ++
i)
965 subprocess_event_check_[
i] = 0;
966 subprocess_global_run_check_ = std::make_unique<std::atomic<unsigned int>[]>(concurrent_runs_);
967 for (
unsigned int i = 0;
i < concurrent_runs_; ++
i)
968 subprocess_global_run_check_[
i] = 0;
969 subprocess_global_lumi_check_ = std::make_unique<std::atomic<unsigned int>[]>(concurrent_lumis_);
970 for (
unsigned int i = 0;
i < concurrent_lumis_; ++
i)
971 subprocess_global_lumi_check_[
i] = 0;
974 lumi_transition_.resize(concurrent_lumis_);
975 run_transition_.resize(concurrent_runs_);
979 callgraph_.preSourceConstruction(
module);
984 callgraph_.preBeginJob(pathsAndConsumes,
context);
988 unsigned int modules = callgraph_.size();
991 for (
unsigned int group : boost::irange(0ul, highlight_module_psets_.size())) {
993 auto labels = highlight_module_psets_[
group].getUntrackedParameter<std::vector<std::string>>(
"modules");
996 highlight_modules_[
group].label = highlight_module_psets_[
group].getUntrackedParameter<
std::string>(
"label");
997 highlight_modules_[
group].modules.reserve(
labels.size());
1000 auto const&
label = callgraph_.module(
i).moduleLabel();
1002 highlight_modules_[
group].modules.push_back(
i);
1005 highlight_module_psets_.clear();
1009 streams_.resize(concurrent_streams_,
temp);
1010 run_summary_.resize(concurrent_runs_,
temp);
1011 job_summary_ =
temp;
1016 enable_dqm_ =
false;
1017 edm::LogWarning(
"FastTimerService") <<
"The DQMStore is not avalable, the DQM plots will not be generated";
1022 plots_ = std::make_unique<PlotsPerJob>(callgraph_, highlight_modules_);
1033 ignoredSignal(__func__);
1036 if (isFirstSubprocess(gc)) {
1038 subprocess_global_lumi_check_[
index] = 0;
1039 lumi_transition_[
index].reset();
1048 ignoredSignal(__func__);
1052 bool last = isLastSubprocess(subprocess_global_lumi_check_[
index]);
1061 if (enable_dqm_transitions_) {
1077 ignoredSignal(__func__);
1081 bool last = isLastSubprocess(subprocess_global_run_check_[
index]);
1087 if (print_run_summary_) {
1092 if (enable_dqm_transitions_) {
1093 plots_->fill_run(run_transition_[
index]);
1104 thread().measure_and_accumulate(lumi_transition_[
index]);
1109 tbb::task_scheduler_observer::observe(
false);
1111 if (print_job_summary_) {
1115 if (write_json_summary_) {
1116 writeSummaryJSON(job_summary_, json_filename_);
1120 template <
typename T>
1122 out <<
"FastReport ";
1123 if (
label.size() < 60)
1124 for (
unsigned int i = (60 -
label.size()) / 2;
i > 0; --
i)
1126 out <<
' ' <<
label <<
" Summary ";
1127 if (
label.size() < 60)
1128 for (
unsigned int i = (59 -
label.size()) / 2;
i > 0; --
i)
1133 template <
typename T>
1135 out <<
"FastReport CPU time Real time Allocated Deallocated " <<
label <<
"\n";
1139 template <
typename T>
1141 out << fmt::sprintf(
"FastReport %10.1f ms %10.1f ms %+10d kB %+10d kB %s\n",
1142 ms(
data.time_thread),
1144 +static_cast<int64_t>(kB(
data.allocated)),
1145 -static_cast<int64_t>(kB(
data.deallocated)),
1149 template <
typename T>
1151 out << fmt::sprintf(
"FastReport %10.1f ms %10.1f ms %+10d kB %+10d kB %s\n",
1154 +
static_cast<int64_t
>(kB(
data.allocated)),
1155 -static_cast<int64_t>(kB(
data.deallocated)),
1159 template <
typename T>
1161 printHeader(
out,
"Event");
1162 printEventHeader(
out,
"Modules");
1163 auto const& source_d = callgraph_.source();
1164 auto const&
source =
data.modules[source_d.id()];
1165 printEventLine(
out,
source.total, source_d.moduleLabel());
1166 for (
unsigned int i = 0;
i < callgraph_.processes().size(); ++
i) {
1167 auto const& proc_d = callgraph_.processDescription(
i);
1169 printEventLine(
out,
proc.total,
"process " + proc_d.name_);
1170 for (
unsigned int m : proc_d.modules_) {
1171 auto const& module_d = callgraph_.module(
m);
1173 printEventLine(
out,
module.total,
" " + module_d.moduleLabel());
1176 printEventLine(
out,
data.total,
"total");
1178 printEventHeader(
out,
"Processes and Paths");
1179 printEventLine(
out,
source.total, source_d.moduleLabel());
1180 for (
unsigned int i = 0;
i < callgraph_.processes().size(); ++
i) {
1181 auto const& proc_d = callgraph_.processDescription(
i);
1183 printEventLine(
out,
proc.total,
"process " + proc_d.name_);
1184 for (
unsigned int p = 0;
p <
proc.paths.size(); ++
p) {
1185 auto const&
name = proc_d.paths_[
p].name_;
1187 printEventLine(
out,
path.active,
name +
" (only scheduled modules)");
1188 printEventLine(
out,
path.total,
name +
" (including dependencies)");
1190 for (
unsigned int p = 0;
p <
proc.endpaths.size(); ++
p) {
1191 auto const&
name = proc_d.endPaths_[
p].name_;
1193 printEventLine(
out,
path.active,
name +
" (only scheduled modules)");
1194 printEventLine(
out,
path.total,
name +
" (including dependencies)");
1197 printEventLine(
out,
data.total,
"total");
1199 for (
unsigned int group : boost::irange(0ul, highlight_modules_.size())) {
1200 printEventHeader(
out,
"Highlighted modules");
1202 auto const& module_d = callgraph_.module(
m);
1204 printEventLine(
out,
module.total,
" " + module_d.moduleLabel());
1211 template <
typename T>
1215 out <<
"FastReport CPU time avg. when run Real time avg. when run Alloc. avg. when run Dealloc. avg. when run ";
1218 out <<
"FastReport CPU time avg. Real time avg. Alloc. avg. Dealloc. avg. ";
1224 template <
typename T>
1227 out <<
"FastReport CPU time sched. / depend. Real time sched. / depend. Alloc. sched. / depend. Dealloc. sched. / depend. ";
1233 template <
typename T>
1235 out << fmt::sprintf(
1237 "FastReport %10.1f ms %10.1f ms %+10d kB %+10d kB %s\n",
1246 template <
typename T>
1249 out << fmt::sprintf(
1251 "FastReport %10.1f ms %10.1f ms %10.1f ms %10.1f ms %+10d kB %+10d kB %+10d kB %+10d kB %s\n",
1254 (active ? ms(
data.time_thread) / active : 0),
1256 (active ? ms(
data.time_real) / active : 0),
1258 (active ? +static_cast<int64_t>(kB(
data.allocated) / active) : 0),
1260 (active ? -static_cast<int64_t>(kB(
data.deallocated) / active) : 0),
1264 template <
typename T>
1269 out << fmt::sprintf(
1271 "FastReport %10.1f ms %10.1f ms %+10d kB %+10d kB %s\n",
1280 template <
typename T>
1283 out << fmt::sprintf(
1284 "FastReport %10.1f ms %10.1f ms %10.1f ms %10.1f ms %+10d kB %+10d kB %+10d kB %+10d kB %s\n",
1286 (active ? ms(
data.time_thread) / active : 0),
1288 (active ? ms(
data.time_real) / active : 0),
1290 (active ? +static_cast<int64_t>(kB(
data.allocated) / active) : 0),
1292 (active ? -static_cast<int64_t>(kB(
data.deallocated) / active) : 0),
1296 template <
typename T>
1299 out << fmt::sprintf(
1300 "FastReport %10.1f ms %10.1f ms %10.1f ms %10.1f ms %+10d kB %+10d kB %+10d kB %+10d kB %s\n",
1312 template <
typename T>
1315 printSummaryHeader(
out,
"Modules",
true);
1316 auto const& source_d = callgraph_.source();
1317 auto const&
source =
data.modules[source_d.id()];
1319 for (
unsigned int i = 0;
i < callgraph_.processes().size(); ++
i) {
1320 auto const& proc_d = callgraph_.processDescription(
i);
1322 printSummaryLine(
out,
proc.total,
data.events,
"process " + proc_d.name_);
1323 for (
unsigned int m : proc_d.modules_) {
1324 auto const& module_d = callgraph_.module(
m);
1326 printSummaryLine(
out,
module.total,
data.events,
module.events,
" " + module_d.moduleLabel());
1329 printSummaryLine(
out,
data.total,
data.events,
"total");
1330 printSummaryLine(
out,
data.overhead,
data.events,
"other");
1331 printSummaryLine(
out,
data.eventsetup,
data.events,
"eventsetup");
1333 printPathSummaryHeader(
out,
"Processes and Paths");
1334 printSummaryLine(
out,
source.total,
data.events, source_d.moduleLabel());
1335 for (
unsigned int i = 0;
i < callgraph_.processes().size(); ++
i) {
1336 auto const& proc_d = callgraph_.processDescription(
i);
1338 printSummaryLine(
out,
proc.total,
data.events,
"process " + proc_d.name_);
1339 for (
unsigned int p = 0;
p <
proc.paths.size(); ++
p) {
1340 auto const&
name = proc_d.paths_[
p].name_;
1344 for (
unsigned int p = 0;
p <
proc.endpaths.size(); ++
p) {
1345 auto const&
name = proc_d.endPaths_[
p].name_;
1350 printSummaryLine(
out,
data.total,
data.events,
"total");
1351 printSummaryLine(
out,
data.overhead,
data.events,
"other");
1352 printSummaryLine(
out,
data.eventsetup,
data.events,
"eventsetup");
1354 for (
unsigned int group : boost::irange(0ul, highlight_modules_.size())) {
1355 printSummaryHeader(
out,
"Highlighted modules",
true);
1357 auto const& module_d = callgraph_.module(
m);
1366 template <
typename T>
1368 printEventHeader(
out,
"Transition");
1372 template <
typename T>
1376 T const&
data)
const {
1380 {
"time_thread", ms(
data.time_thread)},
1381 {
"time_real", ms(
data.time_real)},
1382 {
"mem_alloc", kB(
data.allocated)},
1383 {
"mem_free", kB(
data.deallocated)}};
1395 json{{
"time_thread",
"cpu time"}},
1396 json{{
"mem_alloc",
"allocated memory"}},
1397 json{{
"mem_free",
"deallocated memory"}}});
1400 j[
"total"] = encodeToJSON(
1401 "Job", callgraph_.processDescription(0).name_,
data.events,
data.total +
data.overhead +
data.eventsetup);
1405 for (
unsigned int i = 0;
i < callgraph_.size(); ++
i) {
1406 auto const&
module = callgraph_.module(
i);
1407 auto const& data_m =
data.modules[
i];
1408 j[
"modules"].push_back(encodeToJSON(
module, data_m));
1412 j[
"modules"].push_back(encodeToJSON(
"other",
"other",
data.events,
data.overhead));
1413 j[
"modules"].push_back(encodeToJSON(
"eventsetup",
"eventsetup",
data.events,
data.eventsetup));
1416 out << std::setw(2) <<
j << std::flush;
1432 unsigned int old_value =
check.fetch_add(1, std::memory_order_acq_rel);
1433 return (old_value == callgraph_.processes().size() - 1);
1439 ignoredSignal(__func__);
1443 auto&
stream = streams_[sid];
1444 auto&
process = callgraph_.processDescription(pid);
1448 for (
unsigned int id :
process.modules_)
1453 bool last = isLastSubprocess(subprocess_event_check_[sid]);
1458 stream.event_measurement.measure_and_store(
stream.event);
1461 unsigned int id = 0;
1465 for (
unsigned int group : boost::irange(0ul, highlight_modules_.size()))
1471 std::lock_guard<std::mutex> guard(summary_mutex_);
1476 if (print_event_summary_) {
1488 auto&
stream = streams_[sid];
1492 subprocess_event_check_[sid] = 0;
1495 auto& measurement = thread();
1496 measurement.measure_and_accumulate(
stream.overhead);
1497 stream.event_measurement = measurement;
1502 unsigned int id = md.
id();
1503 auto&
stream = streams_[sid];
1506 thread().measure_and_store(
module.total);
1513 unsigned int id = pc.
pathID();
1514 auto&
stream = streams_[sid];
1516 data.status =
false;
1525 unsigned int id = pc.
pathID();
1526 auto&
stream = streams_[sid];
1530 pc.
isEndPath() ? callgraph_.processDescription(pid).endPaths_[
id] : callgraph_.processDescription(pid).paths_[
id];
1531 unsigned int index =
path.modules_on_path_.empty() ? 0 :
status.index() + 1;
1532 data.last =
path.modules_on_path_.empty() ? 0 :
path.last_dependency_of_module_[
status.index()];
1534 for (
unsigned int i = 0;
i <
index; ++
i) {
1538 for (
unsigned int i = 0;
i <
data.last; ++
i) {
1546 auto&
stream = streams_[sid];
1547 thread().measure_and_accumulate(
stream.overhead);
1552 unsigned int id = md.
id();
1554 auto&
stream = streams_[sid];
1557 module.has_acquire =
true;
1558 thread().measure_and_store(
module.total);
1563 auto&
stream = streams_[sid];
1564 thread().measure_and_accumulate(
stream.overhead);
1569 unsigned int id = md.
id();
1571 auto&
stream = streams_[sid];
1574 if (
module.has_acquire) {
1575 thread().measure_and_accumulate(
module.total);
1577 thread().measure_and_store(
module.total);
1583 unsupportedSignal(__func__);
1587 unsupportedSignal(__func__);
1591 ignoredSignal(__func__);
1595 ignoredSignal(__func__);
1601 thread().measure_and_accumulate(
stream.overhead);
1608 unsigned int id = md.
id();
1612 thread().measure_and_accumulate(
module.total);
1617 thread().measure_and_accumulate(overhead_);
1622 thread().measure_and_accumulate(run_transition_[
index]);
1626 thread().measure_and_accumulate(overhead_);
1631 thread().measure_and_accumulate(run_transition_[
index]);
1635 thread().measure_and_accumulate(overhead_);
1640 thread().measure_and_accumulate(lumi_transition_[
index]);
1644 thread().measure_and_accumulate(overhead_);
1649 thread().measure_and_accumulate(lumi_transition_[
index]);
1653 thread().measure_and_accumulate(overhead_);
1658 thread().measure_and_accumulate(run_transition_[
index]);
1662 thread().measure_and_accumulate(overhead_);
1667 thread().measure_and_accumulate(run_transition_[
index]);
1671 thread().measure_and_accumulate(overhead_);
1676 thread().measure_and_accumulate(lumi_transition_[
index]);
1680 thread().measure_and_accumulate(overhead_);
1685 thread().measure_and_accumulate(lumi_transition_[
index]);
1693 auto&
stream = streams_[sid];
1694 thread().measure_and_accumulate(
stream.overhead);
1702 auto&
stream = streams_[sid];
1703 thread().measure_and_accumulate(
stream.eventsetup);
1710 throw cms::Exception(
"FastTimerService") <<
"ThreadGuard key creation failed: " << ::strerror(
err);
1716 auto ptr = ::pthread_getspecific(key_);
1719 auto p = thread_resources_.emplace_back(std::make_shared<specific_t>(
r));
1720 auto pp =
new std::shared_ptr<specific_t>(*p);
1721 auto err = ::pthread_setspecific(key_,
pp);
1723 throw cms::Exception(
"FastTimerService") <<
"ThreadGuard pthread_setspecific failed: " << ::strerror(
err);
1731 return static_cast<std::shared_ptr<specific_t>*
>(
p);
1737 auto expected =
true;
1738 if ((*ps)->live_.compare_exchange_strong(expected,
false)) {
1740 (*ps)->measurement_.measure_and_accumulate((*ps)->resource_);
1747 for (
auto&
p : thread_resources_) {
1748 auto expected =
true;
1749 if (
p->live_.compare_exchange_strong(expected,
false)) {
1750 p->measurement_.measure_and_accumulate(
p->resource_);
1756 return (*ptr(::pthread_getspecific(key_)))->measurement_;
1773 desc.addUntracked<
bool>(
"printEventSummary",
false);
1774 desc.addUntracked<
bool>(
"printRunSummary",
true);
1775 desc.addUntracked<
bool>(
"printJobSummary",
true);
1780 desc.addUntracked<
bool>(
"writeJSONSummary",
false);
1783 desc.addUntracked<
bool>(
"enableDQM",
true);
1784 desc.addUntracked<
bool>(
"enableDQMbyModule",
false);
1785 desc.addUntracked<
bool>(
"enableDQMbyPath",
false);
1786 desc.addUntracked<
bool>(
"enableDQMbyLumiSection",
false);
1787 desc.addUntracked<
bool>(
"enableDQMbyProcesses",
false);
1788 desc.addUntracked<
bool>(
"enableDQMTransitions",
false);
1789 desc.addUntracked<
double>(
"dqmTimeRange", 1000.);
1790 desc.addUntracked<
double>(
"dqmTimeResolution", 5.);
1791 desc.addUntracked<
double>(
"dqmMemoryRange", 1000000.);
1792 desc.addUntracked<
double>(
"dqmMemoryResolution", 5000.);
1793 desc.addUntracked<
double>(
"dqmPathTimeRange", 100.);
1794 desc.addUntracked<
double>(
"dqmPathTimeResolution", 0.5);
1795 desc.addUntracked<
double>(
"dqmPathMemoryRange", 1000000.);
1796 desc.addUntracked<
double>(
"dqmPathMemoryResolution", 5000.);
1797 desc.addUntracked<
double>(
"dqmModuleTimeRange", 40.);
1798 desc.addUntracked<
double>(
"dqmModuleTimeResolution", 0.2);
1799 desc.addUntracked<
double>(
"dqmModuleMemoryRange", 100000.);
1800 desc.addUntracked<
double>(
"dqmModuleMemoryResolution", 500.);
1801 desc.addUntracked<
unsigned>(
"dqmLumiSectionsRange", 2500);
1805 highlightModulesDescription.
addUntracked<std::vector<std::string>>(
"modules", {});
1806 highlightModulesDescription.addUntracked<
std::string>(
"label",
"producers");
1807 desc.addVPSetUntracked(
"highlightModules", highlightModulesDescription, {});
1812 ->setComment(
"This parameter is obsolete and will be ignored.");
1814 ->setComment(
"This parameter is obsolete and will be ignored.");
1816 ->setComment(
"This parameter is obsolete and will be ignored.");
1818 ->setComment(
"This parameter is obsolete and will be ignored.");
1820 ->setComment(
"This parameter is obsolete and will be ignored.");
1822 ->setComment(
"This parameter is obsolete and will be ignored.");
1824 ->setComment(
"This parameter is obsolete and will be ignored.");
1826 ->setComment(
"This parameter is obsolete and will be ignored.");
1828 ->setComment(
"This parameter is obsolete and will be ignored.");
1830 ->setComment(
"This parameter is obsolete and will be ignored.");
1832 ->setComment(
"This parameter is obsolete and will be ignored.");
1834 ->setComment(
"This parameter is obsolete and will be ignored.");
1836 ->setComment(
"This parameter is obsolete and will be ignored.");
1838 ->setComment(
"This parameter is obsolete and will be ignored.");
1840 descriptions.
add(
"FastTimerService",
desc);
ResourcesPerJob()=default
void preGlobalBeginRun(edm::GlobalContext const &)
void preModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
const bool print_run_summary_
constexpr int32_t ceil(float num)
void fill(Resources const &, unsigned int lumisection)
ResourcesPerProcess operator+(ResourcesPerProcess const &other) const
void postStreamBeginRun(edm::StreamContext const &)
ModuleDescription const * moduleDescription() const
void postGlobalEndLumi(edm::GlobalContext const &)
LuminosityBlockNumber_t luminosityBlock() const
void postGlobalBeginLumi(edm::GlobalContext const &)
void on_scheduler_entry(bool worker) final
void postStreamEndLumi(edm::StreamContext const &)
void on_scheduler_exit(bool worker) final
void postModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preGlobalEndRun(edm::GlobalContext const &)
void preBeginJob(edm::PathsAndConsumesOfModulesBase const &, edm::ProcessContext const &)
void fill_lumi(AtomicResources const &, unsigned int lumisection)
void preSourceLumi(edm::LuminosityBlockIndex)
void printSummary(T &out, ResourcesPerJob const &data, std::string const &label) const
boost::date_time::subsecond_duration< boost::posix_time::time_duration, 1000000000 > nanoseconds
ResourcesPerJob operator+(ResourcesPerJob const &other) const
unsigned int concurrent_threads_
void postEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
const PlotRanges dqm_event_ranges_
void postModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preSourceRun(edm::RunIndex)
void postModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
void unsupportedSignal(const std::string &signal) const
void writeSummaryJSON(ResourcesPerJob const &data, std::string const &filename) const
virtual void setCurrentFolder(std::string const &fullpath)
const bool write_json_summary_
void book(dqm::reco::DQMStore::IBooker &, ProcessCallGraph const &, ProcessCallGraph::ProcessType const &, PlotRanges const &event_ranges, PlotRanges const &path_ranges, unsigned int lumisections, bool bypath, bool byls)
PlotsPerJob(ProcessCallGraph const &job, std::vector< GroupOfModules > const &groups)
PlotsPerProcess(ProcessCallGraph::ProcessType const &)
void preGlobalEndLumi(edm::GlobalContext const &)
ModuleCallingContext const * getTopModuleCallingContext() const
edm::ModuleDescription const & source() const
virtual std::string pwd()
AtomicResources overhead_
void preESModule(edm::eventsetup::EventSetupRecordKey const &, edm::ESModuleCallingContext const &)
const std::string json_filename_
bool isFirstSubprocess(edm::StreamContext const &)
const bool enable_dqm_bynproc_
void postESModule(edm::eventsetup::EventSetupRecordKey const &, edm::ESModuleCallingContext const &)
unsigned int concurrent_runs_
void postSourceRun(edm::RunIndex)
StreamContext const * streamContext() const
void postPathEvent(edm::StreamContext const &, edm::PathContext const &, edm::HLTPathStatus const &)
void preModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
ResourcesPerProcess & operator+=(ResourcesPerProcess const &other)
void book(dqm::reco::DQMStore::IBooker &, std::string const &, ProcessCallGraph const &, ProcessCallGraph::PathType const &, PlotRanges const &ranges, unsigned int lumisections, bool byls)
const bool print_job_summary_
const bool enable_dqm_bypath_
void book(dqm::reco::DQMStore::IBooker &, ProcessCallGraph const &, std::vector< GroupOfModules > const &, PlotRanges const &event_ranges, PlotRanges const &path_ranges, PlotRanges const &module_ranges, unsigned int lumisections, bool bymodule, bool bypath, bool byls, bool transitions)
AtomicResources operator+(AtomicResources const &other) const
LuminosityBlockNumber_t luminosityBlock() const
void printSummary(const std::map< unsigned int, SiStripDetSummary::Values > &map)
edm::ModuleDescription const & module(unsigned int module) const
void postStreamBeginLumi(edm::StreamContext const &)
void postModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
std::vector< ProcessType > const & processes() const
MonitorElement * book1DD(TString const &name, TString const &title, int nchX, double lowX, double highX, FUNC onbooking=NOOP())
ResourcesPerJob & operator+=(ResourcesPerJob const &other)
virtual void setXTitle(std::string const &title)
void fill(ProcessCallGraph::ProcessType const &, ResourcesPerJob const &, ResourcesPerProcess const &, unsigned int ls)
ResourcesPerModule operator+(ResourcesPerModule const &other) const
void printEvent(T &out, ResourcesPerJob const &) const
void prePathEvent(edm::StreamContext const &, edm::PathContext const &)
MonitorElement * bookProfile(TString const &name, TString const &title, int nchX, double lowX, double highX, int, double lowY, double highY, char const *option="s", FUNC onbooking=NOOP())
const std::string processor_model
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
FastTimerService(const edm::ParameterSet &, edm::ActivityRegistry &)
void preModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
StreamID const & streamID() const
void postEvent(edm::StreamContext const &)
void postGlobalEndRun(edm::GlobalContext const &)
static uint64_t deallocated()
void fill_fraction(Resources const &, Resources const &, unsigned int lumisection)
void postStreamEndRun(edm::StreamContext const &)
void preModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
ResourcesPerPath & operator+=(ResourcesPerPath const &other)
Resources operator+(Resources const &other) const
unsigned int concurrent_lumis_
ResourcesPerModule() noexcept
void printSummaryLine(T &out, Resources const &data, uint64_t events, std::string const &label) const
bool register_thread(FastTimerService::AtomicResources &r)
void printPathSummaryLine(T &out, Resources const &data, Resources const &total, uint64_t events, std::string const &label) const
json encodeToJSON(std::string const &type, std::string const &label, unsigned int events, T const &data) const
ProcessContext const * processContext() const
const bool enable_dqm_transitions_
LuminosityBlockIndex const & luminosityBlockIndex() const
void printSummaryHeader(T &out, std::string const &label, bool detailed) const
ProcessContext const * processContext() const
void ignoredSignal(const std::string &signal) const
LuminosityBlockID const & luminosityBlockID() const
void measure_and_store(Resources &store) noexcept
const bool enable_dqm_bymodule_
unsigned int pathID() const
void book(dqm::reco::DQMStore::IBooker &, std::string const &name, std::string const &title, PlotRanges const &ranges, unsigned int lumisections, bool byls)
void fill_run(AtomicResources const &)
#define DEFINE_FWK_SERVICE(type)
static bool is_available()
const bool enable_dqm_byls_
std::vector< PlotsPerProcess > processes_
void postModuleEventAcquire(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
const bool print_event_summary_
virtual void setYTitle(std::string const &title)
LuminosityBlockIndex const & luminosityBlockIndex() const
static std::shared_ptr< specific_t > * ptr(void *p)
unsigned long long uint64_t
void postModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void printEventHeader(T &out, std::string const &label) const
static uint64_t allocated()
void postModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceEvent(edm::StreamID)
static void retire_thread(void *t)
void add(std::string const &label, ParameterSetDescription const &psetDescription)
void preEvent(edm::StreamContext const &)
void postGlobalBeginRun(edm::GlobalContext const &)
void printEventLine(T &out, Resources const &data, std::string const &label) const
void preStreamBeginLumi(edm::StreamContext const &)
ResourcesPerProcess(ProcessCallGraph::ProcessType const &process)
void postSourceEvent(edm::StreamID)
void preModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void printPathSummaryHeader(T &out, std::string const &label) const
bool isSubProcess() const
void preStreamEndRun(edm::StreamContext const &)
PlaceInPathContext const * placeInPathContext() const
RunIndex const & runIndex() const
char data[epos_bytes_allocation]
void preModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void printHeader(T &out, std::string const &label) const
void preStreamEndLumi(edm::StreamContext const &)
void measure_and_accumulate(Resources &store) noexcept
ResourcesPerModule & operator+=(ResourcesPerModule const &other)
EventID const & eventID() const
void postModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
ResourcesPerPath operator+(ResourcesPerPath const &other) const
void preGlobalBeginLumi(edm::GlobalContext const &)
AtomicResources & operator+=(AtomicResources const &other)
Resources & operator+=(Resources const &other)
void preModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceConstruction(edm::ModuleDescription const &)
void preModuleEventAcquire(edm::StreamContext const &, edm::ModuleCallingContext const &)
PathContext const * pathContext() const
void preModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
ProcessType const & processDescription(unsigned int) const
void printTransition(T &out, AtomicResources const &data, std::string const &label) const
void postSourceLumi(edm::LuminosityBlockIndex)
void postModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
unsigned int value() const
std::vector< ResourcesPerProcess > processes
Log< level::Warning, false > LogWarning
MonitorElement * book1D(TString const &name, TString const &title, int const nchX, double const lowX, double const highX, FUNC onbooking=NOOP())
UseScope< MonitorElementData::Scope::RUN > UseRunScope
AtomicResources & operator=(AtomicResources const &other)
ProcessCallGraph callgraph_
void postModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
std::string const & moduleLabel() const
void preStreamBeginRun(edm::StreamContext const &)
void preModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
RunIndex const & runIndex() const
static std::string const source
unsigned int concurrent_streams_
bool isLastSubprocess(std::atomic< unsigned int > &check)
void preallocate(edm::service::SystemBounds const &)
void fill(ProcessCallGraph const &, ResourcesPerJob const &, unsigned int ls)
ParentContext const & parent() const
void fill(ProcessCallGraph::PathType const &, ResourcesPerJob const &, ResourcesPerPath const &, unsigned int lumisection)