10 #include <unordered_map> 11 #include <unordered_set> 14 #include <boost/range/irange.hpp> 17 #include <fmt/printf.h> 20 #include <nlohmann/json.hpp> 51 template <
class Rep,
class Period>
52 double ms(std::chrono::duration<Rep, Period> duration) {
53 return std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(duration).
count();
57 template <
class Rep,
class Period>
58 double ms(boost::chrono::duration<Rep, Period> duration) {
59 return boost::chrono::duration_cast<boost::chrono::duration<double, boost::milli>>(duration).
count();
63 double ms(std::atomic<boost::chrono::nanoseconds::rep>
const& duration) {
64 return boost::chrono::duration_cast<boost::chrono::duration<double, boost::milli>>(
73 uint64_t kB(std::atomic<uint64_t>
const& bytes) {
return bytes.load() / 1024; }
97 time_thread +=
other.time_thread;
98 time_real +=
other.time_real;
99 allocated +=
other.allocated;
100 deallocated +=
other.deallocated;
107 allocated +=
other.allocated.load();
108 deallocated +=
other.deallocated.load();
130 : time_thread(0ul), time_real(0ul), allocated(0ul), deallocated(0ul) {}
146 time_thread =
other.time_thread.load();
147 time_real =
other.time_real.load();
148 allocated =
other.allocated.load();
149 deallocated =
other.deallocated.load();
154 time_thread +=
other.time_thread.load();
155 time_real +=
other.time_real.load();
156 allocated +=
other.allocated.load();
157 deallocated +=
other.deallocated.load();
162 time_thread +=
other.time_thread.count();
163 time_real +=
other.time_real.count();
164 allocated +=
other.allocated;
165 deallocated +=
other.deallocated;
176 return other + *
this;
192 has_acquire = has_acquire
or other.has_acquire;
213 active +=
other.active;
235 for (
auto&
path : endpaths)
243 for (
unsigned int i : boost::irange(0ul,
paths.size()))
246 for (
unsigned int i : boost::irange(0ul, endpaths.size()))
247 endpaths[
i] +=
other.endpaths[
i];
261 std::vector<GroupOfModules>
const& groups)
273 for (
auto&
module : highlight)
284 overhead +=
other.overhead;
285 eventsetup +=
other.eventsetup;
286 event +=
other.event;
287 assert(highlight.size() ==
other.highlight.size());
288 for (
unsigned int i : boost::irange(0ul, highlight.size()))
289 highlight[
i] +=
other.highlight[
i];
291 for (
unsigned int i : boost::irange(0ul,
modules.size()))
294 for (
unsigned int i : boost::irange(0ul,
processes.size()))
313 #ifdef DEBUG_THREAD_CONCURRENCY 314 id = std::this_thread::get_id();
315 #endif // DEBUG_THREAD_CONCURRENCY 323 #ifdef DEBUG_THREAD_CONCURRENCY 324 assert(std::this_thread::get_id() ==
id);
325 #endif // DEBUG_THREAD_CONCURRENCY 330 store.time_thread = new_time_thread - time_thread;
331 store.time_real = new_time_real - time_real;
332 store.allocated = new_allocated - allocated;
333 store.deallocated = new_deallocated - deallocated;
334 time_thread = new_time_thread;
335 time_real = new_time_real;
336 allocated = new_allocated;
337 deallocated = new_deallocated;
341 #ifdef DEBUG_THREAD_CONCURRENCY 342 assert(std::this_thread::get_id() ==
id);
343 #endif // DEBUG_THREAD_CONCURRENCY 348 store.time_thread += new_time_thread - time_thread;
349 store.time_real += new_time_real - time_real;
350 store.allocated += new_allocated - allocated;
351 store.deallocated += new_deallocated - deallocated;
352 time_thread = new_time_thread;
353 time_real = new_time_real;
354 allocated = new_allocated;
355 deallocated = new_deallocated;
359 #ifdef DEBUG_THREAD_CONCURRENCY 360 assert(std::this_thread::get_id() ==
id);
361 #endif // DEBUG_THREAD_CONCURRENCY 368 store.allocated += new_allocated - allocated;
369 store.deallocated += new_deallocated - deallocated;
370 time_thread = new_time_thread;
371 time_real = new_time_real;
372 allocated = new_allocated;
373 deallocated = new_deallocated;
382 unsigned int lumisections,
386 std::string y_title_ms = fmt::sprintf(
"events / %.1f ms",
ranges.time_resolution);
387 std::string y_title_kB = fmt::sprintf(
"events / %.1f kB",
ranges.memory_resolution);
391 booker.
book1D(
name +
" time_thread",
title +
" processing time (cpu)", time_bins, 0.,
ranges.time_range);
392 time_thread_->
setXTitle(
"processing time [ms]");
393 time_thread_->setYTitle(y_title_ms);
394 time_thread_->setStatOverflows(kTRUE);
396 time_real_ = booker.
book1D(
name +
" time_real",
title +
" processing time (real)", time_bins, 0.,
ranges.time_range);
397 time_real_->
setXTitle(
"processing time [ms]");
398 time_real_->setYTitle(y_title_ms);
399 time_real_->setStatOverflows(kTRUE);
402 allocated_ = booker.
book1D(
name +
" allocated",
title +
" allocated memory", mem_bins, 0.,
ranges.memory_range);
404 allocated_->setYTitle(y_title_kB);
405 allocated_->setStatOverflows(kTRUE);
408 booker.
book1D(
name +
" deallocated",
title +
" deallocated memory", mem_bins, 0.,
ranges.memory_range);
410 deallocated_->setYTitle(y_title_kB);
411 deallocated_->setStatOverflows(kTRUE);
418 title +
" processing time (cpu) vs. lumisection",
426 time_thread_byls_->
setXTitle(
"lumisection");
427 time_thread_byls_->setYTitle(
"processing time [ms]");
428 time_thread_byls_->setStatOverflows(kTRUE);
431 title +
" processing time (real) vs. lumisection",
439 time_real_byls_->
setXTitle(
"lumisection");
440 time_real_byls_->setYTitle(
"processing time [ms]");
441 time_real_byls_->setStatOverflows(kTRUE);
445 title +
" allocated memory vs. lumisection",
453 allocated_byls_->
setXTitle(
"lumisection");
454 allocated_byls_->setYTitle(
"memory [kB]");
455 allocated_byls_->setStatOverflows(kTRUE);
458 title +
" deallocated memory vs. lumisection",
466 deallocated_byls_->
setXTitle(
"lumisection");
467 deallocated_byls_->setYTitle(
"memory [kB]");
468 deallocated_byls_->setStatOverflows(kTRUE);
474 time_thread_->Fill(ms(
data.time_thread));
476 if (time_thread_byls_)
477 time_thread_byls_->Fill(lumisection, ms(
data.time_thread));
480 time_real_->Fill(ms(
data.time_real));
483 time_real_byls_->Fill(lumisection, ms(
data.time_real));
486 allocated_->Fill(kB(
data.allocated));
489 allocated_byls_->Fill(lumisection, kB(
data.allocated));
492 deallocated_->Fill(kB(
data.deallocated));
494 if (deallocated_byls_)
495 deallocated_byls_->Fill(lumisection, kB(
data.deallocated));
502 if (time_thread_byls_)
512 allocated_->Fill(kB(
data.allocated));
515 allocated_byls_->Fill(lumisection, kB(
data.allocated));
518 deallocated_->Fill(kB(
data.deallocated));
520 if (deallocated_byls_)
521 deallocated_byls_->Fill(lumisection, kB(
data.deallocated));
526 unsigned int lumisection) {
535 if (time_thread_byls_)
559 if (deallocated_byls_)
568 unsigned int lumisections,
575 total_.book(booker,
"path",
path.name_,
ranges, lumisections, byls);
578 unsigned int bins =
path.modules_and_dependencies_.size();
579 module_counter_ = booker.
book1DD(
"module_counter",
"module counter",
bins + 1, -0.5,
bins + 0.5);
581 module_counter_->setStatOverflows(kTRUE);
582 module_time_thread_total_ =
583 booker.
book1DD(
"module_time_thread_total",
"total module time (cpu)",
bins, -0.5,
bins - 0.5);
584 module_time_thread_total_->
setYTitle(
"processing time [ms]");
585 module_time_thread_total_->setStatOverflows(kTRUE);
586 module_time_real_total_ =
587 booker.
book1DD(
"module_time_real_total",
"total module time (real)",
bins, -0.5,
bins - 0.5);
588 module_time_real_total_->
setYTitle(
"processing time [ms]");
589 module_time_real_total_->setStatOverflows(kTRUE);
591 module_allocated_total_ =
592 booker.
book1DD(
"module_allocated_total",
"total allocated memory",
bins, -0.5,
bins - 0.5);
593 module_allocated_total_->
setYTitle(
"memory [kB]");
594 module_allocated_total_->setStatOverflows(kTRUE);
595 module_deallocated_total_ =
596 booker.
book1DD(
"module_deallocated_total",
"total deallocated memory",
bins, -0.5,
bins - 0.5);
597 module_deallocated_total_->
setYTitle(
"memory [kB]");
598 module_deallocated_total_->setStatOverflows(kTRUE);
600 for (
unsigned int bin : boost::irange(0u,
bins)) {
601 auto const&
module = job[
path.modules_and_dependencies_[
bin]];
603 module.scheduled_ ?
module.module_.moduleLabel() :
module.module_.moduleLabel() +
" (unscheduled)";
604 module_counter_->setBinLabel(
bin + 1,
label);
605 module_time_thread_total_->setBinLabel(
bin + 1,
label);
606 module_time_real_total_->setBinLabel(
bin + 1,
label);
608 module_allocated_total_->setBinLabel(
bin + 1,
label);
609 module_deallocated_total_->setBinLabel(
bin + 1,
label);
612 module_counter_->setBinLabel(
bins + 1,
"");
622 total_.fill(
path.total,
ls);
625 for (
unsigned int i = 0;
i <
path.last; ++
i) {
628 module_counter_->Fill(
i);
630 if (module_time_thread_total_)
631 module_time_thread_total_->Fill(
i, ms(
module.total.time_thread));
633 if (module_time_real_total_)
634 module_time_real_total_->Fill(
i, ms(
module.total.time_real));
636 if (module_allocated_total_)
637 module_allocated_total_->Fill(
i, kB(
module.total.allocated));
639 if (module_deallocated_total_)
640 module_deallocated_total_->Fill(
i, kB(
module.total.deallocated));
642 if (module_counter_ and
path.status)
643 module_counter_->Fill(
path.last);
654 unsigned int lumisections,
658 event_.book(booker,
"process " +
process.name_,
"process " +
process.name_, event_ranges, lumisections, byls);
661 for (
unsigned int id : boost::irange(0ul, paths_.size())) {
662 paths_[
id].book(booker,
"path ", job,
process.paths_[
id], path_ranges, lumisections, byls);
664 for (
unsigned int id : boost::irange(0ul, endpaths_.size())) {
665 endpaths_[
id].book(booker,
"endpath ", job,
process.endPaths_[
id], path_ranges, lumisections, byls);
679 for (
unsigned int id : boost::irange(0ul, paths_.size()))
683 for (
unsigned int id : boost::irange(0ul, endpaths_.size()))
688 : event_(), event_ex_(),
overhead_(), highlight_(groups.
size()), modules_(job.
size()), processes_() {
696 std::vector<GroupOfModules>
const& groups,
700 unsigned int lumisections,
708 event_.book(booker,
"event",
"Event", event_ranges, lumisections, byls);
710 event_ex_.book(booker,
"explicit",
"Event (explicit)", event_ranges, lumisections, byls);
712 overhead_.book(booker,
"overhead",
"Overhead", event_ranges, lumisections, byls);
714 modules_[job.
source().
id()].book(booker,
"source",
"Source", module_ranges, lumisections, byls);
717 lumi_.book(booker,
"lumi",
"LumiSection transitions", event_ranges, lumisections, byls);
719 run_.book(booker,
"run",
"Run transtions", event_ranges, lumisections,
false);
723 for (
unsigned int group : boost::irange(0ul, groups.size())) {
725 highlight_[
group].book(booker,
"highlight " +
label,
"Highlight " +
label, event_ranges, lumisections, byls);
729 for (
unsigned int pid : boost::irange(0ul, job.
processes().size())) {
731 processes_[pid].book(booker, job,
process, event_ranges, path_ranges, lumisections, bypath, byls);
735 for (
unsigned int id :
process.modules_) {
739 modules_[
id].book(booker, safe_label, module_label, module_ranges, lumisections, byls);
748 event_.fill(
data.total,
ls);
749 event_ex_.fill(
data.event,
ls);
753 for (
unsigned int group : boost::irange(0ul, highlight_.size()))
757 for (
unsigned int id : boost::irange(0ul, modules_.size()))
758 modules_[
id].fill(
data.modules[
id].total,
ls);
760 for (
unsigned int pid : boost::irange(0ul, processes_.size()))
801 config.getUntrackedParameter<
double>(
"dqmTimeResolution"),
802 config.getUntrackedParameter<
double>(
"dqmMemoryRange"),
803 config.getUntrackedParameter<
double>(
"dqmMemoryResolution")}),
804 dqm_path_ranges_({
config.getUntrackedParameter<
double>(
"dqmPathTimeRange"),
805 config.getUntrackedParameter<
double>(
"dqmPathTimeResolution"),
806 config.getUntrackedParameter<
double>(
"dqmPathMemoryRange"),
807 config.getUntrackedParameter<
double>(
"dqmPathMemoryResolution")}),
808 dqm_module_ranges_({
config.getUntrackedParameter<
double>(
"dqmModuleTimeRange"),
809 config.getUntrackedParameter<
double>(
"dqmModuleTimeResolution"),
810 config.getUntrackedParameter<
double>(
"dqmModuleMemoryRange"),
811 config.getUntrackedParameter<
double>(
"dqmModuleMemoryResolution")}),
812 dqm_lumisections_range_(
config.getUntrackedParameter<
unsigned int>(
"dqmLumiSectionsRange")),
815 highlight_module_psets_(
config.getUntrackedParameter<std::vector<edm::ParameterSet>>(
"highlightModules")),
816 highlight_modules_(highlight_module_psets_.size())
819 tbb::task_scheduler_observer::observe();
895 LogDebug(
"FastTimerService") <<
"The FastTimerService received is currently not monitoring the signal \"" << signal
901 if (unsupported_signals_.insert(signal).second)
902 edm::LogWarning(
"FastTimerService") <<
"The FastTimerService received the unsupported signal \"" << signal
904 <<
"Please report how to reproduce the issue to cms-hlt@cern.ch .";
908 ignoredSignal(__func__);
911 if (isFirstSubprocess(gc)) {
913 subprocess_global_run_check_[
index] = 0;
914 run_transition_[
index].reset();
915 run_summary_[
index].reset();
932 dqm_lumisections_range_,
933 enable_dqm_bymodule_,
936 enable_dqm_transitions_);
952 static const auto safe_for_dqm =
"/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+=_()# "s;
954 if (safe_for_dqm.find(
c) == std::string::npos)
959 concurrent_lumis_ =
bounds.maxNumberOfConcurrentLuminosityBlocks();
960 concurrent_runs_ =
bounds.maxNumberOfConcurrentRuns();
961 concurrent_streams_ =
bounds.maxNumberOfStreams();
962 concurrent_threads_ =
bounds.maxNumberOfThreads();
964 if (enable_dqm_bynproc_)
965 dqm_path_ += fmt::sprintf(
966 "/Running on %s with %d streams on %d threads",
processor_model, concurrent_streams_, concurrent_threads_);
969 fixForDQM(dqm_path_);
972 subprocess_event_check_ = std::make_unique<std::atomic<unsigned int>[]>(concurrent_streams_);
973 for (
unsigned int i = 0;
i < concurrent_streams_; ++
i)
974 subprocess_event_check_[
i] = 0;
975 subprocess_global_run_check_ = std::make_unique<std::atomic<unsigned int>[]>(concurrent_runs_);
976 for (
unsigned int i = 0;
i < concurrent_runs_; ++
i)
977 subprocess_global_run_check_[
i] = 0;
978 subprocess_global_lumi_check_ = std::make_unique<std::atomic<unsigned int>[]>(concurrent_lumis_);
979 for (
unsigned int i = 0;
i < concurrent_lumis_; ++
i)
980 subprocess_global_lumi_check_[
i] = 0;
983 lumi_transition_.resize(concurrent_lumis_);
984 run_transition_.resize(concurrent_runs_);
988 callgraph_.preSourceConstruction(
module);
993 callgraph_.preBeginJob(pathsAndConsumes,
context);
997 unsigned int modules = callgraph_.size();
1000 for (
unsigned int group : boost::irange(0ul, highlight_module_psets_.size())) {
1002 auto labels = highlight_module_psets_[
group].getUntrackedParameter<std::vector<std::string>>(
"modules");
1005 highlight_modules_[
group].label = highlight_module_psets_[
group].getUntrackedParameter<
std::string>(
"label");
1006 highlight_modules_[
group].modules.reserve(
labels.size());
1009 auto const&
label = callgraph_.module(
i).moduleLabel();
1011 highlight_modules_[
group].modules.push_back(
i);
1014 highlight_module_psets_.clear();
1018 streams_.resize(concurrent_streams_,
temp);
1019 run_summary_.resize(concurrent_runs_,
temp);
1020 job_summary_ =
temp;
1025 enable_dqm_ =
false;
1026 edm::LogWarning(
"FastTimerService") <<
"The DQMStore is not avalable, the DQM plots will not be generated";
1031 plots_ = std::make_unique<PlotsPerJob>(callgraph_, highlight_modules_);
1042 ignoredSignal(__func__);
1045 if (isFirstSubprocess(gc)) {
1047 subprocess_global_lumi_check_[
index] = 0;
1048 lumi_transition_[
index].reset();
1057 ignoredSignal(__func__);
1061 bool last = isLastSubprocess(subprocess_global_lumi_check_[
index]);
1070 if (enable_dqm_ and enable_dqm_transitions_) {
1086 ignoredSignal(__func__);
1090 bool last = isLastSubprocess(subprocess_global_run_check_[
index]);
1096 if (print_run_summary_) {
1101 if (enable_dqm_ and enable_dqm_transitions_) {
1102 plots_->fill_run(run_transition_[
index]);
1113 thread().measure_and_accumulate(lumi_transition_[
index]);
1118 tbb::task_scheduler_observer::observe(
false);
1120 if (print_job_summary_) {
1124 if (write_json_summary_) {
1125 writeSummaryJSON(job_summary_, json_filename_);
1129 template <
typename T>
1131 out <<
"FastReport ";
1132 if (
label.size() < 60)
1133 for (
unsigned int i = (60 -
label.size()) / 2;
i > 0; --
i)
1135 out <<
' ' <<
label <<
" Summary ";
1136 if (
label.size() < 60)
1137 for (
unsigned int i = (59 -
label.size()) / 2;
i > 0; --
i)
1142 template <
typename T>
1144 out <<
"FastReport CPU time Real time Allocated Deallocated " <<
label <<
"\n";
1148 template <
typename T>
1150 out << fmt::sprintf(
"FastReport %10.1f ms %10.1f ms %+10d kB %+10d kB %s\n",
1151 ms(
data.time_thread),
1153 +static_cast<int64_t>(kB(
data.allocated)),
1154 -static_cast<int64_t>(kB(
data.deallocated)),
1158 template <
typename T>
1160 out << fmt::sprintf(
"FastReport %10.1f ms %10.1f ms %+10d kB %+10d kB %s\n",
1163 +
static_cast<int64_t
>(kB(
data.allocated)),
1164 -static_cast<int64_t>(kB(
data.deallocated)),
1168 template <
typename T>
1170 printHeader(
out,
"Event");
1171 printEventHeader(
out,
"Modules");
1172 auto const& source_d = callgraph_.source();
1173 auto const&
source =
data.modules[source_d.id()];
1174 printEventLine(
out,
source.total, source_d.moduleLabel());
1175 for (
unsigned int i = 0;
i < callgraph_.processes().size(); ++
i) {
1176 auto const& proc_d = callgraph_.processDescription(
i);
1178 printEventLine(
out,
proc.total,
"process " + proc_d.name_);
1179 for (
unsigned int m : proc_d.modules_) {
1180 auto const& module_d = callgraph_.module(
m);
1182 printEventLine(
out,
module.total,
" " + module_d.moduleLabel());
1185 printEventLine(
out,
data.total,
"total");
1187 printEventHeader(
out,
"Processes and Paths");
1188 printEventLine(
out,
source.total, source_d.moduleLabel());
1189 for (
unsigned int i = 0;
i < callgraph_.processes().size(); ++
i) {
1190 auto const& proc_d = callgraph_.processDescription(
i);
1192 printEventLine(
out,
proc.total,
"process " + proc_d.name_);
1193 for (
unsigned int p = 0;
p <
proc.paths.size(); ++
p) {
1194 auto const&
name = proc_d.paths_[
p].name_;
1196 printEventLine(
out,
path.active,
name +
" (only scheduled modules)");
1197 printEventLine(
out,
path.total,
name +
" (including dependencies)");
1199 for (
unsigned int p = 0;
p <
proc.endpaths.size(); ++
p) {
1200 auto const&
name = proc_d.endPaths_[
p].name_;
1202 printEventLine(
out,
path.active,
name +
" (only scheduled modules)");
1203 printEventLine(
out,
path.total,
name +
" (including dependencies)");
1206 printEventLine(
out,
data.total,
"total");
1208 for (
unsigned int group : boost::irange(0ul, highlight_modules_.size())) {
1209 printEventHeader(
out,
"Highlighted modules");
1211 auto const& module_d = callgraph_.module(
m);
1213 printEventLine(
out,
module.total,
" " + module_d.moduleLabel());
1220 template <
typename T>
1224 out <<
"FastReport CPU time avg. when run Real time avg. when run Alloc. avg. when run Dealloc. avg. when run ";
1227 out <<
"FastReport CPU time avg. Real time avg. Alloc. avg. Dealloc. avg. ";
1233 template <
typename T>
1236 out <<
"FastReport CPU time sched. / depend. Real time sched. / depend. Alloc. sched. / depend. Dealloc. sched. / depend. ";
1242 template <
typename T>
1244 out << fmt::sprintf(
1246 "FastReport %10.1f ms %10.1f ms %+10d kB %+10d kB %s\n",
1255 template <
typename T>
1258 out << fmt::sprintf(
1260 "FastReport %10.1f ms %10.1f ms %10.1f ms %10.1f ms %+10d kB %+10d kB %+10d kB %+10d kB %s\n",
1263 (active ? ms(
data.time_thread) / active : 0),
1265 (active ? ms(
data.time_real) / active : 0),
1267 (active ? +static_cast<int64_t>(kB(
data.allocated) / active) : 0),
1269 (active ? -static_cast<int64_t>(kB(
data.deallocated) / active) : 0),
1273 template <
typename T>
1278 out << fmt::sprintf(
1280 "FastReport %10.1f ms %10.1f ms %+10d kB %+10d kB %s\n",
1289 template <
typename T>
1292 out << fmt::sprintf(
1293 "FastReport %10.1f ms %10.1f ms %10.1f ms %10.1f ms %+10d kB %+10d kB %+10d kB %+10d kB %s\n",
1295 (active ? ms(
data.time_thread) / active : 0),
1297 (active ? ms(
data.time_real) / active : 0),
1299 (active ? +static_cast<int64_t>(kB(
data.allocated) / active) : 0),
1301 (active ? -static_cast<int64_t>(kB(
data.deallocated) / active) : 0),
1305 template <
typename T>
1308 out << fmt::sprintf(
1309 "FastReport %10.1f ms %10.1f ms %10.1f ms %10.1f ms %+10d kB %+10d kB %+10d kB %+10d kB %s\n",
1321 template <
typename T>
1324 printSummaryHeader(
out,
"Modules",
true);
1325 auto const& source_d = callgraph_.source();
1326 auto const&
source =
data.modules[source_d.id()];
1328 for (
unsigned int i = 0;
i < callgraph_.processes().size(); ++
i) {
1329 auto const& proc_d = callgraph_.processDescription(
i);
1331 printSummaryLine(
out,
proc.total,
data.events,
"process " + proc_d.name_);
1332 for (
unsigned int m : proc_d.modules_) {
1333 auto const& module_d = callgraph_.module(
m);
1335 printSummaryLine(
out,
module.total,
data.events,
module.events,
" " + module_d.moduleLabel());
1338 printSummaryLine(
out,
data.total,
data.events,
"total");
1339 printSummaryLine(
out,
data.overhead,
data.events,
"other");
1340 printSummaryLine(
out,
data.eventsetup,
data.events,
"eventsetup");
1342 printPathSummaryHeader(
out,
"Processes and Paths");
1343 printSummaryLine(
out,
source.total,
data.events, source_d.moduleLabel());
1344 for (
unsigned int i = 0;
i < callgraph_.processes().size(); ++
i) {
1345 auto const& proc_d = callgraph_.processDescription(
i);
1347 printSummaryLine(
out,
proc.total,
data.events,
"process " + proc_d.name_);
1348 for (
unsigned int p = 0;
p <
proc.paths.size(); ++
p) {
1349 auto const&
name = proc_d.paths_[
p].name_;
1353 for (
unsigned int p = 0;
p <
proc.endpaths.size(); ++
p) {
1354 auto const&
name = proc_d.endPaths_[
p].name_;
1359 printSummaryLine(
out,
data.total,
data.events,
"total");
1360 printSummaryLine(
out,
data.overhead,
data.events,
"other");
1361 printSummaryLine(
out,
data.eventsetup,
data.events,
"eventsetup");
1363 for (
unsigned int group : boost::irange(0ul, highlight_modules_.size())) {
1364 printSummaryHeader(
out,
"Highlighted modules",
true);
1366 auto const& module_d = callgraph_.module(
m);
1375 template <
typename T>
1377 printEventHeader(
out,
"Transition");
1381 template <
typename T>
1385 T const&
data)
const {
1389 {
"time_thread", ms(
data.time_thread)},
1390 {
"time_real", ms(
data.time_real)},
1391 {
"mem_alloc", kB(
data.allocated)},
1392 {
"mem_free", kB(
data.deallocated)}};
1404 json{{
"time_thread",
"cpu time"}},
1405 json{{
"mem_alloc",
"allocated memory"}},
1406 json{{
"mem_free",
"deallocated memory"}}});
1409 j[
"total"] = encodeToJSON(
1410 "Job", callgraph_.processDescription(0).name_,
data.events,
data.total +
data.overhead +
data.eventsetup);
1414 for (
unsigned int i = 0;
i < callgraph_.size(); ++
i) {
1415 auto const&
module = callgraph_.module(
i);
1416 auto const& data_m =
data.modules[
i];
1417 j[
"modules"].push_back(encodeToJSON(
module, data_m));
1421 j[
"modules"].push_back(encodeToJSON(
"other",
"other",
data.events,
data.overhead));
1422 j[
"modules"].push_back(encodeToJSON(
"eventsetup",
"eventsetup",
data.events,
data.eventsetup));
1425 out << std::setw(2) <<
j << std::flush;
1441 unsigned int old_value =
check.fetch_add(1, std::memory_order_acq_rel);
1442 return (old_value == callgraph_.processes().size() - 1);
1448 ignoredSignal(__func__);
1452 auto&
stream = streams_[sid];
1453 auto&
process = callgraph_.processDescription(pid);
1457 for (
unsigned int id :
process.modules_)
1462 bool last = isLastSubprocess(subprocess_event_check_[sid]);
1467 stream.event_measurement.measure_and_store(
stream.event);
1470 unsigned int id = 0;
1474 for (
unsigned int group : boost::irange(0ul, highlight_modules_.size()))
1480 std::lock_guard<std::mutex> guard(summary_mutex_);
1485 if (print_event_summary_) {
1497 auto&
stream = streams_[sid];
1501 subprocess_event_check_[sid] = 0;
1504 auto& measurement = thread();
1505 measurement.measure_and_accumulate(
stream.overhead);
1506 stream.event_measurement = measurement;
1511 unsigned int id = md.
id();
1512 auto&
stream = streams_[sid];
1515 thread().measure_and_store(
module.total);
1522 unsigned int id = pc.
pathID();
1523 auto&
stream = streams_[sid];
1525 data.status =
false;
1534 unsigned int id = pc.
pathID();
1535 auto&
stream = streams_[sid];
1539 pc.
isEndPath() ? callgraph_.processDescription(pid).endPaths_[
id] : callgraph_.processDescription(pid).paths_[
id];
1540 unsigned int index =
path.modules_on_path_.empty() ? 0 :
status.index() + 1;
1541 data.last =
path.modules_on_path_.empty() ? 0 :
path.last_dependency_of_module_[
status.index()];
1543 for (
unsigned int i = 0;
i <
index; ++
i) {
1547 for (
unsigned int i = 0;
i <
data.last; ++
i) {
1555 auto&
stream = streams_[sid];
1556 thread().measure_and_accumulate(
stream.overhead);
1561 unsigned int id = md.
id();
1563 auto&
stream = streams_[sid];
1566 module.has_acquire =
true;
1567 thread().measure_and_store(
module.total);
1572 auto&
stream = streams_[sid];
1573 thread().measure_and_accumulate(
stream.overhead);
1578 unsigned int id = md.
id();
1580 auto&
stream = streams_[sid];
1583 if (
module.has_acquire) {
1584 thread().measure_and_accumulate(
module.total);
1586 thread().measure_and_store(
module.total);
1592 unsupportedSignal(__func__);
1596 unsupportedSignal(__func__);
1600 ignoredSignal(__func__);
1604 ignoredSignal(__func__);
1610 thread().measure_and_accumulate(
stream.overhead);
1617 unsigned int id = md.
id();
1621 thread().measure_and_accumulate(
module.total);
1626 thread().measure_and_accumulate(overhead_);
1631 thread().measure_and_accumulate(run_transition_[
index]);
1635 thread().measure_and_accumulate(overhead_);
1640 thread().measure_and_accumulate(run_transition_[
index]);
1644 thread().measure_and_accumulate(overhead_);
1649 thread().measure_and_accumulate(lumi_transition_[
index]);
1653 thread().measure_and_accumulate(overhead_);
1658 thread().measure_and_accumulate(lumi_transition_[
index]);
1662 thread().measure_and_accumulate(overhead_);
1667 thread().measure_and_accumulate(run_transition_[
index]);
1671 thread().measure_and_accumulate(overhead_);
1676 thread().measure_and_accumulate(run_transition_[
index]);
1680 thread().measure_and_accumulate(overhead_);
1685 thread().measure_and_accumulate(lumi_transition_[
index]);
1689 thread().measure_and_accumulate(overhead_);
1694 thread().measure_and_accumulate(lumi_transition_[
index]);
1698 auto top =
cc.getTopModuleCallingContext();
1701 unsigned int sid = top->parent().placeInPathContext()->pathContext()->streamContext()->streamID().value();
1702 auto&
stream = streams_[sid];
1703 thread().measure_and_accumulate(
stream.overhead);
1708 auto top =
cc.getTopModuleCallingContext();
1710 unsigned int sid = top->parent().placeInPathContext()->pathContext()->streamContext()->streamID().value();
1711 auto&
stream = streams_[sid];
1712 thread().measure_and_accumulate(
stream.eventsetup);
1719 throw cms::Exception(
"FastTimerService") <<
"ThreadGuard key creation failed: " << ::strerror(
err);
1725 auto ptr = ::pthread_getspecific(key_);
1728 auto p = thread_resources_.emplace_back(std::make_shared<specific_t>(
r));
1729 auto pp =
new std::shared_ptr<specific_t>(*p);
1730 auto err = ::pthread_setspecific(key_,
pp);
1732 throw cms::Exception(
"FastTimerService") <<
"ThreadGuard pthread_setspecific failed: " << ::strerror(
err);
1740 return static_cast<std::shared_ptr<specific_t>*
>(
p);
1746 auto expected =
true;
1747 if ((*ps)->live_.compare_exchange_strong(expected,
false)) {
1749 (*ps)->measurement_.measure_and_accumulate((*ps)->resource_);
1756 for (
auto&
p : thread_resources_) {
1757 auto expected =
true;
1758 if (
p->live_.compare_exchange_strong(expected,
false)) {
1759 p->measurement_.measure_and_accumulate(
p->resource_);
1765 return (*ptr(::pthread_getspecific(key_)))->measurement_;
1782 desc.addUntracked<
bool>(
"printEventSummary",
false);
1783 desc.addUntracked<
bool>(
"printRunSummary",
true);
1784 desc.addUntracked<
bool>(
"printJobSummary",
true);
1789 desc.addUntracked<
bool>(
"writeJSONSummary",
false);
1792 desc.addUntracked<
bool>(
"enableDQM",
true);
1793 desc.addUntracked<
bool>(
"enableDQMbyModule",
false);
1794 desc.addUntracked<
bool>(
"enableDQMbyPath",
false);
1795 desc.addUntracked<
bool>(
"enableDQMbyLumiSection",
false);
1796 desc.addUntracked<
bool>(
"enableDQMbyProcesses",
false);
1797 desc.addUntracked<
bool>(
"enableDQMTransitions",
false);
1798 desc.addUntracked<
double>(
"dqmTimeRange", 1000.);
1799 desc.addUntracked<
double>(
"dqmTimeResolution", 5.);
1800 desc.addUntracked<
double>(
"dqmMemoryRange", 1000000.);
1801 desc.addUntracked<
double>(
"dqmMemoryResolution", 5000.);
1802 desc.addUntracked<
double>(
"dqmPathTimeRange", 100.);
1803 desc.addUntracked<
double>(
"dqmPathTimeResolution", 0.5);
1804 desc.addUntracked<
double>(
"dqmPathMemoryRange", 1000000.);
1805 desc.addUntracked<
double>(
"dqmPathMemoryResolution", 5000.);
1806 desc.addUntracked<
double>(
"dqmModuleTimeRange", 40.);
1807 desc.addUntracked<
double>(
"dqmModuleTimeResolution", 0.2);
1808 desc.addUntracked<
double>(
"dqmModuleMemoryRange", 100000.);
1809 desc.addUntracked<
double>(
"dqmModuleMemoryResolution", 500.);
1810 desc.addUntracked<
unsigned>(
"dqmLumiSectionsRange", 2500);
1814 highlightModulesDescription.
addUntracked<std::vector<std::string>>(
"modules", {});
1815 highlightModulesDescription.addUntracked<
std::string>(
"label",
"producers");
1816 desc.addVPSetUntracked(
"highlightModules", highlightModulesDescription, {});
1821 ->setComment(
"This parameter is obsolete and will be ignored.");
1823 ->setComment(
"This parameter is obsolete and will be ignored.");
1825 ->setComment(
"This parameter is obsolete and will be ignored.");
1827 ->setComment(
"This parameter is obsolete and will be ignored.");
1829 ->setComment(
"This parameter is obsolete and will be ignored.");
1831 ->setComment(
"This parameter is obsolete and will be ignored.");
1833 ->setComment(
"This parameter is obsolete and will be ignored.");
1835 ->setComment(
"This parameter is obsolete and will be ignored.");
1837 ->setComment(
"This parameter is obsolete and will be ignored.");
1839 ->setComment(
"This parameter is obsolete and will be ignored.");
1841 ->setComment(
"This parameter is obsolete and will be ignored.");
1843 ->setComment(
"This parameter is obsolete and will be ignored.");
1845 ->setComment(
"This parameter is obsolete and will be ignored.");
1847 ->setComment(
"This parameter is obsolete and will be ignored.");
1849 descriptions.
add(
"FastTimerService",
desc);
ResourcesPerJob()=default
void preGlobalBeginRun(edm::GlobalContext const &)
void preModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
const bool print_run_summary_
constexpr int32_t ceil(float num)
void fill(Resources const &, unsigned int lumisection)
ResourcesPerProcess operator+(ResourcesPerProcess const &other) const
void postStreamBeginRun(edm::StreamContext const &)
ModuleDescription const * moduleDescription() const
void postGlobalEndLumi(edm::GlobalContext const &)
LuminosityBlockNumber_t luminosityBlock() const
void postGlobalBeginLumi(edm::GlobalContext const &)
void on_scheduler_entry(bool worker) final
void postStreamEndLumi(edm::StreamContext const &)
void on_scheduler_exit(bool worker) final
void postModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preGlobalEndRun(edm::GlobalContext const &)
void preBeginJob(edm::PathsAndConsumesOfModulesBase const &, edm::ProcessContext const &)
void fill_lumi(AtomicResources const &, unsigned int lumisection)
void preSourceLumi(edm::LuminosityBlockIndex)
void printSummary(T &out, ResourcesPerJob const &data, std::string const &label) const
boost::date_time::subsecond_duration< boost::posix_time::time_duration, 1000000000 > nanoseconds
ResourcesPerJob operator+(ResourcesPerJob const &other) const
unsigned int concurrent_threads_
void postEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
const PlotRanges dqm_event_ranges_
void postModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preSourceRun(edm::RunIndex)
void postModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
void unsupportedSignal(const std::string &signal) const
void writeSummaryJSON(ResourcesPerJob const &data, std::string const &filename) const
virtual void setCurrentFolder(std::string const &fullpath)
const bool write_json_summary_
void book(dqm::reco::DQMStore::IBooker &, ProcessCallGraph const &, ProcessCallGraph::ProcessType const &, PlotRanges const &event_ranges, PlotRanges const &path_ranges, unsigned int lumisections, bool bypath, bool byls)
PlotsPerJob(ProcessCallGraph const &job, std::vector< GroupOfModules > const &groups)
PlotsPerProcess(ProcessCallGraph::ProcessType const &)
void preGlobalEndLumi(edm::GlobalContext const &)
uint32_t cc[maxCellsPerHit]
edm::ModuleDescription const & source() const
virtual std::string pwd()
AtomicResources overhead_
void preESModule(edm::eventsetup::EventSetupRecordKey const &, edm::ESModuleCallingContext const &)
const std::string json_filename_
bool isFirstSubprocess(edm::StreamContext const &)
const bool enable_dqm_bynproc_
void postESModule(edm::eventsetup::EventSetupRecordKey const &, edm::ESModuleCallingContext const &)
unsigned int concurrent_runs_
void postSourceRun(edm::RunIndex)
void postPathEvent(edm::StreamContext const &, edm::PathContext const &, edm::HLTPathStatus const &)
void preModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
ResourcesPerProcess & operator+=(ResourcesPerProcess const &other)
void book(dqm::reco::DQMStore::IBooker &, std::string const &, ProcessCallGraph const &, ProcessCallGraph::PathType const &, PlotRanges const &ranges, unsigned int lumisections, bool byls)
const bool print_job_summary_
const bool enable_dqm_bypath_
void book(dqm::reco::DQMStore::IBooker &, ProcessCallGraph const &, std::vector< GroupOfModules > const &, PlotRanges const &event_ranges, PlotRanges const &path_ranges, PlotRanges const &module_ranges, unsigned int lumisections, bool bymodule, bool bypath, bool byls, bool transitions)
AtomicResources operator+(AtomicResources const &other) const
LuminosityBlockNumber_t luminosityBlock() const
void printSummary(const std::map< unsigned int, SiStripDetSummary::Values > &map)
edm::ModuleDescription const & module(unsigned int module) const
void postStreamBeginLumi(edm::StreamContext const &)
void postModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
std::vector< ProcessType > const & processes() const
MonitorElement * book1DD(TString const &name, TString const &title, int nchX, double lowX, double highX, FUNC onbooking=NOOP())
ResourcesPerJob & operator+=(ResourcesPerJob const &other)
virtual void setXTitle(std::string const &title)
void fill(ProcessCallGraph::ProcessType const &, ResourcesPerJob const &, ResourcesPerProcess const &, unsigned int ls)
ResourcesPerModule operator+(ResourcesPerModule const &other) const
void printEvent(T &out, ResourcesPerJob const &) const
void prePathEvent(edm::StreamContext const &, edm::PathContext const &)
MonitorElement * bookProfile(TString const &name, TString const &title, int nchX, double lowX, double highX, int, double lowY, double highY, char const *option="s", FUNC onbooking=NOOP())
const std::string processor_model
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
FastTimerService(const edm::ParameterSet &, edm::ActivityRegistry &)
void preModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
StreamID const & streamID() const
void postEvent(edm::StreamContext const &)
void postGlobalEndRun(edm::GlobalContext const &)
static uint64_t deallocated()
void fill_fraction(Resources const &, Resources const &, unsigned int lumisection)
void postStreamEndRun(edm::StreamContext const &)
void preModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
ResourcesPerPath & operator+=(ResourcesPerPath const &other)
Resources operator+(Resources const &other) const
unsigned int concurrent_lumis_
ResourcesPerModule() noexcept
void printSummaryLine(T &out, Resources const &data, uint64_t events, std::string const &label) const
bool register_thread(FastTimerService::AtomicResources &r)
void printPathSummaryLine(T &out, Resources const &data, Resources const &total, uint64_t events, std::string const &label) const
json encodeToJSON(std::string const &type, std::string const &label, unsigned int events, T const &data) const
ProcessContext const * processContext() const
const bool enable_dqm_transitions_
LuminosityBlockIndex const & luminosityBlockIndex() const
void printSummaryHeader(T &out, std::string const &label, bool detailed) const
ProcessContext const * processContext() const
void ignoredSignal(const std::string &signal) const
LuminosityBlockID const & luminosityBlockID() const
void measure_and_store(Resources &store) noexcept
const bool enable_dqm_bymodule_
unsigned int pathID() const
void book(dqm::reco::DQMStore::IBooker &, std::string const &name, std::string const &title, PlotRanges const &ranges, unsigned int lumisections, bool byls)
void fill_run(AtomicResources const &)
#define DEFINE_FWK_SERVICE(type)
static bool is_available()
const bool enable_dqm_byls_
std::vector< PlotsPerProcess > processes_
void postModuleEventAcquire(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
const bool print_event_summary_
virtual void setYTitle(std::string const &title)
LuminosityBlockIndex const & luminosityBlockIndex() const
static std::shared_ptr< specific_t > * ptr(void *p)
unsigned long long uint64_t
void postModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void printEventHeader(T &out, std::string const &label) const
static uint64_t allocated()
void postModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceEvent(edm::StreamID)
static void retire_thread(void *t)
void add(std::string const &label, ParameterSetDescription const &psetDescription)
void preEvent(edm::StreamContext const &)
void postGlobalBeginRun(edm::GlobalContext const &)
void printEventLine(T &out, Resources const &data, std::string const &label) const
void preStreamBeginLumi(edm::StreamContext const &)
ResourcesPerProcess(ProcessCallGraph::ProcessType const &process)
void postSourceEvent(edm::StreamID)
void preModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void printPathSummaryHeader(T &out, std::string const &label) const
bool isSubProcess() const
void preStreamEndRun(edm::StreamContext const &)
RunIndex const & runIndex() const
char data[epos_bytes_allocation]
void preModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void printHeader(T &out, std::string const &label) const
void preStreamEndLumi(edm::StreamContext const &)
void measure_and_accumulate(Resources &store) noexcept
ResourcesPerModule & operator+=(ResourcesPerModule const &other)
EventID const & eventID() const
void postModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
ResourcesPerPath operator+(ResourcesPerPath const &other) const
void preGlobalBeginLumi(edm::GlobalContext const &)
AtomicResources & operator+=(AtomicResources const &other)
Resources & operator+=(Resources const &other)
void preModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceConstruction(edm::ModuleDescription const &)
void preModuleEventAcquire(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
ProcessType const & processDescription(unsigned int) const
void printTransition(T &out, AtomicResources const &data, std::string const &label) const
void postSourceLumi(edm::LuminosityBlockIndex)
void postModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
unsigned int value() const
std::vector< ResourcesPerProcess > processes
Log< level::Warning, false > LogWarning
MonitorElement * book1D(TString const &name, TString const &title, int const nchX, double const lowX, double const highX, FUNC onbooking=NOOP())
UseScope< MonitorElementData::Scope::RUN > UseRunScope
AtomicResources & operator=(AtomicResources const &other)
ProcessCallGraph callgraph_
void postModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
std::string const & moduleLabel() const
void preStreamBeginRun(edm::StreamContext const &)
void preModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
RunIndex const & runIndex() const
static std::string const source
static void fixForDQM(std::string &label)
unsigned int concurrent_streams_
bool isLastSubprocess(std::atomic< unsigned int > &check)
void preallocate(edm::service::SystemBounds const &)
void fill(ProcessCallGraph const &, ResourcesPerJob const &, unsigned int ls)
void fill(ProcessCallGraph::PathType const &, ResourcesPerJob const &, ResourcesPerPath const &, unsigned int lumisection)