10 #include <unordered_map>
11 #include <unordered_set>
14 #include <boost/range/irange.hpp>
17 #include <fmt/printf.h>
20 #include <nlohmann/json.hpp>
43 using namespace std::literals;
// Convert any std::chrono::duration to a floating-point number of milliseconds.
//
// The argument is re-expressed with duration_cast as a duration that has a
// double representation and a std::milli period, so a sub-millisecond part is
// kept as a fraction rather than truncated.
//
// duration: the time span to convert, in any representation and period.
// returns:  the same time span expressed in milliseconds.
template <class Rep, class Period>
double ms(std::chrono::duration<Rep, Period> duration) {
  using milliseconds_f = std::chrono::duration<double, std::milli>;
  return std::chrono::duration_cast<milliseconds_f>(duration).count();
}
56 template <
class Rep,
class Period>
57 double ms(boost::chrono::duration<Rep, Period> duration) {
58 return boost::chrono::duration_cast<boost::chrono::duration<double, boost::milli>>(duration).
count();
62 double ms(std::atomic<boost::chrono::nanoseconds::rep>
const& duration) {
63 return boost::chrono::duration_cast<boost::chrono::duration<double, boost::milli>>(
// Convert a byte count held in an atomic counter to kilobytes.
//
// The current value of the counter is loaded once and divided by 1024 with
// unsigned integer division, so the result is rounded down (any value below
// 1024 bytes yields 0).
//
// bytes:   atomic byte counter; it is only read, never modified.
// returns: the number of whole kilobytes in the loaded value.
uint64_t kB(std::atomic<uint64_t> const& bytes) {
  return bytes.load() / 1024;
}
129 : time_thread(0ul), time_real(0ul), allocated(0ul), deallocated(0ul) {}
132 : time_thread(other.time_thread.
load()),
133 time_real(other.time_real.
load()),
134 allocated(other.allocated.
load()),
135 deallocated(other.deallocated.
load()) {}
175 return other + *
this;
234 for (
auto&
path : endpaths)
242 for (
unsigned int i : boost::irange(0ul,
paths.size()))
245 for (
unsigned int i : boost::irange(0ul, endpaths.size()))
260 std::vector<GroupOfModules>
const& groups)
271 for (
auto&
module : highlight)
283 event += other.
event;
285 for (
unsigned int i : boost::irange(0ul, highlight.size()))
288 for (
unsigned int i : boost::irange(0ul,
modules.size()))
291 for (
unsigned int i : boost::irange(0ul,
processes.size()))
310 #ifdef DEBUG_THREAD_CONCURRENCY
311 id = std::this_thread::get_id();
312 #endif // DEBUG_THREAD_CONCURRENCY
320 #ifdef DEBUG_THREAD_CONCURRENCY
321 assert(std::this_thread::get_id() ==
id);
322 #endif // DEBUG_THREAD_CONCURRENCY
327 store.time_thread = new_time_thread - time_thread;
328 store.time_real = new_time_real - time_real;
329 store.allocated = new_allocated - allocated;
330 store.deallocated = new_deallocated - deallocated;
331 time_thread = new_time_thread;
332 time_real = new_time_real;
333 allocated = new_allocated;
334 deallocated = new_deallocated;
338 #ifdef DEBUG_THREAD_CONCURRENCY
339 assert(std::this_thread::get_id() ==
id);
340 #endif // DEBUG_THREAD_CONCURRENCY
345 store.time_thread += new_time_thread - time_thread;
346 store.time_real += new_time_real - time_real;
347 store.allocated += new_allocated - allocated;
348 store.deallocated += new_deallocated - deallocated;
349 time_thread = new_time_thread;
350 time_real = new_time_real;
351 allocated = new_allocated;
352 deallocated = new_deallocated;
356 #ifdef DEBUG_THREAD_CONCURRENCY
357 assert(std::this_thread::get_id() ==
id);
358 #endif // DEBUG_THREAD_CONCURRENCY
364 store.time_real += boost::chrono::duration_cast<boost::chrono::nanoseconds>(new_time_real - time_real).
count();
365 store.allocated += new_allocated - allocated;
366 store.deallocated += new_deallocated - deallocated;
367 time_thread = new_time_thread;
368 time_real = new_time_real;
369 allocated = new_allocated;
370 deallocated = new_deallocated;
379 unsigned int lumisections,
388 booker.
book1D(name +
" time_thread", title +
" processing time (cpu)", time_bins, 0., ranges.
time_range);
389 time_thread_->
setXTitle(
"processing time [ms]");
390 time_thread_->setYTitle(y_title_ms);
391 time_thread_->setStatOverflows(kTRUE);
393 time_real_ = booker.
book1D(name +
" time_real", title +
" processing time (real)", time_bins, 0., ranges.
time_range);
394 time_real_->
setXTitle(
"processing time [ms]");
395 time_real_->setYTitle(y_title_ms);
396 time_real_->setStatOverflows(kTRUE);
399 allocated_ = booker.
book1D(name +
" allocated", title +
" allocated memory", mem_bins, 0., ranges.
memory_range);
401 allocated_->setYTitle(y_title_kB);
402 allocated_->setStatOverflows(kTRUE);
405 booker.
book1D(name +
" deallocated", title +
" deallocated memory", mem_bins, 0., ranges.
memory_range);
407 deallocated_->setYTitle(y_title_kB);
408 deallocated_->setStatOverflows(kTRUE);
414 time_thread_byls_ = booker.
bookProfile(name +
" time_thread_byls",
415 title +
" processing time (cpu) vs. lumisection",
423 time_thread_byls_->
setXTitle(
"lumisection");
424 time_thread_byls_->setYTitle(
"processing time [ms]");
425 time_thread_byls_->setStatOverflows(kTRUE);
427 time_real_byls_ = booker.
bookProfile(name +
" time_real_byls",
428 title +
" processing time (real) vs. lumisection",
436 time_real_byls_->
setXTitle(
"lumisection");
437 time_real_byls_->setYTitle(
"processing time [ms]");
438 time_real_byls_->setStatOverflows(kTRUE);
441 allocated_byls_ = booker.
bookProfile(name +
" allocated_byls",
442 title +
" allocated memory vs. lumisection",
450 allocated_byls_->
setXTitle(
"lumisection");
451 allocated_byls_->setYTitle(
"memory [kB]");
452 allocated_byls_->setStatOverflows(kTRUE);
454 deallocated_byls_ = booker.
bookProfile(name +
" deallocated_byls",
455 title +
" deallocated memory vs. lumisection",
463 deallocated_byls_->
setXTitle(
"lumisection");
464 deallocated_byls_->setYTitle(
"memory [kB]");
465 deallocated_byls_->setStatOverflows(kTRUE);
473 if (time_thread_byls_)
474 time_thread_byls_->Fill(lumisection, ms(data.
time_thread));
480 time_real_byls_->Fill(lumisection, ms(data.
time_real));
486 allocated_byls_->Fill(lumisection, kB(data.
allocated));
491 if (deallocated_byls_)
492 deallocated_byls_->Fill(lumisection, kB(data.
deallocated));
499 if (time_thread_byls_)
512 allocated_byls_->Fill(lumisection, kB(data.
allocated));
517 if (deallocated_byls_)
518 deallocated_byls_->Fill(lumisection, kB(data.
deallocated));
523 unsigned int lumisection) {
530 time_thread_->Fill(total, fraction);
532 if (time_thread_byls_)
533 time_thread_byls_->Fill(lumisection, total, fraction);
538 time_real_->Fill(total, fraction);
541 time_real_byls_->Fill(lumisection, total, fraction);
546 allocated_->Fill(total, fraction);
549 allocated_byls_->Fill(lumisection, total, fraction);
554 deallocated_->Fill(total, fraction);
556 if (deallocated_byls_)
557 deallocated_byls_->Fill(lumisection, total, fraction);
565 unsigned int lumisections,
570 total_.book(booker,
"path", path.
name_, ranges, lumisections, byls);
574 module_counter_ = booker.
book1DD(
"module_counter",
"module counter", bins + 1, -0.5, bins + 0.5);
576 module_counter_->setStatOverflows(kTRUE);
577 module_time_thread_total_ =
578 booker.
book1DD(
"module_time_thread_total",
"total module time (cpu)", bins, -0.5, bins - 0.5);
579 module_time_thread_total_->
setYTitle(
"processing time [ms]");
580 module_time_thread_total_->setStatOverflows(kTRUE);
581 module_time_real_total_ =
582 booker.
book1DD(
"module_time_real_total",
"total module time (real)", bins, -0.5, bins - 0.5);
583 module_time_real_total_->
setYTitle(
"processing time [ms]");
584 module_time_real_total_->setStatOverflows(kTRUE);
586 module_allocated_total_ =
587 booker.
book1DD(
"module_allocated_total",
"total allocated memory", bins, -0.5, bins - 0.5);
588 module_allocated_total_->
setYTitle(
"memory [kB]");
589 module_allocated_total_->setStatOverflows(kTRUE);
590 module_deallocated_total_ =
591 booker.
book1DD(
"module_deallocated_total",
"total deallocated memory", bins, -0.5, bins - 0.5);
592 module_deallocated_total_->
setYTitle(
"memory [kB]");
593 module_deallocated_total_->setStatOverflows(kTRUE);
595 for (
unsigned int bin : boost::irange(0u, bins)) {
598 module.scheduled_ ?
module.module_.moduleLabel() :
module.module_.moduleLabel() +
" (unscheduled)";
599 module_counter_->setBinLabel(
bin + 1, label);
600 module_time_thread_total_->setBinLabel(
bin + 1, label);
601 module_time_real_total_->setBinLabel(
bin + 1, label);
603 module_allocated_total_->setBinLabel(
bin + 1, label);
604 module_deallocated_total_->setBinLabel(
bin + 1, label);
607 module_counter_->setBinLabel(bins + 1,
"");
617 total_.fill(path.
total, ls);
620 for (
unsigned int i = 0;
i < path.
last; ++
i) {
623 module_counter_->Fill(
i);
625 if (module_time_thread_total_)
626 module_time_thread_total_->Fill(
i, ms(
module.total.time_thread));
628 if (module_time_real_total_)
629 module_time_real_total_->Fill(
i, ms(
module.total.time_real));
631 if (module_allocated_total_)
632 module_allocated_total_->Fill(
i, kB(
module.total.allocated));
634 if (module_deallocated_total_)
635 module_deallocated_total_->Fill(
i, kB(
module.total.deallocated));
637 if (module_counter_ and path.
status)
638 module_counter_->Fill(path.
last);
642 :
event_(), paths_(process.paths_.
size()), endpaths_(process.endPaths_.
size()) {}
649 unsigned int lumisections,
653 event_.book(booker,
"process " + process.
name_,
"process " + process.
name_, event_ranges, lumisections, byls);
656 for (
unsigned int id : boost::irange(0ul, paths_.size())) {
657 paths_[
id].book(booker,
"path ", job, process.
paths_[
id], path_ranges, lumisections, byls);
659 for (
unsigned int id : boost::irange(0ul, endpaths_.size())) {
660 endpaths_[
id].book(booker,
"endpath ", job, process.
endPaths_[
id], path_ranges, lumisections, byls);
674 for (
unsigned int id : boost::irange(0ul, paths_.size()))
675 paths_[
id].fill(description.
paths_[
id], data, process.
paths[
id], ls);
678 for (
unsigned int id : boost::irange(0ul, endpaths_.size()))
691 std::vector<GroupOfModules>
const& groups,
695 unsigned int lumisections,
703 event_.book(booker,
"event",
"Event", event_ranges, lumisections, byls);
705 event_ex_.book(booker,
"explicit",
"Event (explicit)", event_ranges, lumisections, byls);
707 overhead_.book(booker,
"overhead",
"Overhead", event_ranges, lumisections, byls);
709 modules_[job.
source().
id()].book(booker,
"source",
"Source", module_ranges, lumisections, byls);
712 lumi_.book(booker,
"lumi",
"LumiSection transitions", event_ranges, lumisections, byls);
714 run_.book(booker,
"run",
"Run transtions", event_ranges, lumisections,
false);
718 for (
unsigned int group : boost::irange(0ul, groups.size())) {
720 highlight_[
group].book(booker,
"highlight " +
label,
"Highlight " +
label, event_ranges, lumisections, byls);
724 for (
unsigned int pid : boost::irange(0ul, job.
processes().size())) {
726 processes_[pid].book(booker, job,
process, event_ranges, path_ranges, lumisections, bypath, byls);
730 for (
unsigned int id :
process.modules_) {
732 modules_[
id].book(booker, module_name, module_name, module_ranges, lumisections, byls);
742 event_ex_.fill(data.
event, ls);
746 for (
unsigned int group : boost::irange(0ul, highlight_.size()))
750 for (
unsigned int id : boost::irange(0ul, modules_.size()))
751 modules_[
id].fill(data.
modules[
id].total, ls);
753 for (
unsigned int pid : boost::irange(0ul, processes_.size()))
764 lumi_.fill(data, ls);
787 enable_dqm_(config.getUntrackedParameter<bool>(
"enableDQM")),
790 enable_dqm_byls_(config.getUntrackedParameter<bool>(
"enableDQMbyLumiSection")),
798 config.getUntrackedParameter<
double>(
"dqmPathTimeResolution"),
799 config.getUntrackedParameter<
double>(
"dqmPathMemoryRange"),
800 config.getUntrackedParameter<
double>(
"dqmPathMemoryResolution")}),
802 config.getUntrackedParameter<
double>(
"dqmModuleTimeResolution"),
803 config.getUntrackedParameter<
double>(
"dqmModuleMemoryRange"),
804 config.getUntrackedParameter<
double>(
"dqmModuleMemoryResolution")}),
812 tbb::task_scheduler_observer::observe();
886 LogDebug(
"FastTimerService") <<
"The FastTimerService received is currently not monitoring the signal \"" << signal
892 if (unsupported_signals_.insert(signal).second)
893 edm::LogWarning(
"FastTimerService") <<
"The FastTimerService received the unsupported signal \"" << signal
895 <<
"Please report how to reproduce the issue to cms-hlt@cern.ch .";
899 ignoredSignal(__func__);
902 if (isFirstSubprocess(gc)) {
904 subprocess_global_run_check_[
index] = 0;
905 run_transition_[
index].reset();
906 run_summary_[
index].reset();
924 enable_dqm_bymodule_,
927 enable_dqm_transitions_);
946 if (enable_dqm_bynproc_)
948 "/Running on %s with %d streams on %d threads",
processor_model, concurrent_streams_, concurrent_threads_);
952 auto safe_for_dqm =
"/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+=_()# "s;
954 if (safe_for_dqm.find(
c) == std::string::npos)
958 subprocess_event_check_ = std::make_unique<std::atomic<unsigned int>[]>(concurrent_streams_);
959 for (
unsigned int i = 0;
i < concurrent_streams_; ++
i)
960 subprocess_event_check_[
i] = 0;
961 subprocess_global_run_check_ = std::make_unique<std::atomic<unsigned int>[]>(concurrent_runs_);
962 for (
unsigned int i = 0;
i < concurrent_runs_; ++
i)
963 subprocess_global_run_check_[
i] = 0;
964 subprocess_global_lumi_check_ = std::make_unique<std::atomic<unsigned int>[]>(concurrent_lumis_);
965 for (
unsigned int i = 0;
i < concurrent_lumis_; ++
i)
966 subprocess_global_lumi_check_[
i] = 0;
969 lumi_transition_.resize(concurrent_lumis_);
970 run_transition_.resize(concurrent_runs_);
974 callgraph_.preSourceConstruction(module);
979 callgraph_.preBeginJob(pathsAndConsumes, context);
983 unsigned int modules = callgraph_.size();
995 auto const&
label = callgraph_.module(
i).moduleLabel();
1004 streams_.resize(concurrent_streams_, temp);
1005 run_summary_.resize(concurrent_runs_, temp);
1006 job_summary_ =
temp;
1011 enable_dqm_ =
false;
1012 edm::LogWarning(
"FastTimerService") <<
"The DQMStore is not avalable, the DQM plots will not be generated";
1028 ignoredSignal(__func__);
1031 if (isFirstSubprocess(gc)) {
1033 subprocess_global_lumi_check_[
index] = 0;
1034 lumi_transition_[
index].reset();
1043 ignoredSignal(__func__);
1047 bool last = isLastSubprocess(subprocess_global_lumi_check_[
index]);
1054 printTransition(out, lumi_transition_[index],
label);
1056 if (enable_dqm_transitions_) {
1072 ignoredSignal(__func__);
1076 bool last = isLastSubprocess(subprocess_global_run_check_[
index]);
1082 if (print_run_summary_) {
1085 printTransition(out, run_transition_[index],
label);
1087 if (enable_dqm_transitions_) {
1088 plots_->fill_run(run_transition_[index]);
1099 thread().measure_and_accumulate(lumi_transition_[index]);
1104 tbb::task_scheduler_observer::observe(
false);
1106 if (print_job_summary_) {
1110 if (write_json_summary_) {
1111 writeSummaryJSON(job_summary_, json_filename_);
1115 template <
typename T>
1117 out <<
"FastReport ";
1118 if (label.size() < 60)
1119 for (
unsigned int i = (60 - label.size()) / 2;
i > 0; --
i)
1121 out <<
' ' << label <<
" Summary ";
1122 if (label.size() < 60)
1123 for (
unsigned int i = (59 - label.size()) / 2;
i > 0; --
i)
1128 template <
typename T>
1130 out <<
"FastReport CPU time Real time Allocated Deallocated " << label <<
"\n";
1134 template <
typename T>
1136 out << fmt::sprintf(
"FastReport %10.1f ms %10.1f ms %+10d kB %+10d kB %s\n",
1139 +static_cast<int64_t>(kB(data.
allocated)),
1144 template <
typename T>
1146 out << fmt::sprintf(
"FastReport %10.1f ms %10.1f ms %+10d kB %+10d kB %s\n",
1149 +
static_cast<int64_t
>(kB(data.
allocated)),
1154 template <
typename T>
1156 printHeader(out,
"Event");
1157 printEventHeader(out,
"Modules");
1158 auto const& source_d = callgraph_.source();
1160 printEventLine(out,
source.total, source_d.moduleLabel());
1161 for (
unsigned int i = 0;
i < callgraph_.processes().size(); ++
i) {
1162 auto const& proc_d = callgraph_.processDescription(
i);
1164 printEventLine(out,
proc.total,
"process " + proc_d.name_);
1165 for (
unsigned int m : proc_d.modules_) {
1166 auto const& module_d = callgraph_.module(m);
1168 printEventLine(out,
module.total,
" " + module_d.moduleLabel());
1171 printEventLine(out, data.
total,
"total");
1173 printEventHeader(out,
"Processes and Paths");
1174 printEventLine(out,
source.total, source_d.moduleLabel());
1175 for (
unsigned int i = 0;
i < callgraph_.processes().size(); ++
i) {
1176 auto const& proc_d = callgraph_.processDescription(
i);
1178 printEventLine(out,
proc.total,
"process " + proc_d.name_);
1179 for (
unsigned int p = 0;
p <
proc.paths.size(); ++
p) {
1180 auto const&
name = proc_d.paths_[
p].name_;
1182 printEventLine(out,
path.active,
name +
" (only scheduled modules)");
1183 printEventLine(out,
path.total,
name +
" (including dependencies)");
1185 for (
unsigned int p = 0;
p <
proc.endpaths.size(); ++
p) {
1186 auto const&
name = proc_d.endPaths_[
p].name_;
1188 printEventLine(out,
path.active,
name +
" (only scheduled modules)");
1189 printEventLine(out,
path.total,
name +
" (including dependencies)");
1192 printEventLine(out, data.
total,
"total");
1195 printEventHeader(out,
"Highlighted modules");
1197 auto const& module_d = callgraph_.module(
m);
1199 printEventLine(out,
module.total,
" " + module_d.moduleLabel());
1206 template <
typename T>
1210 out <<
"FastReport CPU time avg. when run Real time avg. when run Alloc. avg. when run Dealloc. avg. when run ";
1213 out <<
"FastReport CPU time avg. Real time avg. Alloc. avg. Dealloc. avg. ";
1215 out << label <<
'\n';
1219 template <
typename T>
1222 out <<
"FastReport CPU time sched. / depend. Real time sched. / depend. Alloc. sched. / depend. Dealloc. sched. / depend. ";
1224 out << label <<
'\n';
1228 template <
typename T>
1230 out << fmt::sprintf(
1232 "FastReport %10.1f ms %10.1f ms %+10d kB %+10d kB %s\n",
1235 (events ? ms(data.
time_real) / events : 0),
1236 (events ? +static_cast<int64_t>(kB(data.
allocated) / events) : 0),
1237 (events ? -static_cast<int64_t>(kB(data.
deallocated) / events) : 0),
1241 template <
typename T>
1244 out << fmt::sprintf(
1246 "FastReport %10.1f ms %10.1f ms %10.1f ms %10.1f ms %+10d kB %+10d kB %+10d kB %+10d kB %s\n",
1250 (events ? ms(data.
time_real) / events : 0),
1251 (active ? ms(data.
time_real) / active : 0),
1252 (events ? +static_cast<int64_t>(kB(data.
allocated) / events) : 0),
1253 (active ? +static_cast<int64_t>(kB(data.
allocated) / active) : 0),
1254 (events ? -static_cast<int64_t>(kB(data.
deallocated) / events) : 0),
1255 (active ? -static_cast<int64_t>(kB(data.
deallocated) / active) : 0),
1259 template <
typename T>
1264 out << fmt::sprintf(
1266 "FastReport %10.1f ms %10.1f ms %+10d kB %+10d kB %s\n",
1269 (events ? ms(data.
time_real) / events : 0),
1270 (events ? +static_cast<int64_t>(kB(data.
allocated) / events) : 0),
1271 (events ? -static_cast<int64_t>(kB(data.
deallocated) / events) : 0),
1275 template <
typename T>
1278 out << fmt::sprintf(
1279 "FastReport %10.1f ms %10.1f ms %10.1f ms %10.1f ms %+10d kB %+10d kB %+10d kB %+10d kB %s\n",
1282 (events ? ms(data.
time_real) / events : 0),
1283 (active ? ms(data.
time_real) / active : 0),
1284 (events ? +static_cast<int64_t>(kB(data.
allocated) / events) : 0),
1285 (active ? +static_cast<int64_t>(kB(data.
allocated) / active) : 0),
1286 (events ? -static_cast<int64_t>(kB(data.
deallocated) / events) : 0),
1287 (active ? -static_cast<int64_t>(kB(data.
deallocated) / active) : 0),
1291 template <
typename T>
1294 out << fmt::sprintf(
1295 "FastReport %10.1f ms %10.1f ms %10.1f ms %10.1f ms %+10d kB %+10d kB %+10d kB %+10d kB %s\n",
1298 (events ? ms(data.
time_real) / events : 0),
1299 (events ? ms(total.
time_real) / events : 0),
1300 (events ? +static_cast<int64_t>(kB(data.
allocated) / events) : 0),
1301 (events ? +static_cast<int64_t>(kB(total.
allocated) / events) : 0),
1302 (events ? -static_cast<int64_t>(kB(data.
deallocated) / events) : 0),
1303 (events ? -static_cast<int64_t>(kB(total.
deallocated) / events) : 0),
1307 template <
typename T>
1309 printHeader(out, label);
1310 printSummaryHeader(out,
"Modules",
true);
1311 auto const& source_d = callgraph_.source();
1313 printSummaryLine(out,
source.total, data.
events,
source.events, source_d.moduleLabel());
1314 for (
unsigned int i = 0;
i < callgraph_.processes().size(); ++
i) {
1315 auto const& proc_d = callgraph_.processDescription(
i);
1317 printSummaryLine(out,
proc.total, data.
events,
"process " + proc_d.name_);
1318 for (
unsigned int m : proc_d.modules_) {
1319 auto const& module_d = callgraph_.module(m);
1321 printSummaryLine(out,
module.total, data.
events,
module.events,
" " + module_d.moduleLabel());
1324 printSummaryLine(out, data.
total, data.
events,
"total");
1327 printPathSummaryHeader(out,
"Processes and Paths");
1328 printSummaryLine(out,
source.total, data.
events, source_d.moduleLabel());
1329 for (
unsigned int i = 0;
i < callgraph_.processes().size(); ++
i) {
1330 auto const& proc_d = callgraph_.processDescription(
i);
1332 printSummaryLine(out,
proc.total, data.
events,
"process " + proc_d.name_);
1333 for (
unsigned int p = 0;
p <
proc.paths.size(); ++
p) {
1334 auto const&
name = proc_d.paths_[
p].name_;
1338 for (
unsigned int p = 0;
p <
proc.endpaths.size(); ++
p) {
1339 auto const&
name = proc_d.endPaths_[
p].name_;
1344 printSummaryLine(out, data.
total, data.
events,
"total");
1348 printSummaryHeader(out,
"Highlighted modules",
true);
1350 auto const& module_d = callgraph_.module(
m);
1352 printSummaryLine(out,
module.total, data.
events,
module.events, module_d.moduleLabel());
1359 template <
typename T>
1361 printEventHeader(out,
"Transition");
1362 printEventLine(out, data, label);
1365 template <
typename T>
1369 T const&
data)
const {
1370 return json{{
"type", type},
1373 {
"time_thread", ms(data.time_thread)},
1374 {
"time_real", ms(data.time_real)},
1375 {
"mem_alloc", kB(data.allocated)},
1376 {
"mem_free", kB(data.deallocated)}};
1388 json{{
"time_thread",
"cpu time"}},
1389 json{{
"mem_alloc",
"allocated memory"}},
1390 json{{
"mem_free",
"deallocated memory"}}});
1393 j[
"total"] = encodeToJSON(
"Job", callgraph_.processDescription(0).name_, data.
events, data.
total + data.
overhead);
1397 for (
unsigned int i = 0;
i < callgraph_.size(); ++
i) {
1398 auto const& module = callgraph_.module(
i);
1399 auto const& data_m = data.
modules[
i];
1400 j[
"modules"].push_back(encodeToJSON(module, data_m));
1404 j[
"modules"].push_back(encodeToJSON(
"other",
"other", data.
events, data.
overhead));
1406 std::ofstream
out(filename);
1407 out << std::setw(2) << j << std::flush;
1423 unsigned int old_value = check.fetch_add(1, std::memory_order_acq_rel);
1424 return (old_value == callgraph_.processes().size() - 1);
1430 ignoredSignal(__func__);
1434 auto&
stream = streams_[sid];
1435 auto&
process = callgraph_.processDescription(pid);
1438 auto& data =
stream.processes[pid].total;
1439 for (
unsigned int id :
process.modules_)
1444 bool last = isLastSubprocess(subprocess_event_check_[sid]);
1449 stream.event_measurement.measure_and_store(
stream.event);
1452 unsigned int id = 0;
1462 std::lock_guard<std::mutex> guard(summary_mutex_);
1467 if (print_event_summary_) {
1479 auto&
stream = streams_[sid];
1483 subprocess_event_check_[sid] = 0;
1486 auto& measurement = thread();
1487 measurement.measure_and_accumulate(
stream.overhead);
1488 stream.event_measurement = measurement;
1493 unsigned int id = md.
id();
1494 auto&
stream = streams_[sid];
1497 thread().measure_and_store(module.total);
1504 unsigned int id = pc.
pathID();
1505 auto&
stream = streams_[sid];
1507 data.status =
false;
1516 unsigned int id = pc.
pathID();
1517 auto&
stream = streams_[sid];
1521 pc.
isEndPath() ? callgraph_.processDescription(pid).endPaths_[
id] : callgraph_.processDescription(pid).paths_[
id];
1522 unsigned int index =
path.modules_on_path_.empty() ? 0 : status.
index() + 1;
1523 data.last =
path.modules_on_path_.empty() ? 0 :
path.last_dependency_of_module_[status.
index()];
1525 for (
unsigned int i = 0;
i <
index; ++
i) {
1526 auto const& module =
stream.modules[
path.modules_on_path_[
i]];
1527 data.active += module.total;
1529 for (
unsigned int i = 0;
i < data.last; ++
i) {
1530 auto const& module =
stream.modules[
path.modules_and_dependencies_[
i]];
1531 data.total += module.total;
1537 auto&
stream = streams_[sid];
1538 thread().measure_and_accumulate(
stream.overhead);
1543 unsigned int id = md.
id();
1545 auto&
stream = streams_[sid];
1548 module.has_acquire =
true;
1549 thread().measure_and_store(module.total);
1554 auto&
stream = streams_[sid];
1555 thread().measure_and_accumulate(
stream.overhead);
1560 unsigned int id = md.
id();
1562 auto&
stream = streams_[sid];
1565 if (module.has_acquire) {
1566 thread().measure_and_accumulate(module.total);
1568 thread().measure_and_store(module.total);
1574 unsupportedSignal(__func__);
1578 unsupportedSignal(__func__);
1582 ignoredSignal(__func__);
1586 ignoredSignal(__func__);
1590 ignoredSignal(__func__);
1594 ignoredSignal(__func__);
1598 thread().measure_and_accumulate(overhead_);
1603 thread().measure_and_accumulate(run_transition_[
index]);
1607 thread().measure_and_accumulate(overhead_);
1612 thread().measure_and_accumulate(run_transition_[
index]);
1616 thread().measure_and_accumulate(overhead_);
1621 thread().measure_and_accumulate(lumi_transition_[
index]);
1625 thread().measure_and_accumulate(overhead_);
1630 thread().measure_and_accumulate(lumi_transition_[
index]);
1634 thread().measure_and_accumulate(overhead_);
1639 thread().measure_and_accumulate(run_transition_[
index]);
1643 thread().measure_and_accumulate(overhead_);
1648 thread().measure_and_accumulate(run_transition_[
index]);
1652 thread().measure_and_accumulate(overhead_);
1657 thread().measure_and_accumulate(lumi_transition_[
index]);
1661 thread().measure_and_accumulate(overhead_);
1666 thread().measure_and_accumulate(lumi_transition_[
index]);
1672 throw cms::Exception(
"FastTimerService") <<
"ThreadGuard key creation failed: " << ::strerror(
err);
1678 auto ptr = ::pthread_getspecific(key_);
1681 auto p = thread_resources_.emplace_back(std::make_shared<specific_t>(r));
1682 auto pp =
new std::shared_ptr<specific_t>(*p);
1683 auto err = ::pthread_setspecific(key_,
pp);
1685 throw cms::Exception(
"FastTimerService") <<
"ThreadGuard pthread_setspecific failed: " << ::strerror(
err);
1693 return static_cast<std::shared_ptr<specific_t>*
>(
p);
1699 auto expected =
true;
1700 if ((*ps)->live_.compare_exchange_strong(expected,
false)) {
1702 (*ps)->measurement_.measure_and_accumulate((*ps)->resource_);
1709 for (
auto&
p : thread_resources_) {
1710 auto expected =
true;
1711 if (
p->live_.compare_exchange_strong(expected,
false)) {
1712 p->measurement_.measure_and_accumulate(
p->resource_);
1718 return (*ptr(::pthread_getspecific(key_)))->measurement_;
1748 desc.
addUntracked<
bool>(
"enableDQMbyLumiSection",
false);
1749 desc.
addUntracked<
bool>(
"enableDQMbyProcesses",
false);
1750 desc.
addUntracked<
bool>(
"enableDQMTransitions",
false);
1754 desc.
addUntracked<
double>(
"dqmMemoryResolution", 5000.);
1756 desc.
addUntracked<
double>(
"dqmPathTimeResolution", 0.5);
1757 desc.
addUntracked<
double>(
"dqmPathMemoryRange", 1000000.);
1758 desc.
addUntracked<
double>(
"dqmPathMemoryResolution", 5000.);
1760 desc.
addUntracked<
double>(
"dqmModuleTimeResolution", 0.2);
1761 desc.
addUntracked<
double>(
"dqmModuleMemoryRange", 100000.);
1762 desc.
addUntracked<
double>(
"dqmModuleMemoryResolution", 500.);
1763 desc.
addUntracked<
unsigned>(
"dqmLumiSectionsRange", 2500);
1767 highlightModulesDescription.
addUntracked<std::vector<std::string>>(
"modules", {});
1768 highlightModulesDescription.addUntracked<
std::string>(
"label",
"producers");
1774 ->
setComment(
"This parameter is obsolete and will be ignored.");
1776 ->
setComment(
"This parameter is obsolete and will be ignored.");
1778 ->
setComment(
"This parameter is obsolete and will be ignored.");
1780 ->
setComment(
"This parameter is obsolete and will be ignored.");
1782 ->
setComment(
"This parameter is obsolete and will be ignored.");
1784 ->
setComment(
"This parameter is obsolete and will be ignored.");
1786 ->
setComment(
"This parameter is obsolete and will be ignored.");
1788 ->
setComment(
"This parameter is obsolete and will be ignored.");
1790 ->
setComment(
"This parameter is obsolete and will be ignored.");
1792 ->
setComment(
"This parameter is obsolete and will be ignored.");
1794 ->
setComment(
"This parameter is obsolete and will be ignored.");
1796 ->
setComment(
"This parameter is obsolete and will be ignored.");
1798 ->
setComment(
"This parameter is obsolete and will be ignored.");
1800 ->
setComment(
"This parameter is obsolete and will be ignored.");
1802 descriptions.
add(
"FastTimerService", desc);
ResourcesPerJob()=default
void preGlobalBeginRun(edm::GlobalContext const &)
void preModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
const bool print_run_summary_
unsigned int maxNumberOfThreads() const
constexpr int32_t ceil(float num)
list processes
Run mode ##.
void fill(Resources const &, unsigned int lumisection)
void setComment(std::string const &value)
T getUntrackedParameter(std::string const &, T const &) const
void postStreamBeginRun(edm::StreamContext const &)
LuminosityBlockIndex const & luminosityBlockIndex() const
void postGlobalEndLumi(edm::GlobalContext const &)
void postGlobalBeginLumi(edm::GlobalContext const &)
void on_scheduler_entry(bool worker) final
void postStreamEndLumi(edm::StreamContext const &)
void on_scheduler_exit(bool worker) final
void postModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
const edm::EventSetup & c
void preEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preGlobalEndRun(edm::GlobalContext const &)
void preBeginJob(edm::PathsAndConsumesOfModulesBase const &, edm::ProcessContext const &)
void fill_lumi(AtomicResources const &, unsigned int lumisection)
void preSourceLumi(edm::LuminosityBlockIndex)
void printSummary(T &out, ResourcesPerJob const &data, std::string const &label) const
boost::date_time::subsecond_duration< boost::posix_time::time_duration, 1000000000 > nanoseconds
AtomicResources operator+(AtomicResources const &other) const
unsigned int concurrent_threads_
std::vector< unsigned int > modules_and_dependencies_
void postEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
uint16_t *__restrict__ id
const PlotRanges dqm_event_ranges_
void postModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventIDconst &, edm::Timestampconst & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
void preSourceRun(edm::RunIndex)
void postModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
void unsupportedSignal(const std::string &signal) const
virtual void setCurrentFolder(std::string const &fullpath)
const bool write_json_summary_
void book(dqm::reco::DQMStore::IBooker &, ProcessCallGraph const &, ProcessCallGraph::ProcessType const &, PlotRanges const &event_ranges, PlotRanges const &path_ranges, unsigned int lumisections, bool bypath, bool byls)
PlotsPerJob(ProcessCallGraph const &job, std::vector< GroupOfModules > const &groups)
PlotsPerProcess(ProcessCallGraph::ProcessType const &)
void preGlobalEndLumi(edm::GlobalContext const &)
std::vector< ResourcesPerPath > endpaths
virtual std::string pwd()
LuminosityBlockID const & luminosityBlockID() const
AtomicResources overhead_
const std::string json_filename_
bool isFirstSubprocess(edm::StreamContext const &)
const bool enable_dqm_bynproc_
unsigned int concurrent_runs_
void writeSummaryJSON(ResourcesPerJob const &data, std::string const &filename) const
boost::chrono::nanoseconds time_real
std::string const & moduleName() const
std::atomic< uint64_t > deallocated
void postSourceRun(edm::RunIndex)
std::atomic< uint64_t > allocated
RunIndex const & runIndex() const
void postPathEvent(edm::StreamContext const &, edm::PathContext const &, edm::HLTPathStatus const &)
void preModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
ResourcesPerProcess & operator+=(ResourcesPerProcess const &other)
void book(dqm::reco::DQMStore::IBooker &, std::string const &, ProcessCallGraph const &, ProcessCallGraph::PathType const &, PlotRanges const &ranges, unsigned int lumisections, bool byls)
LuminosityBlockNumber_t luminosityBlock() const
const bool print_job_summary_
std::string const & moduleLabel() const
const bool enable_dqm_bypath_
edm::ModuleDescription const & source() const
void book(dqm::reco::DQMStore::IBooker &, ProcessCallGraph const &, std::vector< GroupOfModules > const &, PlotRanges const &event_ranges, PlotRanges const &path_ranges, PlotRanges const &module_ranges, unsigned int lumisections, bool bymodule, bool bypath, bool byls, bool transitions)
dqm_lumisections_range_(config.getUntrackedParameter< unsigned int >("dqmLumiSectionsRange"))
void printSummary(const std::map< unsigned int, SiStripDetSummary::Values > &map)
void postStreamBeginLumi(edm::StreamContext const &)
RunIndex const & runIndex() const
unsigned int maxNumberOfStreams() const
ResourcesPerProcess operator+(ResourcesPerProcess const &other) const
ResourcesPerJob operator+(ResourcesPerJob const &other) const
std::vector< ResourcesPerModule > modules
void printEvent(T &out, ResourcesPerJob const &) const
void printHeader(T &out, std::string const &label) const
void postModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
MonitorElement * book1DD(TString const &name, TString const &title, int nchX, double lowX, double highX, FUNC onbooking=NOOP())
void printPathSummaryHeader(T &out, std::string const &label) const
ResourcesPerJob & operator+=(ResourcesPerJob const &other)
void printSummaryHeader(T &out, std::string const &label, bool detailed) const
virtual void setXTitle(std::string const &title)
void fill(ProcessCallGraph::ProcessType const &, ResourcesPerJob const &, ResourcesPerProcess const &, unsigned int ls)
void prePathEvent(edm::StreamContext const &, edm::PathContext const &)
LuminosityBlockIndex const & luminosityBlockIndex() const
MonitorElement * bookProfile(TString const &name, TString const &title, int nchX, double lowX, double highX, int, double lowY, double highY, char const *option="s", FUNC onbooking=NOOP())
const std::string processor_model
void printEventHeader(T &out, std::string const &label) const
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
FastTimerService(const edm::ParameterSet &, edm::ActivityRegistry &)
void preModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postEvent(edm::StreamContext const &)
void postGlobalEndRun(edm::GlobalContext const &)
static uint64_t deallocated()
ModuleDescription const * moduleDescription() const
void fill_fraction(Resources const &, Resources const &, unsigned int lumisection)
std::vector< ProcessType > const & processes() const
ResourcesPerModule operator+(ResourcesPerModule const &other) const
void postStreamEndRun(edm::StreamContext const &)
std::vector< PathType > paths_
void preModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
ResourcesPerPath & operator+=(ResourcesPerPath const &other)
ProcessType const & processDescription(unsigned int) const
ParameterDescriptionNode * addOptionalNode(ParameterDescriptionNode const &node, bool writeToCfi)
unsigned int concurrent_lumis_
ResourcesPerModule() noexcept
bool register_thread(FastTimerService::AtomicResources &r)
void printSummaryLine(T &out, Resources const &data, uint64_t events, std::string const &label) const
unsigned int pathID() const
const bool enable_dqm_transitions_
ProcessContext const * processContext() const
void printEventLine(T &out, Resources const &data, std::string const &label) const
std::atomic< boost::chrono::nanoseconds::rep > time_thread
Resources operator+(Resources const &other) const
void measure_and_store(Resources &store) noexcept
std::vector< Resources > highlight
const bool enable_dqm_bymodule_
void printPathSummaryLine(T &out, Resources const &data, Resources const &total, uint64_t events, std::string const &label) const
StreamID const & streamID() const
void book(dqm::reco::DQMStore::IBooker &, std::string const &name, std::string const &title, PlotRanges const &ranges, unsigned int lumisections, bool byls)
void fill_run(AtomicResources const &)
#define DEFINE_FWK_SERVICE(type)
static bool is_available()
const bool enable_dqm_byls_
std::vector< PlotsPerProcess > processes_
void postModuleEventAcquire(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
const bool print_event_summary_
virtual void setYTitle(std::string const &title)
unsigned int value() const
highlight_modules_(highlight_module_psets_.size())
unsigned int maxNumberOfConcurrentLuminosityBlocks() const
static std::shared_ptr< specific_t > * ptr(void *p)
unsigned long long uint64_t
void postModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void printTransition(T &out, AtomicResources const &data, std::string const &label) const
void preModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
static uint64_t allocated()
void postModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceEvent(edm::StreamID)
static void retire_thread(void *t)
boost::chrono::nanoseconds time_thread
LuminosityBlockNumber_t luminosityBlock() const
void add(std::string const &label, ParameterSetDescription const &psetDescription)
dqm_path_(config.getUntrackedParameter< std::string >("dqmPath"))
void preEvent(edm::StreamContext const &)
dqm_path_ranges_({config.getUntrackedParameter< double >("dqmPathTimeRange"), config.getUntrackedParameter< double >("dqmPathTimeResolution"), config.getUntrackedParameter< double >("dqmPathMemoryRange"), config.getUntrackedParameter< double >("dqmPathMemoryResolution")})
void postGlobalBeginRun(edm::GlobalContext const &)
ProcessContext const * processContext() const
void preStreamBeginLumi(edm::StreamContext const &)
ResourcesPerProcess(ProcessCallGraph::ProcessType const &process)
void ignoredSignal(const std::string &signal) const
void postSourceEvent(edm::StreamID)
void preModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
tuple config
parse the configuration file
void preStreamEndRun(edm::StreamContext const &)
char data[epos_bytes_allocation]
std::atomic< boost::chrono::nanoseconds::rep > time_real
void preModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preStreamEndLumi(edm::StreamContext const &)
void measure_and_accumulate(Resources &store) noexcept
ResourcesPerModule & operator+=(ResourcesPerModule const &other)
std::vector< PathType > endPaths_
void postModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
edm::ModuleDescription const & module(unsigned int module) const
void preGlobalBeginLumi(edm::GlobalContext const &)
unsigned int maxNumberOfConcurrentRuns() const
AtomicResources & operator+=(AtomicResources const &other)
Resources & operator+=(Resources const &other)
void preModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceConstruction(edm::ModuleDescription const &)
void preModuleEventAcquire(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
EventID const & eventID() const
void postSourceLumi(edm::LuminosityBlockIndex)
dqm_module_ranges_({config.getUntrackedParameter< double >("dqmModuleTimeRange"), config.getUntrackedParameter< double >("dqmModuleTimeResolution"), config.getUntrackedParameter< double >("dqmModuleMemoryRange"), config.getUntrackedParameter< double >("dqmModuleMemoryResolution")})
void postModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
std::vector< ResourcesPerProcess > processes
Log< level::Warning, false > LogWarning
MonitorElement * book1D(TString const &name, TString const &title, int const nchX, double const lowX, double const highX, FUNC onbooking=NOOP())
UseScope< MonitorElementData::Scope::RUN > UseRunScope
AtomicResources & operator=(AtomicResources const &other)
highlight_module_psets_(config.getUntrackedParameter< std::vector< edm::ParameterSet >>("highlightModules"))
ProcessCallGraph callgraph_
void postModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preStreamBeginRun(edm::StreamContext const &)
std::vector< ResourcesPerPath > paths
bool isSubProcess() const
ResourcesPerPath operator+(ResourcesPerPath const &other) const
void preModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
ParameterDescriptionBase * addVPSetUntracked(U const &iLabel, ParameterSetDescription const &validator, std::vector< ParameterSet > const &defaults)
static std::string const source
tuple size
Write out results.
unsigned int concurrent_streams_
bool isLastSubprocess(std::atomic< unsigned int > &check)
void preallocate(edm::service::SystemBounds const &)
void fill(ProcessCallGraph const &, ResourcesPerJob const &, unsigned int ls)
json encodeToJSON(std::string const &type, std::string const &label, unsigned int events, T const &data) const
void fill(ProcessCallGraph::PathType const &, ResourcesPerJob const &, ResourcesPerPath const &, unsigned int lumisection)
unsigned int index() const