10 #include <unordered_map> 11 #include <unordered_set> 14 #include <boost/range/irange.hpp> 17 #include <fmt/printf.h> 20 #include <nlohmann/json.hpp> 51 template <
class Rep,
class Period>
52 double ms(std::chrono::duration<Rep, Period> duration) {
53 return std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(duration).
count();
57 template <
class Rep,
class Period>
58 double ms(boost::chrono::duration<Rep, Period> duration) {
59 return boost::chrono::duration_cast<boost::chrono::duration<double, boost::milli>>(duration).
count();
63 double ms(std::atomic<boost::chrono::nanoseconds::rep>
const& duration) {
64 return boost::chrono::duration_cast<boost::chrono::duration<double, boost::milli>>(
73 uint64_t kB(std::atomic<uint64_t>
const& bytes) {
return bytes.load() / 1024; }
97 time_thread +=
other.time_thread;
98 time_real +=
other.time_real;
99 allocated +=
other.allocated;
100 deallocated +=
other.deallocated;
107 allocated +=
other.allocated.load();
108 deallocated +=
other.deallocated.load();
130 : time_thread(0ul), time_real(0ul), allocated(0ul), deallocated(0ul) {}
146 time_thread =
other.time_thread.load();
147 time_real =
other.time_real.load();
148 allocated =
other.allocated.load();
149 deallocated =
other.deallocated.load();
154 time_thread +=
other.time_thread.load();
155 time_real +=
other.time_real.load();
156 allocated +=
other.allocated.load();
157 deallocated +=
other.deallocated.load();
162 time_thread +=
other.time_thread.count();
163 time_real +=
other.time_real.count();
164 allocated +=
other.allocated;
165 deallocated +=
other.deallocated;
176 return other + *
this;
192 has_acquire = has_acquire
or other.has_acquire;
213 active +=
other.active;
235 for (
auto&
path : endpaths)
243 for (
unsigned int i : boost::irange(0ul,
paths.size()))
246 for (
unsigned int i : boost::irange(0ul, endpaths.size()))
247 endpaths[
i] +=
other.endpaths[
i];
261 std::vector<GroupOfModules>
const& groups)
263 processes.reserve(job.processes().size());
264 for (
auto const&
process : job.processes())
274 for (
auto&
module : highlight)
285 overhead +=
other.overhead;
287 eventsetup +=
other.eventsetup;
288 event +=
other.event;
289 assert(highlight.size() ==
other.highlight.size());
290 for (
unsigned int i : boost::irange(0ul, highlight.size()))
291 highlight[
i] +=
other.highlight[
i];
293 for (
unsigned int i : boost::irange(0ul,
modules.size()))
296 for (
unsigned int i : boost::irange(0ul,
processes.size()))
315 #ifdef DEBUG_THREAD_CONCURRENCY 316 id = std::this_thread::get_id();
317 #endif // DEBUG_THREAD_CONCURRENCY 325 #ifdef DEBUG_THREAD_CONCURRENCY 326 assert(std::this_thread::get_id() ==
id);
327 #endif // DEBUG_THREAD_CONCURRENCY 332 store.time_thread = new_time_thread - time_thread;
333 store.time_real = new_time_real - time_real;
334 store.allocated = new_allocated - allocated;
335 store.deallocated = new_deallocated - deallocated;
336 time_thread = new_time_thread;
337 time_real = new_time_real;
338 allocated = new_allocated;
339 deallocated = new_deallocated;
343 #ifdef DEBUG_THREAD_CONCURRENCY 344 assert(std::this_thread::get_id() ==
id);
345 #endif // DEBUG_THREAD_CONCURRENCY 350 store.time_thread += new_time_thread - time_thread;
351 store.time_real += new_time_real - time_real;
352 store.allocated += new_allocated - allocated;
353 store.deallocated += new_deallocated - deallocated;
354 time_thread = new_time_thread;
355 time_real = new_time_real;
356 allocated = new_allocated;
357 deallocated = new_deallocated;
361 #ifdef DEBUG_THREAD_CONCURRENCY 362 assert(std::this_thread::get_id() ==
id);
363 #endif // DEBUG_THREAD_CONCURRENCY 370 store.allocated += new_allocated - allocated;
371 store.deallocated += new_deallocated - deallocated;
372 time_thread = new_time_thread;
373 time_real = new_time_real;
374 allocated = new_allocated;
375 deallocated = new_deallocated;
384 unsigned int lumisections,
388 std::string y_title_ms = fmt::sprintf(
"events / %.1f ms",
ranges.time_resolution);
389 std::string y_title_kB = fmt::sprintf(
"events / %.1f kB",
ranges.memory_resolution);
393 booker.
book1D(
name +
" time_thread",
title +
" processing time (cpu)", time_bins, 0.,
ranges.time_range);
394 time_thread_->
setXTitle(
"processing time [ms]");
395 time_thread_->setYTitle(y_title_ms);
396 time_thread_->setStatOverflows(kTRUE);
398 time_real_ = booker.
book1D(
name +
" time_real",
title +
" processing time (real)", time_bins, 0.,
ranges.time_range);
399 time_real_->
setXTitle(
"processing time [ms]");
400 time_real_->setYTitle(y_title_ms);
401 time_real_->setStatOverflows(kTRUE);
404 allocated_ = booker.
book1D(
name +
" allocated",
title +
" allocated memory", mem_bins, 0.,
ranges.memory_range);
406 allocated_->setYTitle(y_title_kB);
407 allocated_->setStatOverflows(kTRUE);
410 booker.
book1D(
name +
" deallocated",
title +
" deallocated memory", mem_bins, 0.,
ranges.memory_range);
412 deallocated_->setYTitle(y_title_kB);
413 deallocated_->setStatOverflows(kTRUE);
420 title +
" processing time (cpu) vs. lumisection",
428 time_thread_byls_->
setXTitle(
"lumisection");
429 time_thread_byls_->setYTitle(
"processing time [ms]");
430 time_thread_byls_->setStatOverflows(kTRUE);
433 title +
" processing time (real) vs. lumisection",
441 time_real_byls_->
setXTitle(
"lumisection");
442 time_real_byls_->setYTitle(
"processing time [ms]");
443 time_real_byls_->setStatOverflows(kTRUE);
447 title +
" allocated memory vs. lumisection",
455 allocated_byls_->
setXTitle(
"lumisection");
456 allocated_byls_->setYTitle(
"memory [kB]");
457 allocated_byls_->setStatOverflows(kTRUE);
460 title +
" deallocated memory vs. lumisection",
468 deallocated_byls_->
setXTitle(
"lumisection");
469 deallocated_byls_->setYTitle(
"memory [kB]");
470 deallocated_byls_->setStatOverflows(kTRUE);
476 time_thread_->Fill(ms(
data.time_thread));
478 if (time_thread_byls_)
479 time_thread_byls_->Fill(lumisection, ms(
data.time_thread));
482 time_real_->Fill(ms(
data.time_real));
485 time_real_byls_->Fill(lumisection, ms(
data.time_real));
488 allocated_->Fill(kB(
data.allocated));
491 allocated_byls_->Fill(lumisection, kB(
data.allocated));
494 deallocated_->Fill(kB(
data.deallocated));
496 if (deallocated_byls_)
497 deallocated_byls_->Fill(lumisection, kB(
data.deallocated));
504 if (time_thread_byls_)
514 allocated_->Fill(kB(
data.allocated));
517 allocated_byls_->Fill(lumisection, kB(
data.allocated));
520 deallocated_->Fill(kB(
data.deallocated));
522 if (deallocated_byls_)
523 deallocated_byls_->Fill(lumisection, kB(
data.deallocated));
528 unsigned int lumisection) {
537 if (time_thread_byls_)
561 if (deallocated_byls_)
570 unsigned int lumisections,
577 total_.book(booker,
"path",
path.name_,
ranges, lumisections, byls);
580 unsigned int bins =
path.modules_and_dependencies_.size();
581 module_counter_ = booker.
book1DD(
"module_counter",
"module counter",
bins + 1, -0.5,
bins + 0.5);
583 module_counter_->setStatOverflows(kTRUE);
584 module_time_thread_total_ =
585 booker.
book1DD(
"module_time_thread_total",
"total module time (cpu)",
bins, -0.5,
bins - 0.5);
586 module_time_thread_total_->
setYTitle(
"processing time [ms]");
587 module_time_thread_total_->setStatOverflows(kTRUE);
588 module_time_real_total_ =
589 booker.
book1DD(
"module_time_real_total",
"total module time (real)",
bins, -0.5,
bins - 0.5);
590 module_time_real_total_->
setYTitle(
"processing time [ms]");
591 module_time_real_total_->setStatOverflows(kTRUE);
593 module_allocated_total_ =
594 booker.
book1DD(
"module_allocated_total",
"total allocated memory",
bins, -0.5,
bins - 0.5);
595 module_allocated_total_->
setYTitle(
"memory [kB]");
596 module_allocated_total_->setStatOverflows(kTRUE);
597 module_deallocated_total_ =
598 booker.
book1DD(
"module_deallocated_total",
"total deallocated memory",
bins, -0.5,
bins - 0.5);
599 module_deallocated_total_->
setYTitle(
"memory [kB]");
600 module_deallocated_total_->setStatOverflows(kTRUE);
602 for (
unsigned int bin : boost::irange(0u,
bins)) {
603 auto const&
module = job[
path.modules_and_dependencies_[
bin]];
605 module.scheduled_ ?
module.module_.moduleLabel() :
module.module_.moduleLabel() +
" (unscheduled)";
606 module_counter_->setBinLabel(
bin + 1,
label);
607 module_time_thread_total_->setBinLabel(
bin + 1,
label);
608 module_time_real_total_->setBinLabel(
bin + 1,
label);
610 module_allocated_total_->setBinLabel(
bin + 1,
label);
611 module_deallocated_total_->setBinLabel(
bin + 1,
label);
614 module_counter_->setBinLabel(
bins + 1,
"");
624 total_.fill(
path.total,
ls);
627 for (
unsigned int i = 0;
i <
path.last; ++
i) {
630 module_counter_->Fill(
i);
632 if (module_time_thread_total_)
633 module_time_thread_total_->Fill(
i, ms(
module.total.time_thread));
635 if (module_time_real_total_)
636 module_time_real_total_->Fill(
i, ms(
module.total.time_real));
638 if (module_allocated_total_)
639 module_allocated_total_->Fill(
i, kB(
module.total.allocated));
641 if (module_deallocated_total_)
642 module_deallocated_total_->Fill(
i, kB(
module.total.deallocated));
644 if (module_counter_ and
path.status)
645 module_counter_->Fill(
path.last);
656 unsigned int lumisections,
660 event_.book(booker,
"process " +
process.name_,
"process " +
process.name_, event_ranges, lumisections, byls);
663 for (
unsigned int id : boost::irange(0ul, paths_.size())) {
664 paths_[
id].book(booker,
"path ", job,
process.paths_[
id], path_ranges, lumisections, byls);
666 for (
unsigned int id : boost::irange(0ul, endpaths_.size())) {
667 endpaths_[
id].book(booker,
"endpath ", job,
process.endPaths_[
id], path_ranges, lumisections, byls);
681 for (
unsigned int id : boost::irange(0ul, paths_.size()))
685 for (
unsigned int id : boost::irange(0ul, endpaths_.size()))
690 : highlight_{groups.size()}, modules_{job.size()} {
691 processes_.reserve(job.processes().size());
692 for (
auto const&
process : job.processes())
693 processes_.emplace_back(
process);
698 std::vector<GroupOfModules>
const& groups,
702 unsigned int lumisections,
710 event_.book(booker,
"event",
"Event", event_ranges, lumisections, byls);
711 event_ex_.book(booker,
"explicit",
"Event (explicit)", event_ranges, lumisections, byls);
712 overhead_.book(booker,
"overhead",
"Overhead", event_ranges, lumisections, byls);
713 idle_.book(booker,
"idle",
"Idle", event_ranges, lumisections, byls);
715 modules_[job.
source().
id()].book(booker,
"source",
"Source", module_ranges, lumisections, byls);
718 lumi_.book(booker,
"lumi",
"LumiSection transitions", event_ranges, lumisections, byls);
720 run_.book(booker,
"run",
"Run transtions", event_ranges, lumisections,
false);
724 for (
unsigned int group : boost::irange(0ul, groups.size())) {
726 highlight_[
group].book(booker,
"highlight " +
label,
"Highlight " +
label, event_ranges, lumisections, byls);
730 for (
unsigned int pid : boost::irange(0ul, job.
processes().size())) {
732 processes_[pid].book(booker, job,
process, event_ranges, path_ranges, lumisections, bypath, byls);
736 for (
unsigned int id :
process.modules_) {
740 modules_[
id].book(booker, safe_label, module_label, module_ranges, lumisections, byls);
749 event_.fill(
data.total,
ls);
750 event_ex_.fill(
data.event,
ls);
751 overhead_.fill(
data.overhead,
ls);
752 idle_.fill(
data.idle,
ls);
755 for (
unsigned int group : boost::irange(0ul, highlight_.size()))
759 for (
unsigned int id : boost::irange(0ul, modules_.size()))
760 modules_[
id].fill(
data.modules[
id].total,
ls);
762 for (
unsigned int pid : boost::irange(0ul, processes_.size()))
803 config.getUntrackedParameter<
double>(
"dqmTimeResolution"),
804 config.getUntrackedParameter<
double>(
"dqmMemoryRange"),
805 config.getUntrackedParameter<
double>(
"dqmMemoryResolution")}),
806 dqm_path_ranges_({
config.getUntrackedParameter<
double>(
"dqmPathTimeRange"),
807 config.getUntrackedParameter<
double>(
"dqmPathTimeResolution"),
808 config.getUntrackedParameter<
double>(
"dqmPathMemoryRange"),
809 config.getUntrackedParameter<
double>(
"dqmPathMemoryResolution")}),
810 dqm_module_ranges_({
config.getUntrackedParameter<
double>(
"dqmModuleTimeRange"),
811 config.getUntrackedParameter<
double>(
"dqmModuleTimeResolution"),
812 config.getUntrackedParameter<
double>(
"dqmModuleMemoryRange"),
813 config.getUntrackedParameter<
double>(
"dqmModuleMemoryResolution")}),
814 dqm_lumisections_range_(
config.getUntrackedParameter<
unsigned int>(
"dqmLumiSectionsRange")),
817 highlight_module_psets_(
config.getUntrackedParameter<std::vector<edm::ParameterSet>>(
"highlightModules")),
818 highlight_modules_(highlight_module_psets_.size())
821 tbb::task_scheduler_observer::observe();
897 LogDebug(
"FastTimerService") <<
"The FastTimerService received is currently not monitoring the signal \"" << signal
903 if (unsupported_signals_.insert(signal).second)
904 edm::LogWarning(
"FastTimerService") <<
"The FastTimerService received the unsupported signal \"" << signal
906 <<
"Please report how to reproduce the issue to cms-hlt@cern.ch .";
910 ignoredSignal(__func__);
913 if (isFirstSubprocess(gc)) {
915 subprocess_global_run_check_[
index] = 0;
916 run_transition_[
index].reset();
917 run_summary_[
index].reset();
934 dqm_lumisections_range_,
935 enable_dqm_bymodule_,
938 enable_dqm_transitions_);
954 static const auto safe_for_dqm =
"/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+=_()# "s;
956 if (safe_for_dqm.find(
c) == std::string::npos)
961 concurrent_lumis_ =
bounds.maxNumberOfConcurrentLuminosityBlocks();
962 concurrent_runs_ =
bounds.maxNumberOfConcurrentRuns();
963 concurrent_streams_ =
bounds.maxNumberOfStreams();
964 concurrent_threads_ =
bounds.maxNumberOfThreads();
966 if (enable_dqm_bynproc_)
967 dqm_path_ += fmt::sprintf(
968 "/Running on %s with %d streams on %d threads",
processor_model, concurrent_streams_, concurrent_threads_);
971 fixForDQM(dqm_path_);
974 subprocess_event_check_ = std::make_unique<std::atomic<unsigned int>[]>(concurrent_streams_);
975 for (
unsigned int i = 0;
i < concurrent_streams_; ++
i)
976 subprocess_event_check_[
i] = 0;
977 subprocess_global_run_check_ = std::make_unique<std::atomic<unsigned int>[]>(concurrent_runs_);
978 for (
unsigned int i = 0;
i < concurrent_runs_; ++
i)
979 subprocess_global_run_check_[
i] = 0;
980 subprocess_global_lumi_check_ = std::make_unique<std::atomic<unsigned int>[]>(concurrent_lumis_);
981 for (
unsigned int i = 0;
i < concurrent_lumis_; ++
i)
982 subprocess_global_lumi_check_[
i] = 0;
985 lumi_transition_.resize(concurrent_lumis_);
986 run_transition_.resize(concurrent_runs_);
990 callgraph_.preSourceConstruction(
module);
995 callgraph_.preBeginJob(pathsAndConsumes,
context);
999 unsigned int modules = callgraph_.size();
1002 for (
unsigned int group : boost::irange(0ul, highlight_module_psets_.size())) {
1004 auto labels = highlight_module_psets_[
group].getUntrackedParameter<std::vector<std::string>>(
"modules");
1007 highlight_modules_[
group].label = highlight_module_psets_[
group].getUntrackedParameter<
std::string>(
"label");
1008 highlight_modules_[
group].modules.reserve(
labels.size());
1011 auto const&
label = callgraph_.module(
i).moduleLabel();
1013 highlight_modules_[
group].modules.push_back(
i);
1016 highlight_module_psets_.clear();
1020 streams_.resize(concurrent_streams_,
temp);
1021 run_summary_.resize(concurrent_runs_,
temp);
1022 job_summary_ =
temp;
1027 enable_dqm_ =
false;
1028 edm::LogWarning(
"FastTimerService") <<
"The DQMStore is not avalable, the DQM plots will not be generated";
1033 plots_ = std::make_unique<PlotsPerJob>(callgraph_, highlight_modules_);
1044 ignoredSignal(__func__);
1047 if (isFirstSubprocess(gc)) {
1049 subprocess_global_lumi_check_[
index] = 0;
1050 lumi_transition_[
index].reset();
1059 ignoredSignal(__func__);
1063 bool last = isLastSubprocess(subprocess_global_lumi_check_[
index]);
1072 if (enable_dqm_ and enable_dqm_transitions_) {
1088 ignoredSignal(__func__);
1092 bool last = isLastSubprocess(subprocess_global_run_check_[
index]);
1098 if (print_run_summary_) {
1103 if (enable_dqm_ and enable_dqm_transitions_) {
1104 plots_->fill_run(run_transition_[
index]);
1113 thread().measure_and_accumulate(job_summary_.overhead);
1117 thread().measure_and_accumulate(lumi_transition_[
index]);
1122 tbb::task_scheduler_observer::observe(
false);
1125 if (print_job_summary_) {
1129 if (write_json_summary_) {
1130 writeSummaryJSON(job_summary_, json_filename_);
1134 template <
typename T>
1136 out <<
"FastReport ";
1137 if (
label.size() < 60)
1138 for (
unsigned int i = (60 -
label.size()) / 2;
i > 0; --
i)
1140 out <<
' ' <<
label <<
" Summary ";
1141 if (
label.size() < 60)
1142 for (
unsigned int i = (59 -
label.size()) / 2;
i > 0; --
i)
1147 template <
typename T>
1149 out <<
"FastReport CPU time Real time Allocated Deallocated " <<
label <<
"\n";
1153 template <
typename T>
1155 out << fmt::sprintf(
"FastReport %10.1f ms %10.1f ms %+10d kB %+10d kB %s\n",
1156 ms(
data.time_thread),
1158 +static_cast<int64_t>(kB(
data.allocated)),
1159 -static_cast<int64_t>(kB(
data.deallocated)),
1163 template <
typename T>
1165 out << fmt::sprintf(
"FastReport %10.1f ms %10.1f ms %+10d kB %+10d kB %s\n",
1168 +
static_cast<int64_t
>(kB(
data.allocated)),
1169 -static_cast<int64_t>(kB(
data.deallocated)),
1173 template <
typename T>
1175 printHeader(
out,
"Event");
1176 printEventHeader(
out,
"Modules");
1177 auto const& source_d = callgraph_.source();
1178 auto const&
source =
data.modules[source_d.id()];
1179 printEventLine(
out,
source.total, source_d.moduleLabel());
1180 for (
unsigned int i = 0;
i < callgraph_.processes().size(); ++
i) {
1181 auto const& proc_d = callgraph_.processDescription(
i);
1183 printEventLine(
out,
proc.total,
"process " + proc_d.name_);
1184 for (
unsigned int m : proc_d.modules_) {
1185 auto const& module_d = callgraph_.module(
m);
1187 printEventLine(
out,
module.total,
" " + module_d.moduleLabel());
1190 printEventLine(
out,
data.total,
"total");
1192 printEventHeader(
out,
"Processes and Paths");
1193 printEventLine(
out,
source.total, source_d.moduleLabel());
1194 for (
unsigned int i = 0;
i < callgraph_.processes().size(); ++
i) {
1195 auto const& proc_d = callgraph_.processDescription(
i);
1197 printEventLine(
out,
proc.total,
"process " + proc_d.name_);
1198 for (
unsigned int p = 0;
p <
proc.paths.size(); ++
p) {
1199 auto const&
name = proc_d.paths_[
p].name_;
1201 printEventLine(
out,
path.active,
name +
" (only scheduled modules)");
1202 printEventLine(
out,
path.total,
name +
" (including dependencies)");
1204 for (
unsigned int p = 0;
p <
proc.endpaths.size(); ++
p) {
1205 auto const&
name = proc_d.endPaths_[
p].name_;
1207 printEventLine(
out,
path.active,
name +
" (only scheduled modules)");
1208 printEventLine(
out,
path.total,
name +
" (including dependencies)");
1211 printEventLine(
out,
data.total,
"total");
1213 for (
unsigned int group : boost::irange(0ul, highlight_modules_.size())) {
1214 printEventHeader(
out,
"Highlighted modules");
1216 auto const& module_d = callgraph_.module(
m);
1218 printEventLine(
out,
module.total,
" " + module_d.moduleLabel());
1225 template <
typename T>
1229 out <<
"FastReport CPU time avg. when run Real time avg. when run Alloc. avg. when run Dealloc. avg. when run ";
1232 out <<
"FastReport CPU time avg. Real time avg. Alloc. avg. Dealloc. avg. ";
1238 template <
typename T>
1241 out <<
"FastReport CPU time sched. / depend. Real time sched. / depend. Alloc. sched. / depend. Dealloc. sched. / depend. ";
1247 template <
typename T>
1249 out << fmt::sprintf(
1251 "FastReport %10.1f ms %10.1f ms %+10d kB %+10d kB %s\n",
1260 template <
typename T>
1263 out << fmt::sprintf(
1265 "FastReport %10.1f ms %10.1f ms %10.1f ms %10.1f ms %+10d kB %+10d kB %+10d kB %+10d kB %s\n",
1268 (active ? ms(
data.time_thread) / active : 0),
1270 (active ? ms(
data.time_real) / active : 0),
1272 (active ? +static_cast<int64_t>(kB(
data.allocated) / active) : 0),
1274 (active ? -static_cast<int64_t>(kB(
data.deallocated) / active) : 0),
1278 template <
typename T>
1283 out << fmt::sprintf(
1285 "FastReport %10.1f ms %10.1f ms %+10d kB %+10d kB %s\n",
1294 template <
typename T>
1297 out << fmt::sprintf(
1298 "FastReport %10.1f ms %10.1f ms %10.1f ms %10.1f ms %+10d kB %+10d kB %+10d kB %+10d kB %s\n",
1300 (active ? ms(
data.time_thread) / active : 0),
1302 (active ? ms(
data.time_real) / active : 0),
1304 (active ? +static_cast<int64_t>(kB(
data.allocated) / active) : 0),
1306 (active ? -static_cast<int64_t>(kB(
data.deallocated) / active) : 0),
1310 template <
typename T>
1313 out << fmt::sprintf(
1314 "FastReport %10.1f ms %10.1f ms %10.1f ms %10.1f ms %+10d kB %+10d kB %+10d kB %+10d kB %s\n",
1326 template <
typename T>
1329 printSummaryHeader(
out,
"Modules",
true);
1330 auto const& source_d = callgraph_.source();
1331 auto const&
source =
data.modules[source_d.id()];
1333 for (
unsigned int i = 0;
i < callgraph_.processes().size(); ++
i) {
1334 auto const& proc_d = callgraph_.processDescription(
i);
1336 printSummaryLine(
out,
proc.total,
data.events,
"process " + proc_d.name_);
1337 for (
unsigned int m : proc_d.modules_) {
1338 auto const& module_d = callgraph_.module(
m);
1340 printSummaryLine(
out,
module.total,
data.events,
module.events,
" " + module_d.moduleLabel());
1343 printSummaryLine(
out,
data.total,
data.events,
"total");
1344 printSummaryLine(
out,
data.eventsetup,
data.events,
"eventsetup");
1345 printSummaryLine(
out,
data.overhead,
data.events,
"other");
1346 printSummaryLine(
out,
data.idle,
data.events,
"idle");
1348 printPathSummaryHeader(
out,
"Processes and Paths");
1349 printSummaryLine(
out,
source.total,
data.events, source_d.moduleLabel());
1350 for (
unsigned int i = 0;
i < callgraph_.processes().size(); ++
i) {
1351 auto const& proc_d = callgraph_.processDescription(
i);
1353 printSummaryLine(
out,
proc.total,
data.events,
"process " + proc_d.name_);
1354 for (
unsigned int p = 0;
p <
proc.paths.size(); ++
p) {
1355 auto const&
name = proc_d.paths_[
p].name_;
1359 for (
unsigned int p = 0;
p <
proc.endpaths.size(); ++
p) {
1360 auto const&
name = proc_d.endPaths_[
p].name_;
1365 printSummaryLine(
out,
data.total,
data.events,
"total");
1366 printSummaryLine(
out,
data.eventsetup,
data.events,
"eventsetup");
1367 printSummaryLine(
out,
data.overhead,
data.events,
"other");
1368 printSummaryLine(
out,
data.idle,
data.events,
"idle");
1370 for (
unsigned int group : boost::irange(0ul, highlight_modules_.size())) {
1371 printSummaryHeader(
out,
"Highlighted modules",
true);
1373 auto const& module_d = callgraph_.module(
m);
1382 template <
typename T>
1384 printEventHeader(
out,
"Transition");
1388 template <
typename T>
1392 T const&
data)
const {
1396 {
"time_thread", ms(
data.time_thread)},
1397 {
"time_real", ms(
data.time_real)},
1398 {
"mem_alloc", kB(
data.allocated)},
1399 {
"mem_free", kB(
data.deallocated)}};
1411 json{{
"time_thread",
"cpu time"}},
1412 json{{
"mem_alloc",
"allocated memory"}},
1413 json{{
"mem_free",
"deallocated memory"}}});
1416 j[
"total"] = encodeToJSON(
"Job",
1417 callgraph_.processDescription(0).name_,
1423 for (
unsigned int i = 0;
i < callgraph_.size(); ++
i) {
1424 auto const&
module = callgraph_.module(
i);
1425 auto const& data_m =
data.modules[
i];
1426 j[
"modules"].push_back(encodeToJSON(
module, data_m));
1430 j[
"modules"].push_back(encodeToJSON(
"other",
"other",
data.events,
data.overhead));
1431 j[
"modules"].push_back(encodeToJSON(
"eventsetup",
"eventsetup",
data.events,
data.eventsetup));
1432 j[
"modules"].push_back(encodeToJSON(
"idle",
"idle",
data.events,
data.idle));
1435 out << std::setw(2) <<
j << std::flush;
1451 unsigned int old_value =
check.fetch_add(1, std::memory_order_acq_rel);
1452 return (old_value == callgraph_.processes().size() - 1);
1458 ignoredSignal(__func__);
1462 auto&
stream = streams_[sid];
1463 auto&
process = callgraph_.processDescription(pid);
1467 for (
unsigned int id :
process.modules_)
1472 bool last = isLastSubprocess(subprocess_event_check_[sid]);
1477 stream.event_measurement.measure_and_store(
stream.event);
1480 unsigned int id = 0;
1484 for (
unsigned int group : boost::irange(0ul, highlight_modules_.size()))
1490 std::lock_guard<std::mutex> guard(summary_mutex_);
1495 if (print_event_summary_) {
1507 auto&
stream = streams_[sid];
1511 subprocess_event_check_[sid] = 0;
1514 auto& measurement = thread();
1515 measurement.measure_and_accumulate(
stream.overhead);
1516 stream.event_measurement = measurement;
1521 unsigned int id = md.
id();
1522 auto&
stream = streams_[sid];
1525 thread().measure_and_store(
module.total);
1532 unsigned int id = pc.
pathID();
1533 auto&
stream = streams_[sid];
1535 data.status =
false;
1544 unsigned int id = pc.
pathID();
1545 auto&
stream = streams_[sid];
1549 pc.
isEndPath() ? callgraph_.processDescription(pid).endPaths_[
id] : callgraph_.processDescription(pid).paths_[
id];
1550 unsigned int index =
path.modules_on_path_.empty() ? 0 :
status.index() + 1;
1551 data.last =
path.modules_on_path_.empty() ? 0 :
path.last_dependency_of_module_[
status.index()];
1553 for (
unsigned int i = 0;
i <
index; ++
i) {
1557 for (
unsigned int i = 0;
i <
data.last; ++
i) {
1565 auto&
stream = streams_[sid];
1566 thread().measure_and_accumulate(
stream.overhead);
1571 unsigned int id = md.
id();
1573 auto&
stream = streams_[sid];
1576 module.has_acquire =
true;
1577 thread().measure_and_store(
module.total);
1582 auto&
stream = streams_[sid];
1583 thread().measure_and_accumulate(
stream.overhead);
1588 unsigned int id = md.
id();
1590 auto&
stream = streams_[sid];
1593 if (
module.has_acquire) {
1594 thread().measure_and_accumulate(
module.total);
1596 thread().measure_and_store(
module.total);
1602 unsupportedSignal(__func__);
1606 unsupportedSignal(__func__);
1610 ignoredSignal(__func__);
1614 ignoredSignal(__func__);
1620 thread().measure_and_accumulate(
stream.overhead);
1627 unsigned int id = md.
id();
1631 thread().measure_and_accumulate(
module.total);
1636 thread().measure_and_accumulate(job_summary_.overhead);
1641 thread().measure_and_accumulate(run_transition_[
index]);
1645 thread().measure_and_accumulate(job_summary_.overhead);
1650 thread().measure_and_accumulate(run_transition_[
index]);
1654 thread().measure_and_accumulate(job_summary_.overhead);
1659 thread().measure_and_accumulate(lumi_transition_[
index]);
1663 thread().measure_and_accumulate(job_summary_.overhead);
1668 thread().measure_and_accumulate(lumi_transition_[
index]);
1672 thread().measure_and_accumulate(job_summary_.overhead);
1677 thread().measure_and_accumulate(run_transition_[
index]);
1681 thread().measure_and_accumulate(job_summary_.overhead);
1686 thread().measure_and_accumulate(run_transition_[
index]);
1690 thread().measure_and_accumulate(job_summary_.overhead);
1695 thread().measure_and_accumulate(lumi_transition_[
index]);
1699 thread().measure_and_accumulate(job_summary_.overhead);
1704 thread().measure_and_accumulate(lumi_transition_[
index]);
1708 auto top =
cc.getTopModuleCallingContext();
1711 unsigned int sid = top->parent().placeInPathContext()->pathContext()->streamContext()->streamID().value();
1712 auto&
stream = streams_[sid];
1713 thread().measure_and_accumulate(
stream.overhead);
1718 auto top =
cc.getTopModuleCallingContext();
1720 unsigned int sid = top->parent().placeInPathContext()->pathContext()->streamContext()->streamID().value();
1721 auto&
stream = streams_[sid];
1722 thread().measure_and_accumulate(
stream.eventsetup);
1729 throw cms::Exception(
"FastTimerService") <<
"ThreadGuard key creation failed: " << ::strerror(
err);
1735 auto ptr = ::pthread_getspecific(key_);
1738 auto it = thread_resources_.emplace_back(std::make_shared<specific_t>(
r));
1739 auto pp =
new std::shared_ptr<specific_t>(*it);
1740 auto err = ::pthread_setspecific(key_,
pp);
1742 throw cms::Exception(
"FastTimerService") <<
"ThreadGuard pthread_setspecific failed: " << ::strerror(
err);
1750 return static_cast<std::shared_ptr<specific_t>*
>(
p);
1756 auto expected =
true;
1757 if ((*ps)->live_.compare_exchange_strong(expected,
false)) {
1759 (*ps)->measurement_.measure_and_accumulate((*ps)->resource_);
1766 for (
auto&
p : thread_resources_) {
1767 auto expected =
true;
1768 if (
p->live_.compare_exchange_strong(expected,
false)) {
1769 p->measurement_.measure_and_accumulate(
p->resource_);
1775 return (*ptr(::pthread_getspecific(key_)))->measurement_;
1805 desc.addUntracked<
bool>(
"printEventSummary",
false);
1806 desc.addUntracked<
bool>(
"printRunSummary",
true);
1807 desc.addUntracked<
bool>(
"printJobSummary",
true);
1812 desc.addUntracked<
bool>(
"writeJSONSummary",
false);
1815 desc.addUntracked<
bool>(
"enableDQM",
true);
1816 desc.addUntracked<
bool>(
"enableDQMbyModule",
false);
1817 desc.addUntracked<
bool>(
"enableDQMbyPath",
false);
1818 desc.addUntracked<
bool>(
"enableDQMbyLumiSection",
false);
1819 desc.addUntracked<
bool>(
"enableDQMbyProcesses",
false);
1820 desc.addUntracked<
bool>(
"enableDQMTransitions",
false);
1821 desc.addUntracked<
double>(
"dqmTimeRange", 1000.);
1822 desc.addUntracked<
double>(
"dqmTimeResolution", 5.);
1823 desc.addUntracked<
double>(
"dqmMemoryRange", 1000000.);
1824 desc.addUntracked<
double>(
"dqmMemoryResolution", 5000.);
1825 desc.addUntracked<
double>(
"dqmPathTimeRange", 100.);
1826 desc.addUntracked<
double>(
"dqmPathTimeResolution", 0.5);
1827 desc.addUntracked<
double>(
"dqmPathMemoryRange", 1000000.);
1828 desc.addUntracked<
double>(
"dqmPathMemoryResolution", 5000.);
1829 desc.addUntracked<
double>(
"dqmModuleTimeRange", 40.);
1830 desc.addUntracked<
double>(
"dqmModuleTimeResolution", 0.2);
1831 desc.addUntracked<
double>(
"dqmModuleMemoryRange", 100000.);
1832 desc.addUntracked<
double>(
"dqmModuleMemoryResolution", 500.);
1833 desc.addUntracked<
unsigned>(
"dqmLumiSectionsRange", 2500);
1837 highlightModulesDescription.
addUntracked<std::vector<std::string>>(
"modules", {});
1838 highlightModulesDescription.addUntracked<
std::string>(
"label",
"producers");
1839 desc.addVPSetUntracked(
"highlightModules", highlightModulesDescription, {});
1844 ->setComment(
"This parameter is obsolete and will be ignored.");
1846 ->setComment(
"This parameter is obsolete and will be ignored.");
1848 ->setComment(
"This parameter is obsolete and will be ignored.");
1850 ->setComment(
"This parameter is obsolete and will be ignored.");
1852 ->setComment(
"This parameter is obsolete and will be ignored.");
1854 ->setComment(
"This parameter is obsolete and will be ignored.");
1856 ->setComment(
"This parameter is obsolete and will be ignored.");
1858 ->setComment(
"This parameter is obsolete and will be ignored.");
1860 ->setComment(
"This parameter is obsolete and will be ignored.");
1862 ->setComment(
"This parameter is obsolete and will be ignored.");
1864 ->setComment(
"This parameter is obsolete and will be ignored.");
1866 ->setComment(
"This parameter is obsolete and will be ignored.");
1868 ->setComment(
"This parameter is obsolete and will be ignored.");
1870 ->setComment(
"This parameter is obsolete and will be ignored.");
1872 descriptions.
add(
"FastTimerService",
desc);
ResourcesPerJob()=default
void preGlobalBeginRun(edm::GlobalContext const &)
void preModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
const bool print_run_summary_
constexpr int32_t ceil(float num)
void fill(Resources const &, unsigned int lumisection)
ResourcesPerProcess operator+(ResourcesPerProcess const &other) const
void postStreamBeginRun(edm::StreamContext const &)
ModuleDescription const * moduleDescription() const
void postGlobalEndLumi(edm::GlobalContext const &)
LuminosityBlockNumber_t luminosityBlock() const
void postGlobalBeginLumi(edm::GlobalContext const &)
void on_scheduler_entry(bool worker) final
void postStreamEndLumi(edm::StreamContext const &)
void on_scheduler_exit(bool worker) final
void postModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preGlobalEndRun(edm::GlobalContext const &)
void preBeginJob(edm::PathsAndConsumesOfModulesBase const &, edm::ProcessContext const &)
void fill_lumi(AtomicResources const &, unsigned int lumisection)
void preSourceLumi(edm::LuminosityBlockIndex)
void printSummary(T &out, ResourcesPerJob const &data, std::string const &label) const
boost::date_time::subsecond_duration< boost::posix_time::time_duration, 1000000000 > nanoseconds
ResourcesPerJob operator+(ResourcesPerJob const &other) const
unsigned int concurrent_threads_
void postEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
const PlotRanges dqm_event_ranges_
void postModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preSourceRun(edm::RunIndex)
void postModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
void unsupportedSignal(const std::string &signal) const
void writeSummaryJSON(ResourcesPerJob const &data, std::string const &filename) const
virtual void setCurrentFolder(std::string const &fullpath)
const bool write_json_summary_
void book(dqm::reco::DQMStore::IBooker &, ProcessCallGraph const &, ProcessCallGraph::ProcessType const &, PlotRanges const &event_ranges, PlotRanges const &path_ranges, unsigned int lumisections, bool bypath, bool byls)
ResourcesPerJob job_summary_
PlotsPerJob(ProcessCallGraph const &job, std::vector< GroupOfModules > const &groups)
PlotsPerProcess(ProcessCallGraph::ProcessType const &)
void preGlobalEndLumi(edm::GlobalContext const &)
uint32_t cc[maxCellsPerHit]
edm::ModuleDescription const & source() const
virtual std::string pwd()
void preESModule(edm::eventsetup::EventSetupRecordKey const &, edm::ESModuleCallingContext const &)
const std::string json_filename_
bool isFirstSubprocess(edm::StreamContext const &)
const bool enable_dqm_bynproc_
void postESModule(edm::eventsetup::EventSetupRecordKey const &, edm::ESModuleCallingContext const &)
unsigned int concurrent_runs_
void postSourceRun(edm::RunIndex)
void postPathEvent(edm::StreamContext const &, edm::PathContext const &, edm::HLTPathStatus const &)
void preModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
ResourcesPerProcess & operator+=(ResourcesPerProcess const &other)
void book(dqm::reco::DQMStore::IBooker &, std::string const &, ProcessCallGraph const &, ProcessCallGraph::PathType const &, PlotRanges const &ranges, unsigned int lumisections, bool byls)
const bool print_job_summary_
const bool enable_dqm_bypath_
void book(dqm::reco::DQMStore::IBooker &, ProcessCallGraph const &, std::vector< GroupOfModules > const &, PlotRanges const &event_ranges, PlotRanges const &path_ranges, PlotRanges const &module_ranges, unsigned int lumisections, bool bymodule, bool bypath, bool byls, bool transitions)
AtomicResources operator+(AtomicResources const &other) const
LuminosityBlockNumber_t luminosityBlock() const
void printSummary(const std::map< unsigned int, SiStripDetSummary::Values > &map)
edm::ModuleDescription const & module(unsigned int module) const
void postStreamBeginLumi(edm::StreamContext const &)
void postModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
std::vector< ProcessType > const & processes() const
MonitorElement * book1DD(TString const &name, TString const &title, int nchX, double lowX, double highX, FUNC onbooking=NOOP())
ResourcesPerJob & operator+=(ResourcesPerJob const &other)
virtual void setXTitle(std::string const &title)
void fill(ProcessCallGraph::ProcessType const &, ResourcesPerJob const &, ResourcesPerProcess const &, unsigned int ls)
ResourcesPerModule operator+(ResourcesPerModule const &other) const
void printEvent(T &out, ResourcesPerJob const &) const
void prePathEvent(edm::StreamContext const &, edm::PathContext const &)
MonitorElement * bookProfile(TString const &name, TString const &title, int nchX, double lowX, double highX, int, double lowY, double highY, char const *option="s", FUNC onbooking=NOOP())
const std::string processor_model
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
FastTimerService(const edm::ParameterSet &, edm::ActivityRegistry &)
void preModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
StreamID const & streamID() const
void postEvent(edm::StreamContext const &)
void postGlobalEndRun(edm::GlobalContext const &)
static uint64_t deallocated()
void fill_fraction(Resources const &, Resources const &, unsigned int lumisection)
void postStreamEndRun(edm::StreamContext const &)
void preModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
ResourcesPerPath & operator+=(ResourcesPerPath const &other)
Resources operator+(Resources const &other) const
unsigned int concurrent_lumis_
ResourcesPerModule() noexcept
void printSummaryLine(T &out, Resources const &data, uint64_t events, std::string const &label) const
bool register_thread(FastTimerService::AtomicResources &r)
void printPathSummaryLine(T &out, Resources const &data, Resources const &total, uint64_t events, std::string const &label) const
json encodeToJSON(std::string const &type, std::string const &label, unsigned int events, T const &data) const
ProcessContext const * processContext() const
const bool enable_dqm_transitions_
LuminosityBlockIndex const & luminosityBlockIndex() const
void printSummaryHeader(T &out, std::string const &label, bool detailed) const
ProcessContext const * processContext() const
void ignoredSignal(const std::string &signal) const
LuminosityBlockID const & luminosityBlockID() const
void measure_and_store(Resources &store) noexcept
const bool enable_dqm_bymodule_
unsigned int pathID() const
void book(dqm::reco::DQMStore::IBooker &, std::string const &name, std::string const &title, PlotRanges const &ranges, unsigned int lumisections, bool byls)
void fill_run(AtomicResources const &)
#define DEFINE_FWK_SERVICE(type)
static bool is_available()
const bool enable_dqm_byls_
void postModuleEventAcquire(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
const bool print_event_summary_
virtual void setYTitle(std::string const &title)
LuminosityBlockIndex const & luminosityBlockIndex() const
static std::shared_ptr< specific_t > * ptr(void *p)
unsigned long long uint64_t
void postModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void printEventHeader(T &out, std::string const &label) const
static uint64_t allocated()
void postModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceEvent(edm::StreamID)
static void retire_thread(void *t)
void add(std::string const &label, ParameterSetDescription const &psetDescription)
void preEvent(edm::StreamContext const &)
void postGlobalBeginRun(edm::GlobalContext const &)
void printEventLine(T &out, Resources const &data, std::string const &label) const
void preStreamBeginLumi(edm::StreamContext const &)
ResourcesPerProcess(ProcessCallGraph::ProcessType const &process)
void postSourceEvent(edm::StreamID)
void preModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void printPathSummaryHeader(T &out, std::string const &label) const
bool isSubProcess() const
void preStreamEndRun(edm::StreamContext const &)
RunIndex const & runIndex() const
char data[epos_bytes_allocation]
void preModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void printHeader(T &out, std::string const &label) const
void preStreamEndLumi(edm::StreamContext const &)
void measure_and_accumulate(Resources &store) noexcept
ResourcesPerModule & operator+=(ResourcesPerModule const &other)
EventID const & eventID() const
void postModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
ResourcesPerPath operator+(ResourcesPerPath const &other) const
void preGlobalBeginLumi(edm::GlobalContext const &)
AtomicResources & operator+=(AtomicResources const &other)
Resources & operator+=(Resources const &other)
void preModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceConstruction(edm::ModuleDescription const &)
void preModuleEventAcquire(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
ProcessType const & processDescription(unsigned int) const
void printTransition(T &out, AtomicResources const &data, std::string const &label) const
void postSourceLumi(edm::LuminosityBlockIndex)
void postModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
unsigned int value() const
Log< level::Warning, false > LogWarning
MonitorElement * book1D(TString const &name, TString const &title, int const nchX, double const lowX, double const highX, FUNC onbooking=NOOP())
UseScope< MonitorElementData::Scope::RUN > UseRunScope
AtomicResources & operator=(AtomicResources const &other)
ProcessCallGraph callgraph_
void postModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
std::string const & moduleLabel() const
void preStreamBeginRun(edm::StreamContext const &)
void preModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
RunIndex const & runIndex() const
static std::string const source
static void fixForDQM(std::string &label)
unsigned int concurrent_streams_
bool isLastSubprocess(std::atomic< unsigned int > &check)
void preallocate(edm::service::SystemBounds const &)
void fill(ProcessCallGraph const &, ResourcesPerJob const &, unsigned int ls)
void fill(ProcessCallGraph::PathType const &, ResourcesPerJob const &, ResourcesPerPath const &, unsigned int lumisection)