34 #include <sys/resource.h> 41 namespace eventsetup {
42 struct ComponentDescription;
50 using time_point = std::chrono::steady_clock::time_point;
68 void lastPostEvent(std::chrono::steady_clock::duration curr_event_time,
165 getrusage(RUSAGE_CHILDREN, &
usage);
166 double totalCPUTime = (double)
usage.ru_utime.tv_sec + (
double(
usage.ru_utime.tv_usec) * 1E-6);
167 totalCPUTime += (double)
usage.ru_stime.tv_sec + (
double(
usage.ru_stime.tv_usec) * 1E-6);
174 getrusage(RUSAGE_SELF, &
usage);
176 double totalCPUTime = 0.0;
178 totalCPUTime = (double)
usage.ru_utime.tv_sec + (
double(
usage.ru_utime.tv_usec) * 1E-6);
180 totalCPUTime += (double)
usage.ru_stime.tv_sec + (
double(
usage.ru_stime.tv_usec) * 1E-6);
183 getrusage(RUSAGE_CHILDREN, &
usage);
184 totalCPUTime += (double)
usage.ru_utime.tv_sec + (
double(
usage.ru_utime.tv_usec) * 1E-6);
185 totalCPUTime += (double)
usage.ru_stime.tv_sec + (
double(
usage.ru_stime.tv_usec) * 1E-6);
194 static thread_local std::vector<std::chrono::steady_clock::time_point> s_stack;
200 assert(!modStack.empty());
201 auto curr_module_time = modStack.back();
203 std::chrono::duration<double, std::ratio<1, 1>>
t =
getTime() - curr_module_time;
207 static void pushStack(
bool configuredInTopLevelProcess) {
208 if (!configuredInTopLevelProcess) {
222 summary_only_(iPS.getUntrackedParameter<
bool>(
"summaryOnly")),
223 report_summary_(iPS.getUntrackedParameter<
bool>(
"useJobReport")),
224 threshold_(iPS.getUntrackedParameter<double>(
"excessiveTimeThreshold")),
227 total_event_count_(0),
228 begin_lumi_count_(0),
230 configuredInTopLevelProcess_{
false},
234 iRegistry.watchPreEndJob([
this]() {
243 bool checkThreshold =
true;
244 if (threshold_ <= 0.0) {
247 checkThreshold =
false;
250 if ((not summary_only_) || (checkThreshold)) {
256 if (checkThreshold) {
308 auto preESModuleLambda = [
this](
auto const& recordKey,
auto const&
context) {
312 bool foundSlot =
false;
314 for (
size_t i = 0;
i < eventSetupModuleStartTimes_.size(); ++
i) {
315 auto&
slot = *eventSetupModuleStartTimes_[
i];
316 std::chrono::steady_clock::time_point expect;
319 eventSetupModuleCallInfo_[
i].first = uintptr_t(
context.componentDescription());
320 eventSetupModuleCallInfo_[
i].second = recordKey;
326 }
while (not foundSlot);
328 iRegistry.watchPreESModule(preESModuleLambda);
329 iRegistry.watchPreESModuleAcquire(preESModuleLambda);
331 auto postESModuleLambda = [
this](
auto const& recordKey,
auto const&
context) {
334 for (
size_t i = 0;
i < eventSetupModuleStartTimes_.size(); ++
i) {
335 auto const&
info = eventSetupModuleCallInfo_[
i];
336 if (
info.first == uintptr_t(
context.componentDescription()) and
info.second == recordKey) {
337 auto startTime = eventSetupModuleStartTimes_[
i]->exchange(std::chrono::steady_clock::time_point());
338 auto expect = accumulatedEventSetupModuleTimings_.load();
339 double_seconds timeDiff = stopTime -
startTime;
340 auto accumulatedTime = expect + timeDiff.count();
341 while (not accumulatedEventSetupModuleTimings_.compare_exchange_strong(expect, accumulatedTime)) {
342 accumulatedTime = expect + timeDiff.count();
348 iRegistry.watchPostESModule(postESModuleLambda);
349 iRegistry.watchPostESModuleAcquire(postESModuleLambda);
354 iRegistry.watchPreallocate([
this](service::SystemBounds
const& iBounds) {
355 nStreams_ = iBounds.maxNumberOfStreams();
356 nThreads_ = iBounds.maxNumberOfThreads();
357 curr_events_time_.resize(nStreams_, time_point());
358 sum_events_time_.resize(nStreams_, 0.);
359 max_events_time_.resize(nStreams_, 0.);
360 min_events_time_.resize(nStreams_, 1.E6);
361 eventSetupModuleStartTimes_.reserve(nThreads_);
362 for (
unsigned int i = 0;
i < nThreads_; ++
i) {
363 eventSetupModuleStartTimes_.emplace_back(std::make_unique<std::atomic<time_point>>());
365 eventSetupModuleCallInfo_.resize(nThreads_);
367 for (
unsigned int i = 0;
i < nStreams_; ++
i) {
368 countSubProcessesPreEvent_.emplace_back(std::make_unique<std::atomic<unsigned int>>(0));
369 countSubProcessesPostEvent_.emplace_back(std::make_unique<std::atomic<unsigned int>>(0));
372 setTaskCallbacks(iRegistry);
423 while (not
extra_job_cpu_.compare_exchange_strong(expected, expected + iTime)) {
431 desc.addUntracked<
bool>(
"summaryOnly",
false)->setComment(
"If 'true' do not report timing for each event");
432 desc.addUntracked<
bool>(
"useJobReport",
true)->setComment(
"If 'true' write summary information to JobReport");
433 desc.addUntracked<
double>(
"excessiveTimeThreshold", 0.)
435 "Amount of time in seconds before reporting a module or source has taken excessive time. A value of 0.0 " 436 "turns off this reporting.");
437 descriptions.
add(
"Timing",
desc);
438 descriptions.
setComment(
"This service reports the time it takes to run each module in a job.");
459 <<
"TimeReport> Report activated" 461 <<
"TimeReport> Report columns headings for events: " 462 <<
"eventnum runnum timetaken\n" 463 <<
"TimeReport> Report columns headings for modules: " 464 <<
"eventnum runnum modulelabel modulename timetaken\n" 465 <<
"TimeReport> JobTime=" 473 LogImportant(
"TimeReport") <<
"\nTimeReport> This instance of the Timing Service will be disabled because it " 474 "is configured in a SubProcess.\n" 475 <<
"If multiple instances of the TimingService were configured only the one in the " 476 "top level process will function.\n" 477 <<
"The other instance(s) will simply print this message and do nothing.\n\n";
481 const auto job_end_time =
getTime();
482 const double job_end_cpu =
getCPU();
505 total_loop_time = 0.0;
506 total_loop_cpu = 0.0;
509 double sum_all_events_time = 0;
511 sum_all_events_time +=
t;
514 double average_event_time = 0.0;
519 double event_throughput = 0.0;
520 if (total_loop_time != 0.0) {
524 LogImportant(
"TimeReport") <<
"TimeReport> Time report complete in " << total_job_time <<
" seconds" 526 <<
" Time Summary: \n" 527 <<
" - Min event: " << min_event_time <<
"\n" 528 <<
" - Max event: " << max_event_time <<
"\n" 529 <<
" - Avg event: " << average_event_time <<
"\n" 530 <<
" - Total loop: " << total_loop_time <<
"\n" 531 <<
" - Total init: " << total_initialization_time <<
"\n" 532 <<
" - Total job: " << total_job_time <<
"\n" 535 <<
" Event Throughput: " << event_throughput <<
" ev/s\n" 536 <<
" CPU Summary: \n" 537 <<
" - Total loop: " << total_loop_cpu <<
"\n" 538 <<
" - Total init: " << total_initialization_cpu <<
"\n" 540 <<
" - Total children: " << job_end_children_cpu <<
"\n" 541 <<
" - Total job: " << total_job_cpu <<
"\n" 542 <<
" Processing Summary: \n" 549 std::map<std::string, std::string> reportData;
551 reportData.insert(std::make_pair(
"MinEventTime",
d2str(min_event_time)));
552 reportData.insert(std::make_pair(
"MaxEventTime",
d2str(max_event_time)));
553 reportData.insert(std::make_pair(
"AvgEventTime",
d2str(average_event_time)));
554 reportData.insert(std::make_pair(
"EventThroughput",
d2str(event_throughput)));
555 reportData.insert(std::make_pair(
"TotalJobTime",
d2str(total_job_time)));
556 reportData.insert(std::make_pair(
"TotalJobCPU",
d2str(total_job_cpu)));
557 reportData.insert(std::make_pair(
"TotalJobChildrenCPU",
d2str(job_end_children_cpu)));
558 reportData.insert(std::make_pair(
"TotalLoopTime",
d2str(total_loop_time)));
561 reportData.insert(std::make_pair(
"TotalLoopCPU",
d2str(total_loop_cpu)));
562 reportData.insert(std::make_pair(
"TotalInitTime",
d2str(total_initialization_time)));
563 reportData.insert(std::make_pair(
"TotalInitCPU",
d2str(total_initialization_cpu)));
564 reportData.insert(std::make_pair(
"NumberOfStreams",
ui2str(
nStreams_)));
565 reportData.insert(std::make_pair(
"NumberOfThreads",
ui2str(
nThreads_)));
566 reportSvc->reportPerformanceSummary(
"Timing", reportData);
568 std::map<std::string, std::string> reportData1;
572 reportSvc->reportPerformanceSummary(
"ProcessingSummary", reportData1);
612 double curr_event_time_d =
double_seconds(curr_event_time).count();
616 auto const& eventID = iStream.
eventID();
617 LogPrint(
"TimeEvent") <<
"TimeEvent> " << eventID.event() <<
" " << eventID.run() <<
" " << curr_event_time_d;
630 auto const& eventID = iStream.
eventID();
634 LogPrint(
"TimeModule") <<
"TimeModule> " << eventID.event() <<
" " << eventID.run() <<
" " <<
desc.moduleLabel()
635 <<
" " <<
desc.moduleName() <<
" " <<
t;
696 <<
"ExcessiveTime: Module used " <<
t 697 <<
" seconds of time which exceeds the error threshold configured in the Timing Service of " <<
threshold_ 704 const auto presentTime =
getTime();
705 bool expected =
false;
void watchPostModuleGlobalEndLumi(PostModuleGlobalEndLumi::slot_type const &iSlot)
std::vector< double > sum_events_time_
void watchPreModuleGlobalBeginRun(PreModuleGlobalBeginRun::slot_type const &iSlot)
ModuleDescription const * moduleDescription() const
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
std::vector< double > max_events_time_
void watchPreModuleEventAcquire(PreModuleEventAcquire::slot_type const &iSlot)
void setTaskCallbacks(ActivityRegistry &)
static std::chrono::steady_clock::time_point getTime()
void watchPreModuleEvent(PreModuleEvent::slot_type const &iSlot)
void preModule(ModuleDescription const &md)
void lastPostEvent(std::chrono::steady_clock::duration curr_event_time, unsigned int index, StreamContext const &iStream)
void watchPreEventReadFromSource(PreEventReadFromSource::slot_type const &iSlot)
void watchPostModuleEvent(PostModuleEvent::slot_type const &iSlot)
void watchPostModuleGlobalBeginLumi(PostModuleGlobalBeginLumi::slot_type const &iSlot)
void watchPostModuleStreamEndLumi(PostModuleStreamEndLumi::slot_type const &iSlot)
void postEvent(StreamContext const &)
void preModuleStream(StreamContext const &, ModuleCallingContext const &)
void watchPostModuleStreamBeginRun(PostModuleStreamBeginRun::slot_type const &iSlot)
void watchPostSourceEvent(PostSourceEvent::slot_type const &iSlot)
std::atomic< unsigned long > begin_lumi_count_
Log< level::Error, false > LogError
void preSourceLumi(LuminosityBlockIndex)
void watchPreESSyncIOV(PreESSyncIOV::slot_type const &iSlot)
void watchPreModuleGlobalEndRun(PreModuleGlobalEndRun::slot_type const &iSlot)
void preModuleGlobal(GlobalContext const &, ModuleCallingContext const &)
void postModule(ModuleDescription const &md)
unsigned int num_running_tasks_
std::atomic< bool > updating_task_info_
void postGlobalBeginRun(GlobalContext const &)
void preSourceRun(RunIndex)
double postCommon() const
void postModuleStream(StreamContext const &, ModuleCallingContext const &)
void preOpenFile(std::string const &)
void watchPostSourceRun(PostSourceRun::slot_type const &iSlot)
double total_time_without_tasks_
Timing(ParameterSet const &, ActivityRegistry &)
void watchPreSourceLumi(PreSourceLumi::slot_type const &iSlot)
StreamID const & streamID() const
static std::vector< std::chrono::steady_clock::time_point > & moduleTimeStack()
void postModuleEvent(StreamContext const &, ModuleCallingContext const &)
#define CMS_THREAD_GUARD(_var_)
void watchPostModuleEventAcquire(PostModuleEventAcquire::slot_type const &iSlot)
Log< level::Error, true > LogImportant
void runningTasksChanged(bool iMoreTasks)
void postSourceLumi(LuminosityBlockIndex)
static void pushStack(bool configuredInTopLevelProcess)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
std::chrono::steady_clock::time_point time_point
std::atomic< double > end_loop_cpu_
void watchPreSourceRun(PreSourceRun::slot_type const &iSlot)
ProcessContext const * processContext() const
Log< level::Warning, true > LogPrint
std::vector< time_point > curr_events_time_
double getTotalCPU() const override
std::vector< std::unique_ptr< std::atomic< unsigned int > > > countSubProcessesPreEvent_
std::vector< std::unique_ptr< std::atomic< unsigned int > > > countSubProcessesPostEvent_
void watchPreModuleGlobalBeginLumi(PreModuleGlobalBeginLumi::slot_type const &iSlot)
void watchPostModuleStreamEndRun(PostModuleStreamEndRun::slot_type const &iSlot)
static std::string ui2str(unsigned int i)
void watchPreModuleStreamBeginLumi(PreModuleStreamBeginLumi::slot_type const &iSlot)
void postOpenFile(std::string const &)
void setComment(std::string const &value)
void postGlobalBeginLumi(GlobalContext const &)
std::atomic< unsigned long > begin_run_count_
std::atomic< time_point > end_loop_time_
std::atomic< double > accumulatedEventSetupModuleTimings_
void watchPostSourceLumi(PostSourceLumi::slot_type const &iSlot)
void watchPostModuleGlobalEndRun(PostModuleGlobalEndRun::slot_type const &iSlot)
void watchPostModuleStreamBeginLumi(PostModuleStreamBeginLumi::slot_type const &iSlot)
std::atomic< unsigned long > total_event_count_
void watchPreModuleStreamEndLumi(PreModuleStreamEndLumi::slot_type const &iSlot)
void watchPreModuleStreamBeginRun(PreModuleStreamBeginRun::slot_type const &iSlot)
void addToCPUTime(double iTime) override
static std::string d2str(double d)
void add(std::string const &label, ParameterSetDescription const &psetDescription)
bool configuredInTopLevelProcess_
edm::serviceregistry::AllArgsMaker< edm::TimingServiceBase, Timing > TimingMaker
void postSourceRun(RunIndex)
void postModuleGlobal(GlobalContext const &, ModuleCallingContext const &)
time_point curr_job_time_
void preBeginJob(PathsAndConsumesOfModulesBase const &, ProcessContext const &)
bool isSubProcess() const
void watchPreModuleStreamEndRun(PreModuleStreamEndRun::slot_type const &iSlot)
EventID const & eventID() const
std::chrono::duration< double, std::ratio< 1, 1 > > double_seconds
void postSourceEvent(StreamID)
void watchPostEventReadFromSource(PostEventReadFromSource::slot_type const &iSlot)
void watchPostModuleGlobalBeginRun(PostModuleGlobalBeginRun::slot_type const &iSlot)
void watchPreSourceEvent(PreSourceEvent::slot_type const &iSlot)
unsigned int value() const
static std::chrono::steady_clock::time_point jobStartTime()
std::atomic< double > extra_job_cpu_
time_point last_task_change_time_
std::vector< std::unique_ptr< std::atomic< time_point > > > eventSetupModuleStartTimes_
void preSourceEvent(StreamID)
void preEvent(StreamContext const &)
void watchPreModuleGlobalEndLumi(PreModuleGlobalEndLumi::slot_type const &iSlot)
static double getChildrenCPU()
std::vector< std::pair< uintptr_t, eventsetup::EventSetupRecordKey > > eventSetupModuleCallInfo_
std::vector< double > min_events_time_
unsigned int nSubProcesses_
void watchPostESSyncIOV(PostESSyncIOV::slot_type const &iSlot)