CMS 3D CMS Logo

FastTimerService.h
Go to the documentation of this file.
1 #ifndef FastTimerService_h
2 #define FastTimerService_h
3 
4 // system headers
5 #include <unistd.h>
6 
7 // C++ headers
8 #include <chrono>
9 #include <cmath>
10 #include <map>
11 #include <mutex>
12 #include <string>
13 #include <unordered_map>
14 
15 // boost headers
16 #include <boost/chrono.hpp>
17 
18 // tbb headers
19 #include <tbb/concurrent_unordered_set.h>
20 #include <tbb/enumerable_thread_specific.h>
21 #include <tbb/task_scheduler_observer.h>
22 
23 // CMSSW headers
43 
44 
45 /*
46 procesing time is divided into
47  - source
48  - event processing, sum of the time spent in all the modules
49 */
50 
51 class FastTimerService : public tbb::task_scheduler_observer
52 {
53 public:
55  ~FastTimerService() override;
56 
57 private:
58  double queryModuleTime_(edm::StreamID, unsigned int id) const;
59 
60 public:
61  // query the time spent in a module/path/process (available after it has run)
62  double querySourceTime(edm::StreamID) const;
63  double queryEventTime(edm::StreamID) const;
64  double queryEventTime(edm::StreamID, std::string const& process) const;
66  double queryModuleTime(edm::StreamID, unsigned int id) const;
67  double queryModuleTimeByLabel(edm::StreamID, std::string const& module) const;
68  double queryModuleTimeByLabel(edm::StreamID, std::string const& process, const std::string & module) const;
69  double queryPathTime(edm::StreamID, std::string const& path) const;
70  double queryPathTime(edm::StreamID, std::string const& process, std::string const& path) const;
71  double queryHighlightTime(edm::StreamID sid, std::string const& label) const;
72 
73 private:
74  void ignoredSignal(const std::string& signal) const;
75  void unsupportedSignal(const std::string& signal) const;
76 
77  // these signal pairs are not guaranteed to happen in the same thread
78 
80 
82  void postBeginJob();
83 
84  void postEndJob();
85 
88 
91 
94 
97 
100 
103 
106 
109 
110  void preEvent(edm::StreamContext const&);
111  void postEvent(edm::StreamContext const&);
112 
113  void prePathEvent(edm::StreamContext const&, edm::PathContext const&);
115 
118 
119  // these signal pairs are guaranteed to be called within the same thread
120 
121  //void preOpenFile(std::string const&, bool);
122  //void postOpenFile(std::string const&, bool);
123 
124  //void preCloseFile(std::string const&, bool);
125  //void postCloseFile(std::string const&, bool);
126 
128  //void postSourceConstruction(edm::ModuleDescription const&);
129 
132 
135 
138 
139  //void preModuleConstruction(edm::ModuleDescription const&);
140  //void postModuleConstruction(edm::ModuleDescription const&);
141 
142  //void preModuleBeginJob(edm::ModuleDescription const&);
143  //void postModuleBeginJob(edm::ModuleDescription const&);
144 
145  //void preModuleEndJob(edm::ModuleDescription const&);
146  //void postModuleEndJob(edm::ModuleDescription const&);
147 
148  //void preModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
149  //void postModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
150 
151  //void preModuleEndStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
152  //void postModuleEndStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
153 
156 
159 
162 
165 
168 
171 
174 
177 
180 
183 
186 
187  // inherited from TBB task_scheduler_observer
188  void on_scheduler_entry(bool worker) final;
189  void on_scheduler_exit(bool worker) final;
190 
191 public:
192  static void fillDescriptions(edm::ConfigurationDescriptions & descriptions);
193 
194 private:
195  // forward declarations
196  struct Resources;
197  struct AtomicResources;
198 
199  // per-thread measurements
200  struct Measurement {
201  public:
202  Measurement();
203  void measure();
204  void measure_and_store(Resources & store);
206 
207  public:
208  #ifdef DEBUG_THREAD_CONCURRENCY
210  #endif // DEBUG_THREAD_CONCURRENCY
211  boost::chrono::thread_clock::time_point time_thread;
212  boost::chrono::high_resolution_clock::time_point time_real;
215  };
216 
217  // highlight a group of modules
218  struct GroupOfModules {
219  public:
221  std::vector<unsigned int> modules;
222  };
223 
224  // resources being monitored by the service
225  struct Resources {
226  public:
227  Resources();
228  void reset();
230  Resources operator+(Resources const& other) const;
231 
232  public:
237  };
238 
239  // atomic version of Resources
241  public:
242  AtomicResources();
244  void reset();
245 
246  AtomicResources & operator=(AtomicResources const& other);
248  AtomicResources operator+(AtomicResources const& other) const;
249 
250  public:
251  std::atomic<boost::chrono::nanoseconds::rep> time_thread;
252  std::atomic<boost::chrono::nanoseconds::rep> time_real;
253  std::atomic<uint64_t> allocated;
254  std::atomic<uint64_t> deallocated;
255  };
256 
258  public:
259  void reset();
261  ResourcesPerModule operator+(ResourcesPerModule const& other) const;
262 
263  public:
265  unsigned events;
266  };
267 
269  public:
270  void reset();
272  ResourcesPerPath operator+(ResourcesPerPath const& other) const;
273 
274  public:
275  Resources active; // resources used by all modules on this path
276  Resources total; // resources used by all modules on this path, and their dependencies
277  unsigned last; // one-past-the last module that ran on this path
278  bool status; // whether the path accepted or rejected the event
279  };
280 
282  public:
284  void reset();
287 
288  public:
290  std::vector<ResourcesPerPath> paths;
291  std::vector<ResourcesPerPath> endpaths;
292  };
293 
295  public:
296  ResourcesPerJob() = default;
297  ResourcesPerJob(ProcessCallGraph const& job, std::vector<GroupOfModules> const& groups);
298  void reset();
300  ResourcesPerJob operator+(ResourcesPerJob const& other) const;
301 
302  public:
305  Resources event; // total time etc. spent between preSourceEvent and postEvent
307  std::vector<Resources> highlight;
308  std::vector<ResourcesPerModule> modules;
309  std::vector<ResourcesPerProcess> processes;
310  unsigned events;
311  };
312 
313  // plot ranges and resolution
314  struct PlotRanges {
315  double time_range;
317  double memory_range;
319  };
320 
321  // plots associated to each module or other element (path, process, etc)
323  public:
324  PlotsPerElement();
325  void book(DQMStore::ConcurrentBooker &, std::string const& name, std::string const& title, PlotRanges const& ranges, unsigned int lumisections, bool byls);
326  void fill(Resources const&, unsigned int lumisection);
327  void fill(AtomicResources const&, unsigned int lumisection);
328  void fill_fraction(Resources const&, Resources const&, unsigned int lumisection);
329 
330  private:
331  // resources spent in the module
340  };
341 
342  // plots associated to each path or endpath
343  class PlotsPerPath {
344  public:
345  PlotsPerPath();
346  void book(DQMStore::ConcurrentBooker &, std::string const &, ProcessCallGraph const&, ProcessCallGraph::PathType const&, PlotRanges const& ranges, unsigned int lumisections, bool byls);
347  void fill(ProcessCallGraph::PathType const&, ResourcesPerJob const&, ResourcesPerPath const&, unsigned int lumisection);
348 
349  private:
350  // resources spent in all the modules in the path, including their dependencies
352 
353  // Note:
354  // a TH1F has 7 significant digits, while a 24-hour long run could process
355  // order of 10 billion events; a 64-bit long integer would work and might
356  // be better suited than a double, but there is no "TH1L" in ROOT.
357 
358  // how many times each module and their dependencies has run
360  // resources spent in each module and their dependencies
365  };
366 
368  public:
371  PlotRanges const& event_ranges, PlotRanges const& path_ranges,
372  unsigned int lumisections, bool bypath, bool byls);
373  void fill(ProcessCallGraph::ProcessType const&, ResourcesPerJob const&, ResourcesPerProcess const&, unsigned int ls);
374 
375  private:
376  // resources spent in all the modules of the (sub)process
378  // resources spent in each path and endpath
379  std::vector<PlotsPerPath> paths_;
380  std::vector<PlotsPerPath> endpaths_;
381  };
382 
383  class PlotsPerJob {
384  public:
385  PlotsPerJob(ProcessCallGraph const& job, std::vector<GroupOfModules> const& groups);
386  void book(DQMStore::ConcurrentBooker &, ProcessCallGraph const&, std::vector<GroupOfModules> const&,
387  PlotRanges const& event_ranges, PlotRanges const& path_ranges,
388  PlotRanges const& module_ranges, unsigned int lumisections,
389  bool bymodule, bool bypath, bool byls, bool transitions);
390  void fill(ProcessCallGraph const&, ResourcesPerJob const&, unsigned int ls);
391  void fill_run(AtomicResources const&);
392  void fill_lumi(AtomicResources const&, unsigned int lumisection);
393 
394  private:
395  // resources spent in all the modules of the job
399  // resources spent in the modules' lumi and run transitions
402  // resources spent in the highlighted modules
403  std::vector<PlotsPerElement> highlight_;
404  // resources spent in each module
405  std::vector<PlotsPerElement> modules_;
406  // resources spent in each (sub)process
407  std::vector<PlotsPerProcess> processes_;
408  };
409 
410 
411  // keep track of the dependencies among modules
413 
414  // per-stream information
415  std::vector<ResourcesPerJob> streams_;
416 
417  // concurrent histograms and profiles
418  std::unique_ptr<PlotsPerJob> plots_;
419 
420  // per-lumi and per-run information
421  std::vector<AtomicResources> lumi_transition_; // resources spent in the modules' global and stream lumi transitions
422  std::vector<AtomicResources> run_transition_; // resources spent in the modules' global and stream run transitions
423  AtomicResources overhead_; // resources spent outside of the modules' transitions
424 
425  // summary data
426  ResourcesPerJob job_summary_; // whole event time accounting per-job
427  std::vector<ResourcesPerJob> run_summary_; // whole event time accounting per-run
428  std::mutex summary_mutex_; // synchronise access to the summary objects across different threads
429 
430  // per-thread quantities, lazily allocated
431  tbb::enumerable_thread_specific<Measurement, tbb::cache_aligned_allocator<Measurement>, tbb::ets_key_per_instance>
433 
434  // atomic variables to keep track of the completion of each step, process by process
435  std::unique_ptr<std::atomic<unsigned int>[]> subprocess_event_check_;
436  std::unique_ptr<std::atomic<unsigned int>[]> subprocess_global_lumi_check_;
437  std::unique_ptr<std::atomic<unsigned int>[]> subprocess_global_run_check_;
438 
439  // retrieve the current thread's per-thread quantities
440  Measurement & thread();
441 
442  // job configuration
443  unsigned int concurrent_lumis_;
444  unsigned int concurrent_runs_;
445  unsigned int concurrent_streams_;
446  unsigned int concurrent_threads_;
447 
448  // logging configuration
449  const bool print_event_summary_; // print the time spent in each process, path and module after every event
450  const bool print_run_summary_; // print the time spent in each process, path and module for each run
451  const bool print_job_summary_; // print the time spent in each process, path and module for the whole job
452 
453  // dqm configuration
454  bool enable_dqm_; // non const, depends on the availability of the DQMStore
456  const bool enable_dqm_bypath_;
457  const bool enable_dqm_byls_;
460 
464  const unsigned int dqm_lumisections_range_;
466 
467  std::vector<edm::ParameterSet>
468  highlight_module_psets_; // non-const, cleared in postBeginJob()
469  std::vector<GroupOfModules> highlight_modules_; // non-const, filled in postBeginJob()
470 
471  // log unsupported signals
472  mutable tbb::concurrent_unordered_set<std::string> unsupported_signals_; // keep track of unsupported signals received
473 
474  // print the resource usage summary for en event, a run, or the while job
475  template <typename T>
476  void printHeader(T& out, std::string const & label) const;
477 
478  template <typename T>
479  void printEventHeader(T& out, std::string const & label) const;
480 
481  template <typename T>
482  void printEventLine(T& out, Resources const& data, std::string const & label) const;
483 
484  template <typename T>
485  void printEventLine(T& out, AtomicResources const& data, std::string const & label) const;
486 
487  template <typename T>
488  void printEvent(T& out, ResourcesPerJob const&) const;
489 
490  template <typename T>
491  void printSummaryHeader(T& out, std::string const & label, bool detailed) const;
492 
493  template <typename T>
494  void printPathSummaryHeader(T& out, std::string const & label) const;
495 
496  template <typename T>
497  void printSummaryLine(T& out, Resources const& data, uint64_t events, std::string const& label) const;
498 
499  template <typename T>
500  void printSummaryLine(T& out, Resources const& data, uint64_t events, uint64_t active, std::string const& label) const;
501 
502  template <typename T>
503  void printPathSummaryLine(T& out, Resources const& data, Resources const& total, uint64_t events, std::string const& label) const;
504 
505  template <typename T>
506  void printSummary(T& out, ResourcesPerJob const& data, std::string const& label) const;
507 
508  template <typename T>
509  void printTransition(T& out, AtomicResources const& data, std::string const& label) const;
510 
511  // check if this is the first process being signalled
514 
515  // check if this is the lest process being signalled
516  bool isLastSubprocess(std::atomic<unsigned int>& check);
517 };
518 
519 #endif // ! FastTimerService_h
ConcurrentMonitorElement time_thread_
void preGlobalBeginRun(edm::GlobalContext const &)
void preModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
const bool print_run_summary_
static boost::mutex mutex
Definition: Proxy.cc:11
ConcurrentMonitorElement time_real_byls_
std::unique_ptr< std::atomic< unsigned int >[]> subprocess_global_lumi_check_
void postStreamBeginRun(edm::StreamContext const &)
double queryEventTime(edm::StreamID) const
ConcurrentMonitorElement time_thread_byls_
void postGlobalEndLumi(edm::GlobalContext const &)
void postGlobalBeginLumi(edm::GlobalContext const &)
void on_scheduler_entry(bool worker) final
void postStreamEndLumi(edm::StreamContext const &)
void on_scheduler_exit(bool worker) final
std::vector< edm::ParameterSet > highlight_module_psets_
void postModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
ConcurrentMonitorElement time_real_
void preEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preGlobalEndRun(edm::GlobalContext const &)
void preBeginJob(edm::PathsAndConsumesOfModulesBase const &, edm::ProcessContext const &)
void preSourceLumi(edm::LuminosityBlockIndex)
void printSummary(T &out, ResourcesPerJob const &data, std::string const &label) const
unsigned int concurrent_threads_
void postEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
double queryHighlightTime(edm::StreamID sid, std::string const &label) const
const PlotRanges dqm_event_ranges_
void postModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preSourceRun(edm::RunIndex)
void postModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
void unsupportedSignal(const std::string &signal) const
std::vector< GroupOfModules > highlight_modules_
ResourcesPerJob job_summary_
std::vector< unsigned int > modules
void preGlobalEndLumi(edm::GlobalContext const &)
MatrixMeschach operator+(const MatrixMeschach &mat1, const MatrixMeschach &mat2)
std::vector< ResourcesPerPath > endpaths
double queryModuleTime_(edm::StreamID, unsigned int id) const
AtomicResources overhead_
bool isFirstSubprocess(edm::StreamContext const &)
const bool enable_dqm_bynproc_
unsigned int concurrent_runs_
boost::chrono::nanoseconds time_real
void measure_and_accumulate(AtomicResources &store)
std::atomic< uint64_t > deallocated
ConcurrentMonitorElement module_time_thread_total_
std::unique_ptr< std::atomic< unsigned int >[]> subprocess_global_run_check_
void postSourceRun(edm::RunIndex)
std::vector< ResourcesPerJob > streams_
std::atomic< uint64_t > allocated
void postPathEvent(edm::StreamContext const &, edm::PathContext const &, edm::HLTPathStatus const &)
std::vector< PlotsPerElement > highlight_
void preModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
const bool print_job_summary_
const bool enable_dqm_bypath_
Measurement & thread()
tbb::enumerable_thread_specific< Measurement, tbb::cache_aligned_allocator< Measurement >, tbb::ets_key_per_instance > threads_
void postStreamBeginLumi(edm::StreamContext const &)
const PlotRanges dqm_path_ranges_
boost::chrono::thread_clock::time_point time_thread
std::vector< ResourcesPerModule > modules
void printEvent(T &out, ResourcesPerJob const &) const
void printHeader(T &out, std::string const &label) const
void postModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
std::mutex summary_mutex_
void printPathSummaryHeader(T &out, std::string const &label) const
void printSummaryHeader(T &out, std::string const &label, bool detailed) const
double queryModuleTime(edm::StreamID, const edm::ModuleDescription &module) const
void prePathEvent(edm::StreamContext const &, edm::PathContext const &)
ConcurrentMonitorElement deallocated_byls_
std::vector< ResourcesPerJob > run_summary_
ConcurrentMonitorElement module_deallocated_total_
void printEventHeader(T &out, std::string const &label) const
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
FastTimerService(const edm::ParameterSet &, edm::ActivityRegistry &)
void preModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postEvent(edm::StreamContext const &)
ConcurrentMonitorElement module_counter_
void postGlobalEndRun(edm::GlobalContext const &)
std::unique_ptr< std::atomic< unsigned int >[]> subprocess_event_check_
const unsigned int dqm_lumisections_range_
void postStreamEndRun(edm::StreamContext const &)
void preModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
std::vector< PlotsPerElement > modules_
unsigned int concurrent_lumis_
std::string dqm_path_
void printSummaryLine(T &out, Resources const &data, uint64_t events, std::string const &label) const
const bool enable_dqm_transitions_
void printEventLine(T &out, Resources const &data, std::string const &label) const
std::atomic< boost::chrono::nanoseconds::rep > time_thread
std::vector< Resources > highlight
const bool enable_dqm_bymodule_
void printPathSummaryLine(T &out, Resources const &data, Resources const &total, uint64_t events, std::string const &label) const
string ranges
Definition: diffTwoXMLs.py:78
ConcurrentMonitorElement allocated_byls_
~FastTimerService() override
const PlotRanges dqm_module_ranges_
ConcurrentMonitorElement deallocated_
std::vector< PlotsPerPath > endpaths_
const bool enable_dqm_byls_
def ls(path, rec=False)
Definition: eostools.py:348
std::vector< PlotsPerProcess > processes_
void postModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
const bool print_event_summary_
boost::chrono::high_resolution_clock::time_point time_real
unsigned long long uint64_t
Definition: Time.h:15
void postModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
double querySourceTime(edm::StreamID) const
void printTransition(T &out, AtomicResources const &data, std::string const &label) const
std::vector< AtomicResources > run_transition_
ConcurrentMonitorElement module_allocated_total_
void preModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceEvent(edm::StreamID)
boost::chrono::nanoseconds time_thread
void preEvent(edm::StreamContext const &)
void postGlobalBeginRun(edm::GlobalContext const &)
void preStreamBeginLumi(edm::StreamContext const &)
void ignoredSignal(const std::string &signal) const
void postSourceEvent(edm::StreamID)
void preModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
ConcurrentMonitorElement allocated_
void preStreamEndRun(edm::StreamContext const &)
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:82
std::atomic< boost::chrono::nanoseconds::rep > time_real
void preModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preStreamEndLumi(edm::StreamContext const &)
void measure_and_store(Resources &store)
void postModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
std::unique_ptr< PlotsPerJob > plots_
double queryPathTime(edm::StreamID, std::string const &path) const
void preGlobalBeginLumi(edm::GlobalContext const &)
void preModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceConstruction(edm::ModuleDescription const &)
void preModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
tbb::concurrent_unordered_set< std::string > unsupported_signals_
std::vector< PlotsPerPath > paths_
void postSourceLumi(edm::LuminosityBlockIndex)
void postModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
std::vector< ResourcesPerProcess > processes
ConcurrentMonitorElement module_time_real_total_
double queryModuleTimeByLabel(edm::StreamID, std::string const &module) const
boost::date_time::subsecond_duration< boost::posix_time::time_duration, 1000000000 > nanoseconds
def check(config)
Definition: trackerTree.py:14
ProcessCallGraph callgraph_
long double T
void postModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preStreamBeginRun(edm::StreamContext const &)
void reset(double vett[256])
Definition: TPedValues.cc:11
std::vector< ResourcesPerPath > paths
Definition: vlib.h:208
void preModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
std::vector< AtomicResources > lumi_transition_
unsigned int concurrent_streams_
bool isLastSubprocess(std::atomic< unsigned int > &check)
void preallocate(edm::service::SystemBounds const &)
Basic3DVector & operator+=(const Basic3DVector< U > &p)