CMS 3D CMS Logo

FastTimerService.h
Go to the documentation of this file.
1 #ifndef FastTimerService_h
2 #define FastTimerService_h
3 
4 // system headers
5 #include <unistd.h>
6 
7 // C++ headers
8 #include <chrono>
9 #include <cmath>
10 #include <map>
11 #include <mutex>
12 #include <string>
13 #include <unordered_map>
14 
15 // boost headers
16 #include <boost/chrono.hpp>
17 
18 // tbb headers
19 #include <tbb/concurrent_unordered_set.h>
20 #include <tbb/enumerable_thread_specific.h>
21 #include <tbb/task_scheduler_observer.h>
22 
23 // CMSSW headers
43 
44 
45 /*
46 processing time is divided into
47  - source
48  - event processing, sum of the time spent in all the modules
49 */
50 
51 class FastTimerService : public tbb::task_scheduler_observer
52 {
53 public:
55  ~FastTimerService() override = default;
56 
57 private:
58  void ignoredSignal(const std::string& signal) const;
59  void unsupportedSignal(const std::string& signal) const;
60 
61  // these signal pairs are not guaranteed to happen in the same thread
62 
64 
66  void postBeginJob();
67 
68  void postEndJob();
69 
72 
75 
78 
81 
84 
87 
90 
93 
94  void preEvent(edm::StreamContext const&);
95  void postEvent(edm::StreamContext const&);
96 
99 
102 
103  // these signal pairs are guaranteed to be called within the same thread
104 
105  //void preOpenFile(std::string const&, bool);
106  //void postOpenFile(std::string const&, bool);
107 
108  //void preCloseFile(std::string const&, bool);
109  //void postCloseFile(std::string const&, bool);
110 
112  //void postSourceConstruction(edm::ModuleDescription const&);
113 
116 
119 
122 
123  //void preModuleConstruction(edm::ModuleDescription const&);
124  //void postModuleConstruction(edm::ModuleDescription const&);
125 
126  //void preModuleBeginJob(edm::ModuleDescription const&);
127  //void postModuleBeginJob(edm::ModuleDescription const&);
128 
129  //void preModuleEndJob(edm::ModuleDescription const&);
130  //void postModuleEndJob(edm::ModuleDescription const&);
131 
132  //void preModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
133  //void postModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
134 
135  //void preModuleEndStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
136  //void postModuleEndStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
137 
140 
143 
146 
149 
152 
155 
158 
161 
164 
167 
170 
173 
174  // inherited from TBB task_scheduler_observer
175  void on_scheduler_entry(bool worker) final;
176  void on_scheduler_exit(bool worker) final;
177 
178 public:
179  static void fillDescriptions(edm::ConfigurationDescriptions & descriptions);
180 
181 private:
182  // forward declarations
183  struct Resources;
184  struct AtomicResources;
185 
186  // per-thread measurements
187  struct Measurement {
188  public:
190  // take per-thread measurements
191  void measure() noexcept;
192  // take per-thread measurements, compute the delta with respect to the previous measurement, and store them in the argument
193  void measure_and_store(Resources & store) noexcept;
194  // take per-thread measurements, compute the delta with respect to the previous measurement, and add them to the argument
197 
198  public:
199  #ifdef DEBUG_THREAD_CONCURRENCY
201  #endif // DEBUG_THREAD_CONCURRENCY
202  boost::chrono::thread_clock::time_point time_thread;
203  boost::chrono::high_resolution_clock::time_point time_real;
206  };
207 
208  // highlight a group of modules
209  struct GroupOfModules {
210  public:
212  std::vector<unsigned int> modules;
213  };
214 
215  // resources being monitored by the service
216  struct Resources {
217  public:
218  Resources();
219  void reset();
221  Resources operator+(Resources const& other) const;
222 
223  public:
228  };
229 
230  // atomic version of Resources
232  public:
233  AtomicResources();
235  void reset();
236 
237  AtomicResources & operator=(AtomicResources const& other);
239  AtomicResources operator+(AtomicResources const& other) const;
240 
241  public:
242  std::atomic<boost::chrono::nanoseconds::rep> time_thread;
243  std::atomic<boost::chrono::nanoseconds::rep> time_real;
244  std::atomic<uint64_t> allocated;
245  std::atomic<uint64_t> deallocated;
246  };
247 
249  public:
251  void reset() noexcept;
253  ResourcesPerModule operator+(ResourcesPerModule const& other) const;
254 
255  public:
257  unsigned events;
258  bool has_acquire; // whether this module has an acquire() method
259  };
260 
262  public:
263  void reset();
265  ResourcesPerPath operator+(ResourcesPerPath const& other) const;
266 
267  public:
268  Resources active; // resources used by all modules on this path
269  Resources total; // resources used by all modules on this path, and their dependencies
270  unsigned last; // one-past-the last module that ran on this path
271  bool status; // whether the path accepted or rejected the event
272  };
273 
275  public:
277  void reset();
280 
281  public:
283  std::vector<ResourcesPerPath> paths;
284  std::vector<ResourcesPerPath> endpaths;
285  };
286 
288  public:
289  ResourcesPerJob() = default;
290  ResourcesPerJob(ProcessCallGraph const& job, std::vector<GroupOfModules> const& groups);
291  void reset();
293  ResourcesPerJob operator+(ResourcesPerJob const& other) const;
294 
295  public:
298  Resources event; // total time etc. spent between preSourceEvent and postEvent
300  std::vector<Resources> highlight;
301  std::vector<ResourcesPerModule> modules;
302  std::vector<ResourcesPerProcess> processes;
303  unsigned events;
304  };
305 
306  // plot ranges and resolution
307  struct PlotRanges {
308  double time_range;
310  double memory_range;
312  };
313 
314  // plots associated to each module or other element (path, process, etc)
316  public:
317  PlotsPerElement() = default;
318  void book(DQMStore::ConcurrentBooker &, std::string const& name, std::string const& title, PlotRanges const& ranges, unsigned int lumisections, bool byls);
319  void fill(Resources const&, unsigned int lumisection);
320  void fill(AtomicResources const&, unsigned int lumisection);
321  void fill_fraction(Resources const&, Resources const&, unsigned int lumisection);
322 
323  private:
324  // resources spent in the module
333  };
334 
335  // plots associated to each path or endpath
336  class PlotsPerPath {
337  public:
338  PlotsPerPath() = default;
339  void book(DQMStore::ConcurrentBooker &, std::string const &, ProcessCallGraph const&, ProcessCallGraph::PathType const&, PlotRanges const& ranges, unsigned int lumisections, bool byls);
340  void fill(ProcessCallGraph::PathType const&, ResourcesPerJob const&, ResourcesPerPath const&, unsigned int lumisection);
341 
342  private:
343  // resources spent in all the modules in the path, including their dependencies
345 
346  // Note:
347  // a TH1F has 7 significant digits, while a 24-hour long run could process
348  // order of 10 billion events; a 64-bit long integer would work and might
349  // be better suited than a double, but there is no "TH1L" in ROOT.
350 
351  // how many times each module and its dependencies have run
353  // resources spent in each module and their dependencies
358  };
359 
361  public:
364  PlotRanges const& event_ranges, PlotRanges const& path_ranges,
365  unsigned int lumisections, bool bypath, bool byls);
366  void fill(ProcessCallGraph::ProcessType const&, ResourcesPerJob const&, ResourcesPerProcess const&, unsigned int ls);
367 
368  private:
369  // resources spent in all the modules of the (sub)process
371  // resources spent in each path and endpath
372  std::vector<PlotsPerPath> paths_;
373  std::vector<PlotsPerPath> endpaths_;
374  };
375 
376  class PlotsPerJob {
377  public:
378  PlotsPerJob(ProcessCallGraph const& job, std::vector<GroupOfModules> const& groups);
379  void book(DQMStore::ConcurrentBooker &, ProcessCallGraph const&, std::vector<GroupOfModules> const&,
380  PlotRanges const& event_ranges, PlotRanges const& path_ranges,
381  PlotRanges const& module_ranges, unsigned int lumisections,
382  bool bymodule, bool bypath, bool byls, bool transitions);
383  void fill(ProcessCallGraph const&, ResourcesPerJob const&, unsigned int ls);
384  void fill_run(AtomicResources const&);
385  void fill_lumi(AtomicResources const&, unsigned int lumisection);
386 
387  private:
388  // resources spent in all the modules of the job
392  // resources spent in the modules' lumi and run transitions
395  // resources spent in the highlighted modules
396  std::vector<PlotsPerElement> highlight_;
397  // resources spent in each module
398  std::vector<PlotsPerElement> modules_;
399  // resources spent in each (sub)process
400  std::vector<PlotsPerProcess> processes_;
401  };
402 
403 
404  // keep track of the dependencies among modules
406 
407  // per-stream information
408  std::vector<ResourcesPerJob> streams_;
409 
410  // concurrent histograms and profiles
411  std::unique_ptr<PlotsPerJob> plots_;
412 
413  // per-lumi and per-run information
414  std::vector<AtomicResources> lumi_transition_; // resources spent in the modules' global and stream lumi transitions
415  std::vector<AtomicResources> run_transition_; // resources spent in the modules' global and stream run transitions
416  AtomicResources overhead_; // resources spent outside of the modules' transitions
417 
418  // summary data
419  ResourcesPerJob job_summary_; // whole event time accounting per-job
420  std::vector<ResourcesPerJob> run_summary_; // whole event time accounting per-run
421  std::mutex summary_mutex_; // synchronise access to the summary objects across different threads
422 
423  // per-thread quantities, lazily allocated
424  tbb::enumerable_thread_specific<Measurement, tbb::cache_aligned_allocator<Measurement>, tbb::ets_key_per_instance>
426 
427  // atomic variables to keep track of the completion of each step, process by process
428  std::unique_ptr<std::atomic<unsigned int>[]> subprocess_event_check_;
429  std::unique_ptr<std::atomic<unsigned int>[]> subprocess_global_lumi_check_;
430  std::unique_ptr<std::atomic<unsigned int>[]> subprocess_global_run_check_;
431 
432  // retrieve the current thread's per-thread quantities
433  Measurement & thread();
434 
435  // job configuration
436  unsigned int concurrent_lumis_;
437  unsigned int concurrent_runs_;
438  unsigned int concurrent_streams_;
439  unsigned int concurrent_threads_;
440 
441  // logging configuration
442  const bool print_event_summary_; // print the time spent in each process, path and module after every event
443  const bool print_run_summary_; // print the time spent in each process, path and module for each run
444  const bool print_job_summary_; // print the time spent in each process, path and module for the whole job
445 
446  // dqm configuration
447  bool enable_dqm_; // non const, depends on the availability of the DQMStore
449  const bool enable_dqm_bypath_;
450  const bool enable_dqm_byls_;
453 
457  const unsigned int dqm_lumisections_range_;
459 
460  std::vector<edm::ParameterSet>
461  highlight_module_psets_; // non-const, cleared in postBeginJob()
462  std::vector<GroupOfModules> highlight_modules_; // non-const, filled in postBeginJob()
463 
464  // log unsupported signals
465  mutable tbb::concurrent_unordered_set<std::string> unsupported_signals_; // keep track of unsupported signals received
466 
467  // print the resource usage summary for an event, a run, or the whole job
468  template <typename T>
469  void printHeader(T& out, std::string const & label) const;
470 
471  template <typename T>
472  void printEventHeader(T& out, std::string const & label) const;
473 
474  template <typename T>
475  void printEventLine(T& out, Resources const& data, std::string const & label) const;
476 
477  template <typename T>
478  void printEventLine(T& out, AtomicResources const& data, std::string const & label) const;
479 
480  template <typename T>
481  void printEvent(T& out, ResourcesPerJob const&) const;
482 
483  template <typename T>
484  void printSummaryHeader(T& out, std::string const & label, bool detailed) const;
485 
486  template <typename T>
487  void printPathSummaryHeader(T& out, std::string const & label) const;
488 
489  template <typename T>
490  void printSummaryLine(T& out, Resources const& data, uint64_t events, std::string const& label) const;
491 
492  template <typename T>
493  void printSummaryLine(T& out, Resources const& data, uint64_t events, uint64_t active, std::string const& label) const;
494 
495  template <typename T>
496  void printPathSummaryLine(T& out, Resources const& data, Resources const& total, uint64_t events, std::string const& label) const;
497 
498  template <typename T>
499  void printSummary(T& out, ResourcesPerJob const& data, std::string const& label) const;
500 
501  template <typename T>
502  void printTransition(T& out, AtomicResources const& data, std::string const& label) const;
503 
504  // check if this is the first process being signalled
507 
508  // check if this is the last process being signalled
509  bool isLastSubprocess(std::atomic<unsigned int>& check);
510 };
511 
512 #endif // ! FastTimerService_h
ConcurrentMonitorElement time_thread_
void preGlobalBeginRun(edm::GlobalContext const &)
void preModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
const bool print_run_summary_
static boost::mutex mutex
Definition: Proxy.cc:11
ConcurrentMonitorElement time_real_byls_
std::unique_ptr< std::atomic< unsigned int >[]> subprocess_global_lumi_check_
void postStreamBeginRun(edm::StreamContext const &)
ConcurrentMonitorElement time_thread_byls_
void postGlobalEndLumi(edm::GlobalContext const &)
void postGlobalBeginLumi(edm::GlobalContext const &)
void on_scheduler_entry(bool worker) final
void postStreamEndLumi(edm::StreamContext const &)
void on_scheduler_exit(bool worker) final
std::vector< edm::ParameterSet > highlight_module_psets_
void postModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
ConcurrentMonitorElement time_real_
void preEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preGlobalEndRun(edm::GlobalContext const &)
void preBeginJob(edm::PathsAndConsumesOfModulesBase const &, edm::ProcessContext const &)
void preSourceLumi(edm::LuminosityBlockIndex)
void printSummary(T &out, ResourcesPerJob const &data, std::string const &label) const
unsigned int concurrent_threads_
void postEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
const PlotRanges dqm_event_ranges_
void postModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preSourceRun(edm::RunIndex)
void postModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
void unsupportedSignal(const std::string &signal) const
std::vector< GroupOfModules > highlight_modules_
ResourcesPerJob job_summary_
std::vector< unsigned int > modules
void preGlobalEndLumi(edm::GlobalContext const &)
MatrixMeschach operator+(const MatrixMeschach &mat1, const MatrixMeschach &mat2)
std::vector< ResourcesPerPath > endpaths
AtomicResources overhead_
bool isFirstSubprocess(edm::StreamContext const &)
const bool enable_dqm_bynproc_
unsigned int concurrent_runs_
boost::chrono::nanoseconds time_real
#define noexcept
std::atomic< uint64_t > deallocated
ConcurrentMonitorElement module_time_thread_total_
std::unique_ptr< std::atomic< unsigned int >[]> subprocess_global_run_check_
void postSourceRun(edm::RunIndex)
std::vector< ResourcesPerJob > streams_
std::atomic< uint64_t > allocated
void postPathEvent(edm::StreamContext const &, edm::PathContext const &, edm::HLTPathStatus const &)
std::vector< PlotsPerElement > highlight_
void preModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
const bool print_job_summary_
const bool enable_dqm_bypath_
Measurement & thread()
tbb::enumerable_thread_specific< Measurement, tbb::cache_aligned_allocator< Measurement >, tbb::ets_key_per_instance > threads_
void postStreamBeginLumi(edm::StreamContext const &)
const PlotRanges dqm_path_ranges_
boost::chrono::thread_clock::time_point time_thread
std::vector< ResourcesPerModule > modules
void printEvent(T &out, ResourcesPerJob const &) const
void printHeader(T &out, std::string const &label) const
void postModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
std::mutex summary_mutex_
void printPathSummaryHeader(T &out, std::string const &label) const
void printSummaryHeader(T &out, std::string const &label, bool detailed) const
void prePathEvent(edm::StreamContext const &, edm::PathContext const &)
ConcurrentMonitorElement deallocated_byls_
std::vector< ResourcesPerJob > run_summary_
~FastTimerService() override=default
ConcurrentMonitorElement module_deallocated_total_
void printEventHeader(T &out, std::string const &label) const
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
FastTimerService(const edm::ParameterSet &, edm::ActivityRegistry &)
void preModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postEvent(edm::StreamContext const &)
ConcurrentMonitorElement module_counter_
void postGlobalEndRun(edm::GlobalContext const &)
std::unique_ptr< std::atomic< unsigned int >[]> subprocess_event_check_
const unsigned int dqm_lumisections_range_
void postStreamEndRun(edm::StreamContext const &)
void preModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
std::vector< PlotsPerElement > modules_
unsigned int concurrent_lumis_
std::string dqm_path_
void printSummaryLine(T &out, Resources const &data, uint64_t events, std::string const &label) const
const bool enable_dqm_transitions_
void printEventLine(T &out, Resources const &data, std::string const &label) const
std::atomic< boost::chrono::nanoseconds::rep > time_thread
std::vector< Resources > highlight
const bool enable_dqm_bymodule_
void printPathSummaryLine(T &out, Resources const &data, Resources const &total, uint64_t events, std::string const &label) const
string ranges
Definition: diffTwoXMLs.py:79
ConcurrentMonitorElement allocated_byls_
const PlotRanges dqm_module_ranges_
ConcurrentMonitorElement deallocated_
std::vector< PlotsPerPath > endpaths_
const bool enable_dqm_byls_
def ls(path, rec=False)
Definition: eostools.py:349
std::vector< PlotsPerProcess > processes_
void postModuleEventAcquire(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
const bool print_event_summary_
boost::chrono::high_resolution_clock::time_point time_real
unsigned long long uint64_t
Definition: Time.h:15
void postModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void printTransition(T &out, AtomicResources const &data, std::string const &label) const
std::vector< AtomicResources > run_transition_
ConcurrentMonitorElement module_allocated_total_
void preModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceEvent(edm::StreamID)
boost::chrono::nanoseconds time_thread
void preEvent(edm::StreamContext const &)
void postGlobalBeginRun(edm::GlobalContext const &)
void preStreamBeginLumi(edm::StreamContext const &)
void ignoredSignal(const std::string &signal) const
void postSourceEvent(edm::StreamID)
void preModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
ConcurrentMonitorElement allocated_
void preStreamEndRun(edm::StreamContext const &)
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:82
std::atomic< boost::chrono::nanoseconds::rep > time_real
void preModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preStreamEndLumi(edm::StreamContext const &)
void measure_and_store(Resources &store)
void postModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
std::unique_ptr< PlotsPerJob > plots_
void preGlobalBeginLumi(edm::GlobalContext const &)
void preModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceConstruction(edm::ModuleDescription const &)
void preModuleEventAcquire(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
tbb::concurrent_unordered_set< std::string > unsupported_signals_
std::vector< PlotsPerPath > paths_
void postSourceLumi(edm::LuminosityBlockIndex)
void postModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
std::vector< ResourcesPerProcess > processes
ConcurrentMonitorElement module_time_real_total_
boost::date_time::subsecond_duration< boost::posix_time::time_duration, 1000000000 > nanoseconds
def check(config)
Definition: trackerTree.py:14
ProcessCallGraph callgraph_
long double T
void postModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preStreamBeginRun(edm::StreamContext const &)
void reset(double vett[256])
Definition: TPedValues.cc:11
std::vector< ResourcesPerPath > paths
void preModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
std::vector< AtomicResources > lumi_transition_
unsigned int concurrent_streams_
bool isLastSubprocess(std::atomic< unsigned int > &check)
void preallocate(edm::service::SystemBounds const &)
Basic3DVector & operator+=(const Basic3DVector< U > &p)
void measure_and_accumulate(Resources &store)