CMS 3D CMS Logo

FastTimerService.h
Go to the documentation of this file.
1 #ifndef FastTimerService_h
2 #define FastTimerService_h
3 
4 // C++ headers
5 #include <cmath>
6 #include <string>
7 #include <map>
8 #include <unordered_map>
9 #include <chrono>
10 #include <mutex>
11 #include <unistd.h>
12 
13 // boost headers
14 #include <boost/chrono.hpp>
15 
16 // tbb headers
17 #include <tbb/concurrent_unordered_set.h>
18 #include <tbb/enumerable_thread_specific.h>
19 #include <tbb/task_scheduler_observer.h>
20 
21 // CMSSW headers
41 
42 
43 /*
44 procesing time is divided into
45  - source
46  - event processing, sum of the time spent in all the modules
47 */
48 
49 /*
50 Assuming an HLT process with ~2500 modules and ~500 paths, tracking each step (with two calls per step, to start and stop the timer)
51 with std::chrono::high_resolution_clock gives a per-event overhead of 1 ms
52 
53 Detailed informations on different timers can be extracted running $CMSSW_RELEASE_BASE/test/$SCRAM_ARCH/testChrono .
54 
55 
56 Timer per-call overhead on SLC5:
57 
58 Linux 2.6.18-371.1.2.el5 x86_64
59 glibc version: 2.5
60 clock source: unknown
61 For each timer the resolution reported is the MINIMUM (MEDIAN) (MEAN +/- its STDDEV) of the increments measured during the test.
62 
63 Performance of std::chrono::high_resolution_clock
64  Average time per call: 317.0 ns
65  Clock tick period: 1.0 ns
66  Measured resolution: 1000.0 ns (median: 1000.0 ns) (sigma: 199.4 ns) (average: 1007.6 +/- 0.4 ns)
67 
68 
69 Timer per-call overhead on SLC6 (virtualized):
70 
71 Linux 2.6.32-358.23.2.el6.x86_64 x86_64
72 glibc version: 2.12
73 clock source: kvm-clock
74 For each timer the resolution reported is the MINIMUM (MEDIAN) (MEAN +/- its STDDEV) of the increments measured during the test.
75 
76 Performance of std::chrono::high_resolution_clock
77  Average time per call: 351.2 ns
78  Clock tick period: 1.0 ns
79  Measured resolution: 1.0 ns (median: 358.0 ns) (sigma: 30360.8 ns) (average: 685.7 +/- 42.4 ns)
80 */
81 
82 
83 class FastTimerService : public tbb::task_scheduler_observer
84 {
85 public:
87  ~FastTimerService() override;
88 
89 private:
90  double queryModuleTime_(edm::StreamID, unsigned int id) const;
91 
92 public:
93  // query the time spent in a module/path/process (available after it has run)
94  double querySourceTime(edm::StreamID) const;
95  double queryEventTime(edm::StreamID) const;
96  double queryEventTime(edm::StreamID, std::string const& process) const;
98  double queryModuleTime(edm::StreamID, unsigned int id) const;
99  double queryModuleTimeByLabel(edm::StreamID, std::string const& module) const;
100  double queryModuleTimeByLabel(edm::StreamID, std::string const& process, const std::string & module) const;
101  double queryPathTime(edm::StreamID, std::string const& path) const;
102  double queryPathTime(edm::StreamID, std::string const& process, std::string const& path) const;
103  double queryHighlightTime(edm::StreamID sid, std::string const& label) const;
104 
105 private:
106  void ignoredSignal(const std::string& signal) const;
107  void unsupportedSignal(const std::string& signal) const;
108 
109  // these signal pairs are not guaranteed to happen in the same thread
110 
112 
114  void postBeginJob();
115 
116  void postEndJob();
117 
120 
121  void preGlobalEndRun(edm::GlobalContext const&);
123 
126 
127  void preStreamEndRun(edm::StreamContext const&);
129 
132 
135 
138 
141 
142  void preEvent(edm::StreamContext const&);
143  void postEvent(edm::StreamContext const&);
144 
145  void prePathEvent(edm::StreamContext const&, edm::PathContext const&);
147 
150 
151  // these signal pairs are guaranteed to be called within the same thread
152 
153  //void preOpenFile(std::string const&, bool);
154  //void postOpenFile(std::string const&, bool);
155 
156  //void preCloseFile(std::string const&, bool);
157  //void postCloseFile(std::string const&, bool);
158 
160  //void postSourceConstruction(edm::ModuleDescription const&);
161 
162  void preSourceRun();
163  void postSourceRun();
164 
165  void preSourceLumi();
166  void postSourceLumi();
167 
170 
171  //void preModuleConstruction(edm::ModuleDescription const&);
172  //void postModuleConstruction(edm::ModuleDescription const&);
173 
174  //void preModuleBeginJob(edm::ModuleDescription const&);
175  //void postModuleBeginJob(edm::ModuleDescription const&);
176 
177  //void preModuleEndJob(edm::ModuleDescription const&);
178  //void postModuleEndJob(edm::ModuleDescription const&);
179 
180  //void preModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
181  //void postModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
182 
183  //void preModuleEndStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
184  //void postModuleEndStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
185 
188 
191 
194 
197 
200 
203 
206 
209 
212 
215 
218 
219  // inherited from TBB task_scheduler_observer
220  void on_scheduler_entry(bool worker) final;
221  void on_scheduler_exit(bool worker) final;
222 
223 public:
224  static void fillDescriptions(edm::ConfigurationDescriptions & descriptions);
225 
226 private:
227  // highlight a group of modules
228  struct GroupOfModules {
229  public:
231  std::vector<unsigned int> modules;
232  };
233 
234  // resources being monitored by the service
235  struct Resources {
236  public:
237  Resources();
238  void reset();
240  Resources operator+(Resources const& other) const;
241 
242  public:
247  };
248 
249  // atomic version of Resources
251  public:
252  AtomicResources();
254  void reset();
255 
256  AtomicResources & operator=(AtomicResources const& other);
258  AtomicResources operator+(AtomicResources const& other) const;
259 
260  public:
261  std::atomic<boost::chrono::nanoseconds::rep> time_thread;
262  std::atomic<boost::chrono::nanoseconds::rep> time_real;
263  std::atomic<uint64_t> allocated;
264  std::atomic<uint64_t> deallocated;
265  };
266 
268  public:
270  void reset();
272  ResourcesPerModule operator+(ResourcesPerModule const& other) const;
273 
274  public:
276  unsigned events;
277  };
278 
279  // per-thread measurements
280  struct Measurement {
281  public:
282  Measurement();
283  void measure();
284  void measure_and_store(Resources & store);
285  void measure_and_accumulate(AtomicResources & store);
286 
287  public:
288  #ifdef DEBUG_THREAD_CONCURRENCY
290  #endif // DEBUG_THREAD_CONCURRENCY
291  boost::chrono::thread_clock::time_point time_thread;
292  boost::chrono::high_resolution_clock::time_point time_real;
295  };
296 
298  public:
300  void reset();
302  ResourcesPerPath operator+(ResourcesPerPath const& other) const;
303 
304  public:
305  Resources active; // resources used by all modules on this path
306  Resources total; // resources used by all modules on this path, and their dependencies
307  unsigned last; // one-past-the last module that ran on this path
308  bool status; // whether the path accepted or rejected the event
309  };
310 
312  public:
314  void reset();
317 
318  public:
320  std::vector<ResourcesPerPath> paths;
321  std::vector<ResourcesPerPath> endpaths;
322  };
323 
325  public:
326  ResourcesPerJob();
327  ResourcesPerJob(ProcessCallGraph const& job, std::vector<GroupOfModules> const& groups);
328  void reset();
330  ResourcesPerJob operator+(ResourcesPerJob const& other) const;
331 
332  public:
335  Resources event; // total time etc. spent between preSourceEvent and postEvent
337  std::vector<Resources> highlight;
338  std::vector<ResourcesPerModule> modules;
339  std::vector<ResourcesPerProcess> processes;
340  unsigned events;
341  };
342 
343  // plot ranges and resolution
344  struct PlotRanges {
345  double time_range;
347  double memory_range;
349  };
350 
351  // plots associated to each module or other element (path, process, etc)
353  public:
354  PlotsPerElement();
355  void reset();
356  void book(DQMStore::IBooker &, std::string const& name, std::string const& title, PlotRanges const& ranges, unsigned int lumisections, bool byls);
357  void fill(Resources const&, unsigned int lumisection);
358  void fill(AtomicResources const&, unsigned int lumisection);
359  void fill_fraction(Resources const&, Resources const&, unsigned int lumisection);
360 
361  private:
362  // resources spent in the module
363  TH1F * time_thread_;
364  TProfile * time_thread_byls_;
365  TH1F * time_real_;
366  TProfile * time_real_byls_;
367  TH1F * allocated_;
368  TProfile * allocated_byls_;
369  TH1F * deallocated_;
370  TProfile * deallocated_byls_;
371  };
372 
373  // plots associated to each path or endpath
374  class PlotsPerPath {
375  public:
376  PlotsPerPath();
377  void reset();
378  void book(DQMStore::IBooker &, std::string const &, ProcessCallGraph const&, ProcessCallGraph::PathType const&, PlotRanges const& ranges, unsigned int lumisections, bool byls);
379  void fill(ProcessCallGraph::PathType const&, ResourcesPerJob const&, ResourcesPerPath const&, unsigned int lumisection);
380 
381  private:
382  // resources spent in all the modules in the path, including their dependencies
384 
385  // Note:
386  // a TH1F has 7 significant digits, while a 24-hour long run could process
387  // order of 10 billion events; a 64-bit long integer would work and might
388  // be better suited than a double, but there is no "TH1L" in ROOT.
389 
390  // how many times each module and their dependencies has run
392  // resources spent in each module and their dependencies
397  };
398 
400  public:
402  void reset();
404  PlotRanges const& event_ranges, PlotRanges const& path_ranges,
405  unsigned int lumisections, bool bypath, bool byls);
406  void fill(ProcessCallGraph::ProcessType const&, ResourcesPerJob const&, ResourcesPerProcess const&, unsigned int ls);
407 
408  private:
409  // resources spent in all the modules of the (sub)process
411  // resources spent in each path and endpath
412  std::vector<PlotsPerPath> paths_;
413  std::vector<PlotsPerPath> endpaths_;
414  };
415 
416  class PlotsPerJob {
417  public:
418  PlotsPerJob(ProcessCallGraph const& job, std::vector<GroupOfModules> const& groups);
419  void reset();
420  void book(DQMStore::IBooker &, ProcessCallGraph const&, std::vector<GroupOfModules> const&,
421  PlotRanges const& event_ranges, PlotRanges const& path_ranges,
422  PlotRanges const& module_ranges, unsigned int lumisections,
423  bool bymodule, bool bypath, bool byls);
424  void fill(ProcessCallGraph const&, ResourcesPerJob const&, unsigned int ls);
425 
426  private:
427  // resources spent in all the modules of the job
431  // resources spent in the highlighted modules
432  std::vector<PlotsPerElement> highlight_;
433  // resources spent in each module
434  std::vector<PlotsPerElement> modules_;
435  // resources spent in each (sub)process
436  std::vector<PlotsPerProcess> processes_;
437  };
438 
439 
440  // keep track of the dependencies among modules
442 
443  // per-stream information
444  std::vector<ResourcesPerJob> streams_;
445  std::vector<PlotsPerJob> stream_plots_;
446 
447  // summary data
448  ResourcesPerJob job_summary_; // whole event time accounting per-job
449  std::vector<ResourcesPerJob> run_summary_; // whole event time accounting per-run
450  std::mutex summary_mutex_; // synchronise access to the summary objects across different threads
451 
452  // per-thread quantities, lazily allocated
453  tbb::enumerable_thread_specific<Measurement, tbb::cache_aligned_allocator<Measurement>, tbb::ets_key_per_instance>
455 
456  // atomic variables to keep track of the completion of each step, process by process
457  std::unique_ptr<std::atomic<unsigned int>[]> subprocess_event_check_;
458  std::unique_ptr<std::atomic<unsigned int>[]> subprocess_lumisection_check_;
459  std::unique_ptr<std::atomic<unsigned int>[]> subprocess_run_check_;
460  std::unique_ptr<std::atomic<unsigned int>[]> subprocess_global_run_check_;
461 
462  // retrieve the current thread's per-thread quantities
463  Measurement & thread();
464 
465  // job configuration
466  unsigned int concurrent_runs_;
467  unsigned int concurrent_streams_;
468  unsigned int concurrent_threads_;
469 
470  // logging configuration
471  const bool print_event_summary_; // print the time spent in each process, path and module after every event
472  const bool print_run_summary_; // print the time spent in each process, path and module for each run
473  const bool print_job_summary_; // print the time spent in each process, path and module for the whole job
474 
475  // dqm configuration
476  unsigned int module_id_; // pseudo module id for the FastTimerService, needed by the thread-safe DQMStore
477 
478  bool enable_dqm_; // non const, depends on the availability of the DQMStore
480  const bool enable_dqm_bypath_;
481  const bool enable_dqm_byls_;
483 
487  const unsigned int dqm_lumisections_range_;
489 
490  std::vector<edm::ParameterSet>
491  highlight_module_psets_; // non-const, cleared in postBeginJob()
492  std::vector<GroupOfModules> highlight_modules_; // non-const, filled in postBeginJob()
493 
494  // log unsupported signals
495  mutable tbb::concurrent_unordered_set<std::string> unsupported_signals_; // keep track of unsupported signals received
496 
497  // print the resource usage summary for en event, a run, or the while job
498  template <typename T>
499  void printHeader(T& out, std::string const & label) const;
500 
501  template <typename T>
502  void printEventHeader(T& out, std::string const & label) const;
503 
504  template <typename T>
505  void printEventLine(T& out, Resources const& data, std::string const & label) const;
506 
507  template <typename T>
508  void printEvent(T& out, ResourcesPerJob const&) const;
509 
510  template <typename T>
511  void printSummaryHeader(T& out, std::string const & label, bool detaile) const;
512 
513  template <typename T>
514  void printSummaryLine(T& out, Resources const& data, uint64_t events, std::string const& label) const;
515 
516  template <typename T>
517  void printSummaryLine(T& out, Resources const& data, uint64_t events, uint64_t active, std::string const& label) const;
518 
519  template <typename T>
520  void printSummary(T& out, ResourcesPerJob const&, std::string const& label) const;
521 
522  // check if this is the first process being signalled
525 
526  // check if this is the lest process being signalled
527  bool isLastSubprocess(std::atomic<unsigned int>& check);
528 };
529 
530 #endif // ! FastTimerService_h
void preGlobalBeginRun(edm::GlobalContext const &)
void preModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
const bool print_run_summary_
void postStreamBeginRun(edm::StreamContext const &)
double queryEventTime(edm::StreamID) const
void postGlobalEndLumi(edm::GlobalContext const &)
void postGlobalBeginLumi(edm::GlobalContext const &)
void on_scheduler_entry(bool worker) final
void postStreamEndLumi(edm::StreamContext const &)
void on_scheduler_exit(bool worker) final
std::vector< edm::ParameterSet > highlight_module_psets_
void postModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preGlobalEndRun(edm::GlobalContext const &)
void preBeginJob(edm::PathsAndConsumesOfModulesBase const &, edm::ProcessContext const &)
unsigned int concurrent_threads_
void postEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
double queryHighlightTime(edm::StreamID sid, std::string const &label) const
const PlotRanges dqm_event_ranges_
void postModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void postModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
void unsupportedSignal(const std::string &signal) const
std::vector< GroupOfModules > highlight_modules_
ResourcesPerJob job_summary_
static boost::mutex mutex
Definition: LHEProxy.cc:11
std::vector< unsigned int > modules
void preGlobalEndLumi(edm::GlobalContext const &)
MatrixMeschach operator+(const MatrixMeschach &mat1, const MatrixMeschach &mat2)
std::vector< ResourcesPerPath > endpaths
double queryModuleTime_(edm::StreamID, unsigned int id) const
bool isFirstSubprocess(edm::StreamContext const &)
const bool enable_dqm_bynproc_
unsigned int concurrent_runs_
boost::chrono::nanoseconds time_real
std::atomic< uint64_t > deallocated
std::unique_ptr< std::atomic< unsigned int >[]> subprocess_global_run_check_
std::vector< ResourcesPerJob > streams_
std::atomic< uint64_t > allocated
void postPathEvent(edm::StreamContext const &, edm::PathContext const &, edm::HLTPathStatus const &)
std::vector< PlotsPerElement > highlight_
void preModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
const bool print_job_summary_
const bool enable_dqm_bypath_
Measurement & thread()
tbb::enumerable_thread_specific< Measurement, tbb::cache_aligned_allocator< Measurement >, tbb::ets_key_per_instance > threads_
void postStreamBeginLumi(edm::StreamContext const &)
void printSummary(T &out, ResourcesPerJob const &, std::string const &label) const
const PlotRanges dqm_path_ranges_
boost::chrono::thread_clock::time_point time_thread
std::vector< ResourcesPerModule > modules
void printEvent(T &out, ResourcesPerJob const &) const
void printHeader(T &out, std::string const &label) const
void postModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
std::mutex summary_mutex_
std::vector< PlotsPerJob > stream_plots_
void printSummaryHeader(T &out, std::string const &label, bool detaile) const
double queryModuleTime(edm::StreamID, const edm::ModuleDescription &module) const
void prePathEvent(edm::StreamContext const &, edm::PathContext const &)
unsigned int module_id_
std::vector< ResourcesPerJob > run_summary_
void printEventHeader(T &out, std::string const &label) const
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
FastTimerService(const edm::ParameterSet &, edm::ActivityRegistry &)
void preModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postEvent(edm::StreamContext const &)
std::unique_ptr< std::atomic< unsigned int >[]> subprocess_lumisection_check_
void postGlobalEndRun(edm::GlobalContext const &)
std::unique_ptr< std::atomic< unsigned int >[]> subprocess_event_check_
const unsigned int dqm_lumisections_range_
void postStreamEndRun(edm::StreamContext const &)
void preModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
std::vector< PlotsPerElement > modules_
std::string dqm_path_
void printSummaryLine(T &out, Resources const &data, uint64_t events, std::string const &label) const
void printEventLine(T &out, Resources const &data, std::string const &label) const
std::atomic< boost::chrono::nanoseconds::rep > time_thread
std::vector< Resources > highlight
const bool enable_dqm_bymodule_
string ranges
Definition: diffTwoXMLs.py:78
~FastTimerService() override
const PlotRanges dqm_module_ranges_
std::vector< PlotsPerPath > endpaths_
const bool enable_dqm_byls_
def ls(path, rec=False)
Definition: eostools.py:348
std::vector< PlotsPerProcess > processes_
void postModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
const bool print_event_summary_
boost::chrono::high_resolution_clock::time_point time_real
unsigned long long uint64_t
Definition: Time.h:15
void postModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
double querySourceTime(edm::StreamID) const
void preModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceEvent(edm::StreamID)
boost::chrono::nanoseconds time_thread
void preEvent(edm::StreamContext const &)
void postGlobalBeginRun(edm::GlobalContext const &)
void preStreamBeginLumi(edm::StreamContext const &)
void ignoredSignal(const std::string &signal) const
void postSourceEvent(edm::StreamID)
void preModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preStreamEndRun(edm::StreamContext const &)
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:82
std::atomic< boost::chrono::nanoseconds::rep > time_real
void preModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preStreamEndLumi(edm::StreamContext const &)
void postModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
double queryPathTime(edm::StreamID, std::string const &path) const
void preGlobalBeginLumi(edm::GlobalContext const &)
void preModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceConstruction(edm::ModuleDescription const &)
void preModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
tbb::concurrent_unordered_set< std::string > unsupported_signals_
std::vector< PlotsPerPath > paths_
void postModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
std::vector< ResourcesPerProcess > processes
double queryModuleTimeByLabel(edm::StreamID, std::string const &module) const
boost::date_time::subsecond_duration< boost::posix_time::time_duration, 1000000000 > nanoseconds
def check(config)
Definition: trackerTree.py:14
ProcessCallGraph callgraph_
long double T
void postModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preStreamBeginRun(edm::StreamContext const &)
void reset(double vett[256])
Definition: TPedValues.cc:11
std::vector< ResourcesPerPath > paths
Definition: vlib.h:208
void preModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
std::unique_ptr< std::atomic< unsigned int >[]> subprocess_run_check_
unsigned int concurrent_streams_
bool isLastSubprocess(std::atomic< unsigned int > &check)
void preallocate(edm::service::SystemBounds const &)
Basic3DVector & operator+=(const Basic3DVector< U > &p)