CMS 3D CMS Logo

FastTimerService.h
Go to the documentation of this file.
1 #ifndef FastTimerService_h
2 #define FastTimerService_h
3 
4 // C++ headers
5 #include <cmath>
6 #include <string>
7 #include <map>
8 #include <unordered_map>
9 #include <chrono>
10 #include <mutex>
11 #include <unistd.h>
12 
13 // boost headers
14 #include <boost/chrono.hpp>
15 
16 // tbb headers
17 #include <tbb/concurrent_unordered_set.h>
18 #include <tbb/enumerable_thread_specific.h>
19 
20 
21 // CMSSW headers
40 
41 
42 /*
43 procesing time is divided into
44  - source
45  - event processing, sum of the time spent in all the modules
46 */
47 
48 /*
49 Assuming an HLT process with ~2500 modules and ~500 paths, tracking each step (with two calls per step, to start and stop the timer)
50 with std::chrono::high_resolution_clock gives a per-event overhead of 1 ms
51 
52 Detailed informations on different timers can be extracted running $CMSSW_RELEASE_BASE/test/$SCRAM_ARCH/testChrono .
53 
54 
55 Timer per-call overhead on SLC5:
56 
57 Linux 2.6.18-371.1.2.el5 x86_64
58 glibc version: 2.5
59 clock source: unknown
60 For each timer the resolution reported is the MINIMUM (MEDIAN) (MEAN +/- its STDDEV) of the increments measured during the test.
61 
62 Performance of std::chrono::high_resolution_clock
63  Average time per call: 317.0 ns
64  Clock tick period: 1.0 ns
65  Measured resolution: 1000.0 ns (median: 1000.0 ns) (sigma: 199.4 ns) (average: 1007.6 +/- 0.4 ns)
66 
67 
68 Timer per-call overhead on SLC6 (virtualized):
69 
70 Linux 2.6.32-358.23.2.el6.x86_64 x86_64
71 glibc version: 2.12
72 clock source: kvm-clock
73 For each timer the resolution reported is the MINIMUM (MEDIAN) (MEAN +/- its STDDEV) of the increments measured during the test.
74 
75 Performance of std::chrono::high_resolution_clock
76  Average time per call: 351.2 ns
77  Clock tick period: 1.0 ns
78  Measured resolution: 1.0 ns (median: 358.0 ns) (sigma: 30360.8 ns) (average: 685.7 +/- 42.4 ns)
79 */
80 
81 
83 public:
86 
87 private:
88  double queryModuleTime_(edm::StreamID, unsigned int id) const;
89 
90 public:
91  // query the time spent in a module/path/process (available after it has run)
92  double querySourceTime(edm::StreamID) const;
93  double queryEventTime(edm::StreamID) const;
94  double queryEventTime(edm::StreamID, std::string const& process) const;
96  double queryModuleTime(edm::StreamID, unsigned int id) const;
97  double queryModuleTimeByLabel(edm::StreamID, std::string const& module) const;
98  double queryModuleTimeByLabel(edm::StreamID, std::string const& process, const std::string & module) const;
99  double queryPathTime(edm::StreamID, std::string const& path) const;
100  double queryPathTime(edm::StreamID, std::string const& process, std::string const& path) const;
101  double queryHighlightTime(edm::StreamID sid, std::string const& label) const;
102 
103 private:
104  void ignoredSignal(std::string signal) const;
105  void unsupportedSignal(std::string signal) const;
106 
107  // these signal pairs are not guaranteed to happen in the same thread
108 
110 
112  void postBeginJob();
113 
114  void postEndJob();
115 
118 
119  void preGlobalEndRun(edm::GlobalContext const&);
121 
124 
125  void preStreamEndRun(edm::StreamContext const&);
127 
130 
133 
136 
139 
140  void preEvent(edm::StreamContext const&);
141  void postEvent(edm::StreamContext const&);
142 
143  void prePathEvent(edm::StreamContext const&, edm::PathContext const&);
145 
148 
149  // these signal pairs are guaranteed to be called within the same thread
150 
151  //void preOpenFile(std::string const&, bool);
152  //void postOpenFile(std::string const&, bool);
153 
154  //void preCloseFile(std::string const&, bool);
155  //void postCloseFile(std::string const&, bool);
156 
158  //void postSourceConstruction(edm::ModuleDescription const&);
159 
160  void preSourceRun();
161  void postSourceRun();
162 
163  void preSourceLumi();
164  void postSourceLumi();
165 
168 
169  //void preModuleConstruction(edm::ModuleDescription const&);
170  //void postModuleConstruction(edm::ModuleDescription const&);
171 
172  //void preModuleBeginJob(edm::ModuleDescription const&);
173  //void postModuleBeginJob(edm::ModuleDescription const&);
174 
175  //void preModuleEndJob(edm::ModuleDescription const&);
176  //void postModuleEndJob(edm::ModuleDescription const&);
177 
178  //void preModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
179  //void postModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
180 
181  //void preModuleEndStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
182  //void postModuleEndStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
183 
186 
189 
192 
195 
198 
201 
204 
207 
210 
213 
216 
217 public:
218  static void fillDescriptions(edm::ConfigurationDescriptions & descriptions);
219 
220 private:
221  // highlight a group of modules
222  struct GroupOfModules {
223  public:
225  std::vector<unsigned int> modules;
226  };
227 
228  // resources being monitored by the service
229  struct Resources {
230  public:
231  Resources();
232  void reset();
234  Resources operator+(Resources const& other) const;
235 
236  public:
239  };
240 
242  public:
244  void reset();
246  ResourcesPerModule operator+(ResourcesPerModule const& other) const;
247 
248  public:
250  unsigned events;
251  };
252 
254  public:
256  void reset();
258  ResourcesPerPath operator+(ResourcesPerPath const& other) const;
259 
260  public:
261  Resources active; // resources used by all modules on this path
262  Resources total; // resources used by all modules on this path, and their dependencies
263  unsigned last; // one-past-the last module that ran on this path
264  bool status; // whether the path accepted or rejected the event
265  };
266 
268  public:
270  void reset();
273 
274  public:
276  std::vector<ResourcesPerPath> paths;
277  std::vector<ResourcesPerPath> endpaths;
278  };
279 
281  public:
282  ResourcesPerJob();
283  ResourcesPerJob(ProcessCallGraph const& job, std::vector<GroupOfModules> const& groups);
284  void reset();
286  ResourcesPerJob operator+(ResourcesPerJob const& other) const;
287 
288  public:
290  std::vector<Resources> highlight;
291  std::vector<ResourcesPerModule> modules;
292  std::vector<ResourcesPerProcess> processes;
293  unsigned events;
294  };
295 
296 
297  // per-thread measurements
298  struct Measurement {
299  public:
300  Measurement();
301  void measure();
302  void measure_and_store(Resources & store);
303 
304  public:
305  #ifdef DEBUG_THREAD_CONCURRENCY
307  #endif // DEBUG_THREAD_CONCURRENCY
308  boost::chrono::thread_clock::time_point time_thread;
309  boost::chrono::high_resolution_clock::time_point time_real;
310  };
311 
312  // plots associated to each module or other element (path, process, etc)
314  public:
315  PlotsPerElement();
316  void reset();
317  void book(DQMStore::IBooker &, std::string const& name, std::string const& title, double range, double resolution, unsigned int lumisections, bool byls);
318  void fill(Resources const&, unsigned int lumisection);
319  void fill_fraction(Resources const&, Resources const&, unsigned int lumisection);
320 
321  private:
322  // resources spent in the module
323  TH1F * time_thread_;
324  TProfile * time_thread_byls_;
325  TH1F * time_real_;
326  TProfile * time_real_byls_;
327  };
328 
329  // plots associated to each path or endpath
330  class PlotsPerPath {
331  public:
332  PlotsPerPath();
333  void reset();
334  void book(DQMStore::IBooker &, ProcessCallGraph const&, ProcessCallGraph::PathType const&, double range, double resolution, unsigned int lumisections, bool byls);
335  void fill(ProcessCallGraph::PathType const&, ResourcesPerJob const&, ResourcesPerPath const&, unsigned int lumisection);
336 
337  private:
338  // resources spent in all the modules in the path, including their dependencies
340 
341  // Note:
342  // a TH1F has 7 significant digits, while a 24-hour long run could process
343  // order of 10 billion events; a 64-bit long integer would work and might
344  // be better suited than a double, but there is no "TH1L" in ROOT.
345 
346  // how many times each module and their dependencies has run
348  // resources spent in each module and their dependencies
351  };
352 
354  public:
356  void reset();
358  double event_range, double event_resolution, double path_range, double path_resolution,
359  unsigned int lumisections, bool byls);
360  void fill(ProcessCallGraph::ProcessType const&, ResourcesPerJob const&, ResourcesPerProcess const&, unsigned int ls);
361 
362  private:
363  // resources spent in all the modules of the (sub)process
365  // resources spent in each path and endpath
366  std::vector<PlotsPerPath> paths_;
367  std::vector<PlotsPerPath> endpaths_;
368  };
369 
370  class PlotsPerJob {
371  public:
372  PlotsPerJob(ProcessCallGraph const& job, std::vector<GroupOfModules> const& groups);
373  void reset();
374  void book(DQMStore::IBooker &, ProcessCallGraph const&, std::vector<GroupOfModules> const&,
375  double event_range, double event_resolution, double path_range, double path_resolution,
376  double module_range, double module_resolution, unsigned int lumisections,
377  bool bymodule, bool byls);
378  void fill(ProcessCallGraph const&, ResourcesPerJob const&, unsigned int ls);
379 
380  private:
381  // resources spent in all the modules of the job
383  // resources spent in the highlighted modules
384  std::vector<PlotsPerElement> highlight_;
385  // resources spent in each module
386  std::vector<PlotsPerElement> modules_;
387  // resources spent in each (sub)process
388  std::vector<PlotsPerProcess> processes_;
389  };
390 
391 
392  // keep track of the dependencies among modules
394 
395  // per-stream information
396  std::vector<ResourcesPerJob> streams_;
397  std::vector<PlotsPerJob> stream_plots_;
398 
399  // summary data
400  ResourcesPerJob job_summary_; // whole event time accounting per-job
401  std::vector<ResourcesPerJob> run_summary_; // whole event time accounting per-run
402  std::mutex summary_mutex_; // synchronise access to the summary objects across different threads
403 
404  // per-thread quantities, lazily allocated
405  tbb::enumerable_thread_specific<Measurement, tbb::cache_aligned_allocator<Measurement>, tbb::ets_key_per_instance>
407 
408  // atomic variables to keep track of the completion of each step, process by process
409  std::unique_ptr<std::atomic<unsigned int>[]> subprocess_event_check_;
410  std::unique_ptr<std::atomic<unsigned int>[]> subprocess_lumisection_check_;
411  std::unique_ptr<std::atomic<unsigned int>[]> subprocess_run_check_;
412  std::unique_ptr<std::atomic<unsigned int>[]> subprocess_global_run_check_;
413 
414  // retrieve the current thread's per-thread quantities
415  Measurement & thread();
416 
417  // job configuration
418  unsigned int concurrent_runs_;
419  unsigned int concurrent_streams_;
420  unsigned int concurrent_threads_;
421 
422  // logging configuration
423  const bool print_event_summary_; // print the time spent in each process, path and module after every event
424  const bool print_run_summary_; // print the time spent in each process, path and module for each run
425  const bool print_job_summary_; // print the time spent in each process, path and module for the whole job
426 
427  // dqm configuration
428  unsigned int module_id_; // pseudo module id for the FastTimerService, needed by the thread-safe DQMStore
429 
430  bool enable_dqm_; // non const, depends on the availability of the DQMStore
432  const bool enable_dqm_byls_;
434 
435  const double dqm_eventtime_range_;
437  const double dqm_pathtime_range_;
439  const double dqm_moduletime_range_;
441  const unsigned int dqm_lumisections_range_;
443 
444  std::vector<edm::ParameterSet>
445  highlight_module_psets_; // non-const, cleared in postBeginJob()
446  std::vector<GroupOfModules> highlight_modules_; // non-const, filled in postBeginJob()
447 
448  // log unsupported signals
449  mutable tbb::concurrent_unordered_set<std::string> unsupported_signals_; // keep track of unsupported signals received
450 
451  // print the resource usage summary for en event, a run, or the while job
452  template <typename T>
453  void printEvent(T& out, ResourcesPerJob const&) const;
454 
455  template <typename T>
456  void printSummary(T& out, ResourcesPerJob const&, std::string const& label) const;
457 
458  // check if this is the first process being signalled
461 
462  // check if this is the lest process being signalled
463  bool isLastSubprocess(std::atomic<unsigned int>& check);
464 };
465 
466 #endif // ! FastTimerService_h
void preGlobalBeginRun(edm::GlobalContext const &)
void preModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
const bool print_run_summary_
void postStreamBeginRun(edm::StreamContext const &)
double queryEventTime(edm::StreamID) const
void postGlobalEndLumi(edm::GlobalContext const &)
void postGlobalBeginLumi(edm::GlobalContext const &)
void postStreamEndLumi(edm::StreamContext const &)
std::vector< edm::ParameterSet > highlight_module_psets_
const double dqm_pathtime_resolution_
void postModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preGlobalEndRun(edm::GlobalContext const &)
void preBeginJob(edm::PathsAndConsumesOfModulesBase const &, edm::ProcessContext const &)
const double dqm_moduletime_range_
const double dqm_moduletime_resolution_
unsigned int concurrent_threads_
void postEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
double queryHighlightTime(edm::StreamID sid, std::string const &label) const
void postModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void postModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
std::vector< GroupOfModules > highlight_modules_
ResourcesPerJob job_summary_
static boost::mutex mutex
Definition: LHEProxy.cc:11
std::vector< unsigned int > modules
const double dqm_eventtime_range_
void preGlobalEndLumi(edm::GlobalContext const &)
MatrixMeschach operator+(const MatrixMeschach &mat1, const MatrixMeschach &mat2)
std::vector< ResourcesPerPath > endpaths
double queryModuleTime_(edm::StreamID, unsigned int id) const
bool isFirstSubprocess(edm::StreamContext const &)
const bool enable_dqm_bynproc_
unsigned int concurrent_runs_
boost::chrono::nanoseconds time_real
std::unique_ptr< std::atomic< unsigned int >[]> subprocess_global_run_check_
std::vector< ResourcesPerJob > streams_
void postPathEvent(edm::StreamContext const &, edm::PathContext const &, edm::HLTPathStatus const &)
std::vector< PlotsPerElement > highlight_
void preModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
const bool print_job_summary_
Measurement & thread()
tbb::enumerable_thread_specific< Measurement, tbb::cache_aligned_allocator< Measurement >, tbb::ets_key_per_instance > threads_
void postStreamBeginLumi(edm::StreamContext const &)
void printSummary(T &out, ResourcesPerJob const &, std::string const &label) const
boost::chrono::thread_clock::time_point time_thread
std::vector< ResourcesPerModule > modules
void printEvent(T &out, ResourcesPerJob const &) const
void postModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
std::mutex summary_mutex_
std::vector< PlotsPerJob > stream_plots_
double queryModuleTime(edm::StreamID, const edm::ModuleDescription &module) const
void prePathEvent(edm::StreamContext const &, edm::PathContext const &)
unsigned int module_id_
std::vector< ResourcesPerJob > run_summary_
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
FastTimerService(const edm::ParameterSet &, edm::ActivityRegistry &)
void preModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postEvent(edm::StreamContext const &)
std::unique_ptr< std::atomic< unsigned int >[]> subprocess_lumisection_check_
void postGlobalEndRun(edm::GlobalContext const &)
std::unique_ptr< std::atomic< unsigned int >[]> subprocess_event_check_
const unsigned int dqm_lumisections_range_
void postStreamEndRun(edm::StreamContext const &)
void preModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
std::vector< PlotsPerElement > modules_
std::string dqm_path_
std::vector< Resources > highlight
const bool enable_dqm_bymodule_
const double dqm_eventtime_resolution_
std::vector< PlotsPerPath > endpaths_
const bool enable_dqm_byls_
def ls(path, rec=False)
Definition: eostools.py:348
std::vector< PlotsPerProcess > processes_
void postModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
const bool print_event_summary_
boost::chrono::high_resolution_clock::time_point time_real
void postModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
double querySourceTime(edm::StreamID) const
void preModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceEvent(edm::StreamID)
boost::chrono::nanoseconds time_thread
void preEvent(edm::StreamContext const &)
void postGlobalBeginRun(edm::GlobalContext const &)
void preStreamBeginLumi(edm::StreamContext const &)
void postSourceEvent(edm::StreamID)
void preModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preStreamEndRun(edm::StreamContext const &)
void preModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preStreamEndLumi(edm::StreamContext const &)
void unsupportedSignal(std::string signal) const
void postModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void ignoredSignal(std::string signal) const
double queryPathTime(edm::StreamID, std::string const &path) const
void preGlobalBeginLumi(edm::GlobalContext const &)
void preModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceConstruction(edm::ModuleDescription const &)
void preModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
tbb::concurrent_unordered_set< std::string > unsupported_signals_
std::vector< PlotsPerPath > paths_
void postModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
std::vector< ResourcesPerProcess > processes
double queryModuleTimeByLabel(edm::StreamID, std::string const &module) const
boost::date_time::subsecond_duration< boost::posix_time::time_duration, 1000000000 > nanoseconds
def check(config)
Definition: trackerTree.py:14
ProcessCallGraph callgraph_
long double T
void postModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preStreamBeginRun(edm::StreamContext const &)
void reset(double vett[256])
Definition: TPedValues.cc:11
std::vector< ResourcesPerPath > paths
Definition: vlib.h:208
void preModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
std::unique_ptr< std::atomic< unsigned int >[]> subprocess_run_check_
unsigned int concurrent_streams_
bool isLastSubprocess(std::atomic< unsigned int > &check)
void preallocate(edm::service::SystemBounds const &)
Basic3DVector & operator+=(const Basic3DVector< U > &p)
const double dqm_pathtime_range_