CMS 3D CMS Logo

FastTimerService.h
Go to the documentation of this file.
1 #ifndef FastTimerService_h
2 #define FastTimerService_h
3 
4 // system headers
5 #include <unistd.h>
6 
7 // C++ headers
8 #include <chrono>
9 #include <cmath>
10 #include <map>
11 #include <mutex>
12 #include <string>
13 #include <unordered_map>
14 
15 // boost headers
16 #include <boost/chrono.hpp>
17 
18 // tbb headers
19 #include <tbb/concurrent_unordered_set.h>
20 #include <tbb/enumerable_thread_specific.h>
21 #include <tbb/task_scheduler_observer.h>
22 
23 // CMSSW headers
42 
43 /*
44 processing time is divided into
45  - source
46  - event processing, sum of the time spent in all the modules
47 */
48 
49 class FastTimerService : public tbb::task_scheduler_observer {
50 public:
52  ~FastTimerService() override = default;
53 
54 private:
55  void ignoredSignal(const std::string& signal) const;
56  void unsupportedSignal(const std::string& signal) const;
57 
58  // these signal pairs are not guaranteed to happen in the same thread
59 
61 
63  void postBeginJob();
64 
65  void postEndJob();
66 
69 
72 
75 
78 
81 
84 
87 
90 
91  void preEvent(edm::StreamContext const&);
92  void postEvent(edm::StreamContext const&);
93 
96 
99 
100  // these signal pairs are guaranteed to be called within the same thread
101 
102  //void preOpenFile(std::string const&, bool);
103  //void postOpenFile(std::string const&, bool);
104 
105  //void preCloseFile(std::string const&, bool);
106  //void postCloseFile(std::string const&, bool);
107 
109  //void postSourceConstruction(edm::ModuleDescription const&);
110 
113 
116 
119 
120  //void preModuleConstruction(edm::ModuleDescription const&);
121  //void postModuleConstruction(edm::ModuleDescription const&);
122 
123  //void preModuleBeginJob(edm::ModuleDescription const&);
124  //void postModuleBeginJob(edm::ModuleDescription const&);
125 
126  //void preModuleEndJob(edm::ModuleDescription const&);
127  //void postModuleEndJob(edm::ModuleDescription const&);
128 
129  //void preModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
130  //void postModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
131 
132  //void preModuleEndStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
133  //void postModuleEndStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
134 
137 
140 
143 
146 
149 
152 
155 
158 
161 
164 
167 
170 
171  // inherited from TBB task_scheduler_observer
172  void on_scheduler_entry(bool worker) final;
173  void on_scheduler_exit(bool worker) final;
174 
175 public:
176  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
177 
178 private:
179  // forward declarations
180  struct Resources;
181  struct AtomicResources;
182 
183  // per-thread measurements
184  struct Measurement {
185  public:
187  // take per-thread measurements
188  void measure() noexcept;
189  // take per-thread measurements, compute the delta with respect to the previous measurement, and store them in the argument
190  void measure_and_store(Resources& store) noexcept;
191  // take per-thread measurements, compute the delta with respect to the previous measurement, and add them to the argument
194 
195  public:
196 #ifdef DEBUG_THREAD_CONCURRENCY
198 #endif // DEBUG_THREAD_CONCURRENCY
199  boost::chrono::thread_clock::time_point time_thread;
200  boost::chrono::high_resolution_clock::time_point time_real;
203  };
204 
205  // highlight a group of modules
206  struct GroupOfModules {
207  public:
209  std::vector<unsigned int> modules;
210  };
211 
212  // resources being monitored by the service
213  struct Resources {
214  public:
215  Resources();
216  void reset();
218  Resources operator+(Resources const& other) const;
219 
220  public:
225  };
226 
227  // atomic version of Resources
229  public:
230  AtomicResources();
232  void reset();
233 
234  AtomicResources& operator=(AtomicResources const& other);
236  AtomicResources operator+(AtomicResources const& other) const;
237 
238  public:
239  std::atomic<boost::chrono::nanoseconds::rep> time_thread;
240  std::atomic<boost::chrono::nanoseconds::rep> time_real;
241  std::atomic<uint64_t> allocated;
242  std::atomic<uint64_t> deallocated;
243  };
244 
246  public:
248  void reset() noexcept;
250  ResourcesPerModule operator+(ResourcesPerModule const& other) const;
251 
252  public:
254  unsigned events;
255  bool has_acquire; // whether this module has an acquire() method
256  };
257 
259  public:
260  void reset();
262  ResourcesPerPath operator+(ResourcesPerPath const& other) const;
263 
264  public:
265  Resources active; // resources used by all modules on this path
266  Resources total; // resources used by all modules on this path, and their dependencies
267  unsigned last; // one-past-the last module that ran on this path
268  bool status; // whether the path accepted or rejected the event
269  };
270 
272  public:
274  void reset();
277 
278  public:
280  std::vector<ResourcesPerPath> paths;
281  std::vector<ResourcesPerPath> endpaths;
282  };
283 
285  public:
286  ResourcesPerJob() = default;
287  ResourcesPerJob(ProcessCallGraph const& job, std::vector<GroupOfModules> const& groups);
288  void reset();
290  ResourcesPerJob operator+(ResourcesPerJob const& other) const;
291 
292  public:
295  Resources event; // total time etc. spent between preSourceEvent and postEvent
297  std::vector<Resources> highlight;
298  std::vector<ResourcesPerModule> modules;
299  std::vector<ResourcesPerProcess> processes;
300  unsigned events;
301  };
302 
303  // plot ranges and resolution
304  struct PlotRanges {
305  double time_range;
307  double memory_range;
309  };
310 
311  // plots associated to each module or other element (path, process, etc)
313  public:
314  PlotsPerElement() = default;
315  void book(dqm::reco::DQMStore::IBooker&,
316  std::string const& name,
317  std::string const& title,
318  PlotRanges const& ranges,
319  unsigned int lumisections,
320  bool byls);
321  void fill(Resources const&, unsigned int lumisection);
322  void fill(AtomicResources const&, unsigned int lumisection);
323  void fill_fraction(Resources const&, Resources const&, unsigned int lumisection);
324 
325  private:
326  // resources spent in the module
335  };
336 
337  // plots associated to each path or endpath
338  class PlotsPerPath {
339  public:
340  PlotsPerPath() = default;
341  void book(dqm::reco::DQMStore::IBooker&,
342  std::string const&,
343  ProcessCallGraph const&,
345  PlotRanges const& ranges,
346  unsigned int lumisections,
347  bool byls);
348  void fill(ProcessCallGraph::PathType const&,
349  ResourcesPerJob const&,
350  ResourcesPerPath const&,
351  unsigned int lumisection);
352 
353  private:
354  // resources spent in all the modules in the path, including their dependencies
356 
357  // Note:
358  // a TH1F has 7 significant digits, while a 24-hour long run could process
359  // order of 10 billion events; a 64-bit long integer would work and might
360  // be better suited than a double, but there is no "TH1L" in ROOT.
361 
362  // how many times each module and its dependencies have run
364  // resources spent in each module and their dependencies
369  };
370 
372  public:
374  void book(dqm::reco::DQMStore::IBooker&,
375  ProcessCallGraph const&,
377  PlotRanges const& event_ranges,
378  PlotRanges const& path_ranges,
379  unsigned int lumisections,
380  bool bypath,
381  bool byls);
382  void fill(ProcessCallGraph::ProcessType const&, ResourcesPerJob const&, ResourcesPerProcess const&, unsigned int ls);
383 
384  private:
385  // resources spent in all the modules of the (sub)process
387  // resources spent in each path and endpath
388  std::vector<PlotsPerPath> paths_;
389  std::vector<PlotsPerPath> endpaths_;
390  };
391 
392  class PlotsPerJob {
393  public:
394  PlotsPerJob(ProcessCallGraph const& job, std::vector<GroupOfModules> const& groups);
395  void book(dqm::reco::DQMStore::IBooker&,
396  ProcessCallGraph const&,
397  std::vector<GroupOfModules> const&,
398  PlotRanges const& event_ranges,
399  PlotRanges const& path_ranges,
400  PlotRanges const& module_ranges,
401  unsigned int lumisections,
402  bool bymodule,
403  bool bypath,
404  bool byls,
405  bool transitions);
406  void fill(ProcessCallGraph const&, ResourcesPerJob const&, unsigned int ls);
407  void fill_run(AtomicResources const&);
408  void fill_lumi(AtomicResources const&, unsigned int lumisection);
409 
410  private:
411  // resources spent in all the modules of the job
415  // resources spent in the modules' lumi and run transitions
418  // resources spent in the highlighted modules
419  std::vector<PlotsPerElement> highlight_;
420  // resources spent in each module
421  std::vector<PlotsPerElement> modules_;
422  // resources spent in each (sub)process
423  std::vector<PlotsPerProcess> processes_;
424  };
425 
426  // keep track of the dependencies among modules
428 
429  // per-stream information
430  std::vector<ResourcesPerJob> streams_;
431 
432  // concurrent histograms and profiles
433  std::unique_ptr<PlotsPerJob> plots_;
434 
435  // per-lumi and per-run information
436  std::vector<AtomicResources> lumi_transition_; // resources spent in the modules' global and stream lumi transitions
437  std::vector<AtomicResources> run_transition_; // resources spent in the modules' global and stream run transitions
438  AtomicResources overhead_; // resources spent outside of the modules' transitions
439 
440  // summary data
441  ResourcesPerJob job_summary_; // whole event time accounting per-job
442  std::vector<ResourcesPerJob> run_summary_; // whole event time accounting per-run
443  std::mutex summary_mutex_; // synchronise access to the summary objects across different threads
444 
445  // per-thread quantities, lazily allocated
446  tbb::enumerable_thread_specific<Measurement, tbb::cache_aligned_allocator<Measurement>, tbb::ets_key_per_instance>
448 
449  // atomic variables to keep track of the completion of each step, process by process
450  std::unique_ptr<std::atomic<unsigned int>[]> subprocess_event_check_;
451  std::unique_ptr<std::atomic<unsigned int>[]> subprocess_global_lumi_check_;
452  std::unique_ptr<std::atomic<unsigned int>[]> subprocess_global_run_check_;
453 
454  // retrieve the current thread's per-thread quantities
455  Measurement& thread();
456 
457  // job configuration
458  unsigned int concurrent_lumis_;
459  unsigned int concurrent_runs_;
460  unsigned int concurrent_streams_;
461  unsigned int concurrent_threads_;
462 
463  // logging configuration
464  const bool print_event_summary_; // print the time spent in each process, path and module after every event
465  const bool print_run_summary_; // print the time spent in each process, path and module for each run
466  const bool print_job_summary_; // print the time spent in each process, path and module for the whole job
467 
468  // dqm configuration
469  bool enable_dqm_; // non const, depends on the availability of the DQMStore
471  const bool enable_dqm_bypath_;
472  const bool enable_dqm_byls_;
475 
479  const unsigned int dqm_lumisections_range_;
481 
482  std::vector<edm::ParameterSet> highlight_module_psets_; // non-const, cleared in postBeginJob()
483  std::vector<GroupOfModules> highlight_modules_; // non-const, filled in postBeginJob()
484 
485  // log unsupported signals
486  mutable tbb::concurrent_unordered_set<std::string> unsupported_signals_; // keep track of unsupported signals received
487 
488  // print the resource usage summary for an event, a run, or the whole job
489  template <typename T>
490  void printHeader(T& out, std::string const& label) const;
491 
492  template <typename T>
493  void printEventHeader(T& out, std::string const& label) const;
494 
495  template <typename T>
496  void printEventLine(T& out, Resources const& data, std::string const& label) const;
497 
498  template <typename T>
499  void printEventLine(T& out, AtomicResources const& data, std::string const& label) const;
500 
501  template <typename T>
502  void printEvent(T& out, ResourcesPerJob const&) const;
503 
504  template <typename T>
505  void printSummaryHeader(T& out, std::string const& label, bool detailed) const;
506 
507  template <typename T>
508  void printPathSummaryHeader(T& out, std::string const& label) const;
509 
510  template <typename T>
511  void printSummaryLine(T& out, Resources const& data, uint64_t events, std::string const& label) const;
512 
513  template <typename T>
514  void printSummaryLine(T& out, Resources const& data, uint64_t events, uint64_t active, std::string const& label) const;
515 
516  template <typename T>
518  T& out, Resources const& data, Resources const& total, uint64_t events, std::string const& label) const;
519 
520  template <typename T>
521  void printSummary(T& out, ResourcesPerJob const& data, std::string const& label) const;
522 
523  template <typename T>
524  void printTransition(T& out, AtomicResources const& data, std::string const& label) const;
525 
526  // check if this is the first process being signalled
529 
530  // check if this is the last process being signalled
531  bool isLastSubprocess(std::atomic<unsigned int>& check);
532 };
533 
534 #endif // ! FastTimerService_h
void preGlobalBeginRun(edm::GlobalContext const &)
void preModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
const bool print_run_summary_
static boost::mutex mutex
Definition: Proxy.cc:9
std::unique_ptr< std::atomic< unsigned int >[]> subprocess_global_lumi_check_
void postStreamBeginRun(edm::StreamContext const &)
void postGlobalEndLumi(edm::GlobalContext const &)
void postGlobalBeginLumi(edm::GlobalContext const &)
void on_scheduler_entry(bool worker) final
void postStreamEndLumi(edm::StreamContext const &)
void on_scheduler_exit(bool worker) final
std::vector< edm::ParameterSet > highlight_module_psets_
void postModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preGlobalEndRun(edm::GlobalContext const &)
void preBeginJob(edm::PathsAndConsumesOfModulesBase const &, edm::ProcessContext const &)
void preSourceLumi(edm::LuminosityBlockIndex)
void printSummary(T &out, ResourcesPerJob const &data, std::string const &label) const
boost::date_time::subsecond_duration< boost::posix_time::time_duration, 1000000000 > nanoseconds
dqm::reco::MonitorElement * deallocated_byls_
unsigned int concurrent_threads_
void postEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
const PlotRanges dqm_event_ranges_
void postModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preSourceRun(edm::RunIndex)
void postModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
void unsupportedSignal(const std::string &signal) const
std::vector< GroupOfModules > highlight_modules_
ResourcesPerJob job_summary_
dqm::reco::MonitorElement * module_deallocated_total_
std::vector< unsigned int > modules
void preGlobalEndLumi(edm::GlobalContext const &)
MatrixMeschach operator+(const MatrixMeschach &mat1, const MatrixMeschach &mat2)
std::vector< ResourcesPerPath > endpaths
AtomicResources overhead_
bool isFirstSubprocess(edm::StreamContext const &)
const bool enable_dqm_bynproc_
unsigned int concurrent_runs_
boost::chrono::nanoseconds time_real
std::atomic< uint64_t > deallocated
std::unique_ptr< std::atomic< unsigned int >[]> subprocess_global_run_check_
void postSourceRun(edm::RunIndex)
std::vector< ResourcesPerJob > streams_
dqm::reco::MonitorElement * time_thread_byls_
std::atomic< uint64_t > allocated
void postPathEvent(edm::StreamContext const &, edm::PathContext const &, edm::HLTPathStatus const &)
std::vector< PlotsPerElement > highlight_
void preModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
dqm::reco::MonitorElement * time_thread_
const bool print_job_summary_
const bool enable_dqm_bypath_
Measurement & thread()
tbb::enumerable_thread_specific< Measurement, tbb::cache_aligned_allocator< Measurement >, tbb::ets_key_per_instance > threads_
void postStreamBeginLumi(edm::StreamContext const &)
dqm::reco::MonitorElement * deallocated_
dqm::reco::MonitorElement * time_real_
const PlotRanges dqm_path_ranges_
boost::chrono::thread_clock::time_point time_thread
std::vector< ResourcesPerModule > modules
char const * label
void printEvent(T &out, ResourcesPerJob const &) const
void printHeader(T &out, std::string const &label) const
void postModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
std::mutex summary_mutex_
void printPathSummaryHeader(T &out, std::string const &label) const
void printSummaryHeader(T &out, std::string const &label, bool detailed) const
void prePathEvent(edm::StreamContext const &, edm::PathContext const &)
dqm::reco::MonitorElement * module_time_thread_total_
std::vector< ResourcesPerJob > run_summary_
dqm::reco::MonitorElement * time_real_byls_
~FastTimerService() override=default
void printEventHeader(T &out, std::string const &label) const
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
FastTimerService(const edm::ParameterSet &, edm::ActivityRegistry &)
void preModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postEvent(edm::StreamContext const &)
void postGlobalEndRun(edm::GlobalContext const &)
std::unique_ptr< std::atomic< unsigned int >[]> subprocess_event_check_
const unsigned int dqm_lumisections_range_
void postStreamEndRun(edm::StreamContext const &)
void preModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
std::vector< PlotsPerElement > modules_
unsigned int concurrent_lumis_
std::string dqm_path_
dqm::reco::MonitorElement * module_time_real_total_
void printSummaryLine(T &out, Resources const &data, uint64_t events, std::string const &label) const
const bool enable_dqm_transitions_
void printEventLine(T &out, Resources const &data, std::string const &label) const
std::atomic< boost::chrono::nanoseconds::rep > time_thread
std::vector< Resources > highlight
const bool enable_dqm_bymodule_
void printPathSummaryLine(T &out, Resources const &data, Resources const &total, uint64_t events, std::string const &label) const
string ranges
Definition: diffTwoXMLs.py:79
#define noexcept
const PlotRanges dqm_module_ranges_
std::vector< PlotsPerPath > endpaths_
const bool enable_dqm_byls_
def ls(path, rec=False)
Definition: eostools.py:349
std::vector< PlotsPerProcess > processes_
void postModuleEventAcquire(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
const bool print_event_summary_
boost::chrono::high_resolution_clock::time_point time_real
dqm::reco::MonitorElement * allocated_byls_
unsigned long long uint64_t
Definition: Time.h:13
void postModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void printTransition(T &out, AtomicResources const &data, std::string const &label) const
std::vector< AtomicResources > run_transition_
void preModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceEvent(edm::StreamID)
boost::chrono::nanoseconds time_thread
void preEvent(edm::StreamContext const &)
void postGlobalBeginRun(edm::GlobalContext const &)
void preStreamBeginLumi(edm::StreamContext const &)
void ignoredSignal(const std::string &signal) const
void postSourceEvent(edm::StreamID)
dqm::reco::MonitorElement * allocated_
void preModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preStreamEndRun(edm::StreamContext const &)
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:79
std::atomic< boost::chrono::nanoseconds::rep > time_real
void preModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preStreamEndLumi(edm::StreamContext const &)
void measure_and_store(Resources &store)
void postModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
std::unique_ptr< PlotsPerJob > plots_
void preGlobalBeginLumi(edm::GlobalContext const &)
void preModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceConstruction(edm::ModuleDescription const &)
void preModuleEventAcquire(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
dqm::reco::MonitorElement * module_allocated_total_
tbb::concurrent_unordered_set< std::string > unsupported_signals_
std::vector< PlotsPerPath > paths_
void postSourceLumi(edm::LuminosityBlockIndex)
void postModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
std::vector< ResourcesPerProcess > processes
ProcessCallGraph callgraph_
long double T
void postModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preStreamBeginRun(edm::StreamContext const &)
dqm::reco::MonitorElement * module_counter_
void reset(double vett[256])
Definition: TPedValues.cc:11
std::vector< ResourcesPerPath > paths
void preModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
std::vector< AtomicResources > lumi_transition_
unsigned int concurrent_streams_
bool isLastSubprocess(std::atomic< unsigned int > &check)
void preallocate(edm::service::SystemBounds const &)
Basic3DVector & operator+=(const Basic3DVector< U > &p)
void measure_and_accumulate(Resources &store)