CMS 3D CMS Logo

StallMonitor.cc
Go to the documentation of this file.
1 // -*- C++ -*-
2 //
3 // Package: FWCore/Services
4 // Class : StallMonitor
5 //
6 // Implementation:
7 //
8 // Original Author: Kyle Knoepfel
9 //
10 
30 
31 #include "monitor_file_utilities.h"
32 
33 #include "oneapi/tbb/concurrent_unordered_map.h"
34 #include <atomic>
35 #include <chrono>
36 #include <iomanip>
37 #include <iostream>
38 #include <sstream>
39 
41 
42 namespace {
43 
44  using duration_t = std::chrono::microseconds;
45  using clock_t = std::chrono::steady_clock;
46  auto now = clock_t::now;
47 
48  //===============================================================
49  class StallStatistics {
50  public:
51  // c'tor receiving 'std::string const&' type not provided since we
52  // must be able to call (e.g.) std::vector<StallStatistics>(20),
53  // for which a default label is not sensible in this context.
54  StallStatistics() = default;
55 
56  std::string const& label() const { return label_; }
57  unsigned numberOfStalls() const { return stallCounter_; }
58 
59  using rep_t = duration_t::rep;
60 
61  duration_t totalStalledTime() const { return duration_t{totalTime_.load()}; }
62  duration_t maxStalledTime() const { return duration_t{maxTime_.load()}; }
63 
64  // Modifiers
65  void setLabel(std::string const& label) { label_ = label; }
66 
67  void update(duration_t const ms) {
68  ++stallCounter_;
69  auto const thisTime = ms.count();
70  totalTime_ += thisTime;
71  rep_t max{maxTime_};
72  while (thisTime > max && !maxTime_.compare_exchange_strong(max, thisTime))
73  ;
74  }
75 
76  private:
77  std::string label_{};
78  std::atomic<unsigned> stallCounter_{};
79  std::atomic<rep_t> totalTime_{};
80  std::atomic<rep_t> maxTime_{};
81  };
82 
83  enum class step : char {
84  preSourceEvent = 'S',
85  postSourceEvent = 's',
86  preEvent = 'E',
87  postModuleEventPrefetching = 'p',
88  preModuleEventAcquire = 'A',
89  postModuleEventAcquire = 'a',
90  preModuleEvent = 'M',
91  preEventReadFromSource = 'R',
92  postEventReadFromSource = 'r',
93  postModuleEvent = 'm',
94  postEvent = 'e',
95  postESModulePrefetching = 'q',
96  preESModule = 'N',
97  postESModule = 'n',
98  preFrameworkTransition = 'F',
99  postFrameworkTransition = 'f'
100  };
101 
102  enum class Phase : short {
103  globalEndRun = -4,
104  streamEndRun = -3,
105  globalEndLumi = -2,
106  streamEndLumi = -1,
107  Event = 0,
108  streamBeginLumi = 1,
109  globalBeginLumi = 2,
110  streamBeginRun = 3,
111  globalBeginRun = 4,
112  eventSetupCall = 5
113  };
114 
115  std::ostream& operator<<(std::ostream& os, step const s) {
116  os << static_cast<std::underlying_type_t<step>>(s);
117  return os;
118  }
119 
120  std::ostream& operator<<(std::ostream& os, Phase const s) {
121  os << static_cast<std::underlying_type_t<Phase>>(s);
122  return os;
123  }
124 
125  template <step S, typename... ARGS>
126  std::string assembleMessage(ARGS const... args) {
127  std::ostringstream oss;
128  oss << S;
129  concatenate(oss, args...);
130  oss << '\n';
131  return oss.str();
132  }
133 
134  Phase toTransitionImpl(edm::StreamContext const& iContext) {
135  using namespace edm;
136  switch (iContext.transition()) {
138  return Phase::streamBeginRun;
140  return Phase::streamBeginLumi;
142  return Phase::Event;
144  return Phase::streamEndLumi;
146  return Phase::streamEndRun;
147  default:
148  break;
149  }
150  assert(false);
151  return Phase::Event;
152  }
153 
154  auto toTransition(edm::StreamContext const& iContext) -> std::underlying_type_t<Phase> {
155  return static_cast<std::underlying_type_t<Phase>>(toTransitionImpl(iContext));
156  }
157 
158  Phase toTransitionImpl(edm::GlobalContext const& iContext) {
159  using namespace edm;
160  switch (iContext.transition()) {
162  return Phase::globalBeginRun;
164  return Phase::globalBeginLumi;
166  return Phase::globalEndLumi;
168  return Phase::globalEndLumi;
170  return Phase::globalEndRun;
172  return Phase::globalEndRun;
173  default:
174  break;
175  }
176  assert(false);
177  return Phase::Event;
178  }
179  auto toTransition(edm::GlobalContext const& iContext) -> std::underlying_type_t<Phase> {
180  return static_cast<std::underlying_type_t<Phase>>(toTransitionImpl(iContext));
181  }
182 
183 } // namespace
184 
185 namespace edm {
186  namespace service {
187 
188  class StallMonitor {
189  public:
191  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
192 
193  private:
194  void preModuleConstruction(edm::ModuleDescription const&);
195  void preModuleDestruction(edm::ModuleDescription const&);
196  void postBeginJob();
197  void preSourceEvent(StreamID);
198  void postSourceEvent(StreamID);
199  void preEvent(StreamContext const&);
200  void preModuleEventAcquire(StreamContext const&, ModuleCallingContext const&);
201  void postModuleEventAcquire(StreamContext const&, ModuleCallingContext const&);
202  void preModuleEvent(StreamContext const&, ModuleCallingContext const&);
203  void postModuleEventPrefetching(StreamContext const&, ModuleCallingContext const&);
204  void preEventReadFromSource(StreamContext const&, ModuleCallingContext const&);
205  void postEventReadFromSource(StreamContext const&, ModuleCallingContext const&);
206  void postModuleEvent(StreamContext const&, ModuleCallingContext const&);
207  void postEvent(StreamContext const&);
208  void preModuleStreamTransition(StreamContext const&, ModuleCallingContext const&);
209  void postModuleStreamTransition(StreamContext const&, ModuleCallingContext const&);
210  void preModuleGlobalTransition(GlobalContext const&, ModuleCallingContext const&);
211  void postModuleGlobalTransition(GlobalContext const&, ModuleCallingContext const&);
212  void postEndJob();
213 
215  bool const validFile_; // Separate data member from file to improve efficiency.
216  duration_t const stallThreshold_;
217  decltype(now()) beginTime_{};
218 
219  // There can be multiple modules per stream. Therefore, we need
220  // the combination of StreamID and ModuleID to correctly track
221  // stalling information. We use oneapi::tbb::concurrent_unordered_map
222  // for this purpose.
223  using StreamID_value = decltype(std::declval<StreamID>().value());
224  using ModuleID = decltype(std::declval<ModuleDescription>().id());
225  oneapi::tbb::concurrent_unordered_map<std::pair<StreamID_value, ModuleID>,
226  std::pair<decltype(beginTime_), bool>,
228  stallStart_{};
229 
230  std::vector<std::string> moduleLabels_{};
231  std::vector<StallStatistics> moduleStats_{};
232  std::vector<std::string> esModuleLabels_{};
233  unsigned int numStreams_;
234  };
235 
236  } // namespace service
237 
238 } // namespace edm
239 
240 namespace {
241  constexpr char const* const filename_default{""};
242  constexpr double threshold_default{0.1}; //default threashold in seconds
243  std::string const space{" "};
244 } // namespace
245 
247 using namespace std::chrono;
248 
250  : file_{iPS.getUntrackedParameter<std::string>("fileName")},
251  validFile_{file_},
252  stallThreshold_{
253  std::chrono::round<duration_t>(duration<double>(iPS.getUntrackedParameter<double>("stallThreshold")))} {
254  iRegistry.watchPreModuleConstruction(this, &StallMonitor::preModuleConstruction);
255  iRegistry.watchPreModuleDestruction(this, &StallMonitor::preModuleDestruction);
256  iRegistry.watchPostBeginJob(this, &StallMonitor::postBeginJob);
257  iRegistry.watchPostModuleEventPrefetching(this, &StallMonitor::postModuleEventPrefetching);
258  iRegistry.watchPreModuleEventAcquire(this, &StallMonitor::preModuleEventAcquire);
259  iRegistry.watchPreModuleEvent(this, &StallMonitor::preModuleEvent);
260  iRegistry.watchPostEndJob(this, &StallMonitor::postEndJob);
261 
262  if (validFile_) {
263  // Only enable the following callbacks if writing to a file.
264  iRegistry.watchPreSourceEvent(this, &StallMonitor::preSourceEvent);
265  iRegistry.watchPostSourceEvent(this, &StallMonitor::postSourceEvent);
266  iRegistry.watchPreEvent(this, &StallMonitor::preEvent);
267  iRegistry.watchPostModuleEventAcquire(this, &StallMonitor::postModuleEventAcquire);
268  iRegistry.watchPreEventReadFromSource(this, &StallMonitor::preEventReadFromSource);
269  iRegistry.watchPostEventReadFromSource(this, &StallMonitor::postEventReadFromSource);
270  iRegistry.watchPostModuleEvent(this, &StallMonitor::postModuleEvent);
271  iRegistry.watchPostEvent(this, &StallMonitor::postEvent);
272 
273  iRegistry.watchPreModuleStreamBeginRun(this, &StallMonitor::preModuleStreamTransition);
274  iRegistry.watchPostModuleStreamBeginRun(this, &StallMonitor::postModuleStreamTransition);
275  iRegistry.watchPreModuleStreamEndRun(this, &StallMonitor::preModuleStreamTransition);
276  iRegistry.watchPostModuleStreamEndRun(this, &StallMonitor::postModuleStreamTransition);
277 
278  iRegistry.watchPreModuleStreamBeginLumi(this, &StallMonitor::preModuleStreamTransition);
279  iRegistry.watchPostModuleStreamBeginLumi(this, &StallMonitor::postModuleStreamTransition);
280  iRegistry.watchPreModuleStreamEndLumi(this, &StallMonitor::preModuleStreamTransition);
281  iRegistry.watchPostModuleStreamEndLumi(this, &StallMonitor::postModuleStreamTransition);
282 
283  iRegistry.watchPreModuleGlobalBeginRun(this, &StallMonitor::preModuleGlobalTransition);
284  iRegistry.watchPostModuleGlobalBeginRun(this, &StallMonitor::postModuleGlobalTransition);
285  iRegistry.watchPreModuleGlobalEndRun(this, &StallMonitor::preModuleGlobalTransition);
286  iRegistry.watchPostModuleGlobalEndRun(this, &StallMonitor::postModuleGlobalTransition);
287  iRegistry.watchPreModuleWriteRun(this, &StallMonitor::preModuleGlobalTransition);
288  iRegistry.watchPostModuleWriteRun(this, &StallMonitor::postModuleGlobalTransition);
289 
290  iRegistry.watchPreModuleGlobalBeginLumi(this, &StallMonitor::preModuleGlobalTransition);
291  iRegistry.watchPostModuleGlobalBeginLumi(this, &StallMonitor::postModuleGlobalTransition);
292  iRegistry.watchPreModuleGlobalEndLumi(this, &StallMonitor::preModuleGlobalTransition);
293  iRegistry.watchPostModuleGlobalEndLumi(this, &StallMonitor::postModuleGlobalTransition);
294  iRegistry.watchPreModuleWriteLumi(this, &StallMonitor::preModuleGlobalTransition);
295  iRegistry.watchPostModuleWriteLumi(this, &StallMonitor::postModuleGlobalTransition);
296 
297  iRegistry.postESModuleRegistrationSignal_.connect([this](auto const& iDescription) {
298  if (esModuleLabels_.size() <= iDescription.id_) {
299  esModuleLabels_.resize(iDescription.id_ + 1);
300  }
301  if (not iDescription.label_.empty()) {
302  esModuleLabels_[iDescription.id_] = iDescription.label_;
303  } else {
304  esModuleLabels_[iDescription.id_] = iDescription.type_;
305  }
306  });
307 
308  iRegistry.preESModuleSignal_.connect([this](auto const&, auto const& context) {
309  auto const t = duration_cast<duration_t>(now() - beginTime_).count();
310  auto msg = assembleMessage<step::preESModule>(
311  numStreams_, module_id(context), std::underlying_type_t<Phase>(Phase::eventSetupCall), t);
312  file_.write(std::move(msg));
313  });
314  iRegistry.postESModuleSignal_.connect([this](auto const&, auto const& context) {
315  auto const t = duration_cast<duration_t>(now() - beginTime_).count();
316  auto msg = assembleMessage<step::postESModule>(
317  numStreams_, module_id(context), std::underlying_type_t<Phase>(Phase::eventSetupCall), t);
318  file_.write(std::move(msg));
319  });
320 
321  iRegistry.preallocateSignal_.connect(
322  [this](service::SystemBounds const& iBounds) { numStreams_ = iBounds.maxNumberOfStreams(); });
323 
324  bool recordFrameworkTransitions = iPS.getUntrackedParameter<bool>("recordFrameworkTransitions");
325  if (recordFrameworkTransitions) {
326  {
327  auto preGlobal = [this](GlobalContext const& gc) {
328  auto const t = duration_cast<duration_t>(now() - beginTime_).count();
329  auto msg = assembleMessage<step::preFrameworkTransition>(
330  numStreams_, gc.luminosityBlockID().run(), gc.luminosityBlockID().luminosityBlock(), toTransition(gc), t);
331  file_.write(std::move(msg));
332  };
333  iRegistry.watchPreGlobalBeginRun(preGlobal);
334  iRegistry.watchPreGlobalBeginLumi(preGlobal);
335  iRegistry.watchPreGlobalEndLumi(preGlobal);
336  iRegistry.watchPreGlobalEndRun(preGlobal);
337  }
338  {
339  auto postGlobal = [this](GlobalContext const& gc) {
340  auto const t = duration_cast<duration_t>(now() - beginTime_).count();
341  auto msg = assembleMessage<step::postFrameworkTransition>(
342  numStreams_, gc.luminosityBlockID().run(), gc.luminosityBlockID().luminosityBlock(), toTransition(gc), t);
343  file_.write(std::move(msg));
344  };
345  iRegistry.watchPostGlobalBeginRun(postGlobal);
346  iRegistry.watchPostGlobalBeginLumi(postGlobal);
347  iRegistry.watchPostGlobalEndLumi(postGlobal);
348  iRegistry.watchPostGlobalEndRun(postGlobal);
349  }
350  {
351  auto preStream = [this](StreamContext const& sc) {
352  auto const t = duration_cast<duration_t>(now() - beginTime_).count();
353  auto msg = assembleMessage<step::preFrameworkTransition>(
354  stream_id(sc), sc.eventID().run(), sc.eventID().luminosityBlock(), toTransition(sc), t);
355  file_.write(std::move(msg));
356  };
357  iRegistry.watchPreStreamBeginRun(preStream);
358  iRegistry.watchPreStreamBeginLumi(preStream);
359  iRegistry.watchPreStreamEndLumi(preStream);
360  iRegistry.watchPreStreamEndRun(preStream);
361  }
362  {
363  auto postStream = [this](StreamContext const& sc) {
364  auto const t = duration_cast<duration_t>(now() - beginTime_).count();
365  auto msg = assembleMessage<step::postFrameworkTransition>(
366  stream_id(sc), sc.eventID().run(), sc.eventID().luminosityBlock(), toTransition(sc), t);
367  file_.write(std::move(msg));
368  };
369  iRegistry.watchPostStreamBeginRun(postStream);
370  iRegistry.watchPostStreamBeginLumi(postStream);
371  iRegistry.watchPostStreamEndLumi(postStream);
372  iRegistry.watchPostStreamEndRun(postStream);
373  }
374  }
375 
376  std::ostringstream oss;
377  oss << "# Transition Symbol\n";
378  oss << "#----------------- ------\n";
379  oss << "# eventSetupCall " << Phase::eventSetupCall << "\n"
380  << "# globalBeginRun " << Phase::globalBeginRun << "\n"
381  << "# streamBeginRun " << Phase::streamBeginRun << "\n"
382  << "# globalBeginLumi " << Phase::globalBeginLumi << "\n"
383  << "# streamBeginLumi " << Phase::streamBeginLumi << "\n"
384  << "# Event " << Phase::Event << "\n"
385  << "# streamEndLumi " << Phase::streamEndLumi << "\n"
386  << "# globalEndLumi " << Phase::globalEndLumi << "\n"
387  << "# streamEndRun " << Phase::streamEndRun << "\n"
388  << "# globalEndRun " << Phase::globalEndRun << "\n";
389  oss << "# Step Symbol Entries\n"
390  << "# -------------------------- ------ ------------------------------------------\n"
391  << "# preSourceEvent " << step::preSourceEvent << " <Stream ID> <Time since beginJob (ms)>\n"
392  << "# postSourceEvent " << step::postSourceEvent << " <Stream ID> <Time since beginJob (ms)>\n"
393  << "# preEvent " << step::preEvent
394  << " <Stream ID> <Run#> <LumiBlock#> <Event#> <Time since beginJob (ms)>\n"
395  << "# postModuleEventPrefetching " << step::postModuleEventPrefetching
396  << " <Stream ID> <Module ID> <Time since beginJob (ms)>\n"
397  << "# preModuleEventAcquire " << step::preModuleEventAcquire
398  << " <Stream ID> <Module ID> <Time since beginJob (ms)>\n"
399  << "# postModuleEventAcquire " << step::postModuleEventAcquire
400  << " <Stream ID> <Module ID> <Time since beginJob (ms)>\n"
401  << "# preModuleTransition " << step::preModuleEvent
402  << " <Stream ID> <Module ID> <Transition type> <Time since beginJob (ms)>\n"
403  << "# preEventReadFromSource " << step::preEventReadFromSource
404  << " <Stream ID> <Module ID> <Time since beginJob (ms)>\n"
405  << "# postEventReadFromSource " << step::postEventReadFromSource
406  << " <Stream ID> <Module ID> <Time since beginJob (ms)>\n"
407  << "# postModuleTransition " << step::postModuleEvent
408  << " <Stream ID> <Module ID> <Transition type> <Time since beginJob (ms)>\n"
409  << "# postEvent " << step::postEvent
410  << " <Stream ID> <Run#> <LumiBlock#> <Event#> <Time since beginJob (ms)>\n"
411  << "# postESModulePrefetching " << step::postESModulePrefetching
412  << " <Stream ID> <ESModule ID> <Transition type> <Time since beginJob (ms)>\n"
413  << "# preESModuleTransition " << step::preESModule
414  << " <StreamID> <ESModule ID> <TransitionType> <Time since beginJob (ms)>\n"
415  << "# postESModuleTransition " << step::postESModule
416  << " <StreamID> <ESModule ID> <TransitionType> <Time since beginJob (ms)>\n";
417  if (recordFrameworkTransitions) {
418  oss << "# preFrameworkTransition " << step::preFrameworkTransition
419  << " <Stream ID> <Run#> <LumiBlock#> <Transition type> <Time since beginJob (ms)>\n"
420  << "# postFrameworkTransition " << step::postFrameworkTransition
421  << " <Stream ID> <Run#> <LumiBlock#> <Transition type> <Time since beginJob (ms)>\n";
422  }
423  file_.write(oss.str());
424  }
425 }
426 
429  desc.addUntracked<std::string>("fileName", filename_default)
430  ->setComment(
431  "Name of file to which detailed timing information should be written.\n"
432  "An empty filename argument (the default) indicates that no extra\n"
433  "information will be written to a dedicated file, but only the summary\n"
434  "including stalling-modules information will be logged.");
435  desc.addUntracked<double>("stallThreshold", threshold_default)
436  ->setComment(
437  "Threshold (in seconds) used to classify modules as stalled.\n"
438  "Microsecond granularity allowed.");
439  desc.addUntracked<bool>("recordFrameworkTransitions", false)
440  ->setComment(
441  "When writing a file, include the framework state transitions:\n"
442  " stream and global, begin and end, Run and LuminosityBlock.");
443  descriptions.add("StallMonitor", desc);
444  descriptions.setComment(
445  "This service keeps track of various times in event-processing to determine which modules are stalling.");
446 }
447 
449  // Module labels are dense, so if the module id is greater than the
450  // size of moduleLabels_, grow the vector to the correct index and
451  // assign the last entry to the desired label. Note that with the
452  // current implementation, there is no module with ID '0'. In
453  // principle, the module-information vectors are therefore each one
454  // entry too large. However, since removing the entry at the front
455  // makes for awkward indexing later on, and since the sizes of these
456  // extra entries are on the order of bytes, we will leave them in
457  // and skip over them later when printing out summaries. The
458  // extraneous entries can be identified by their module labels being
459  // empty.
460  auto const mid = md.id();
461  if (mid < moduleLabels_.size()) {
462  moduleLabels_[mid] = md.moduleLabel();
463  } else {
464  moduleLabels_.resize(mid + 1);
465  moduleLabels_.back() = md.moduleLabel();
466  }
467 }
468 
470  // Reset the module label back if the module is deleted before
471  // beginJob() so that the entry is ignored in the summary printouts.
472  moduleLabels_[md.id()] = "";
473 }
474 
476  // Since a (push,emplace)_back cannot be called for a vector of a
477  // type containing atomics (like 'StallStatistics')--i.e. atomics
478  // have no copy/move-assignment operators, we must specify the size
479  // of the vector at construction time.
480  moduleStats_ = std::vector<StallStatistics>(moduleLabels_.size());
481  for (std::size_t i{}; i < moduleStats_.size(); ++i) {
482  moduleStats_[i].setLabel(moduleLabels_[i]);
483  }
484 
485  if (validFile_) {
486  {
487  std::ostringstream oss;
488  moduleIdToLabel(oss, moduleLabels_, 'M', "Module ID", "Module label");
489  file_.write(oss.str());
490  }
491  {
492  std::ostringstream oss;
493  moduleIdToLabel(oss, esModuleLabels_, 'N', "ESModule ID", "ESModule label");
494  file_.write(oss.str());
495  }
496  }
497  // Don't need the labels anymore--info. is now part of the
498  // module-statistics objects.
499  decltype(moduleLabels_)().swap(moduleLabels_);
500  decltype(esModuleLabels_)().swap(esModuleLabels_);
501 
502  beginTime_ = now();
503 }
504 
506  auto const t = duration_cast<duration_t>(now() - beginTime_).count();
507  auto msg = assembleMessage<step::preSourceEvent>(sid.value(), t);
509 }
510 
512  auto const t = duration_cast<duration_t>(now() - beginTime_).count();
513  auto msg = assembleMessage<step::postSourceEvent>(sid.value(), t);
515 }
516 
518  auto const t = duration_cast<duration_t>(now() - beginTime_).count();
519  auto const& eid = sc.eventID();
520  auto msg = assembleMessage<step::preEvent>(stream_id(sc), eid.run(), eid.luminosityBlock(), eid.event(), t);
522 }
523 
525  auto const sid = stream_id(sc);
526  auto const mid = module_id(mcc);
527  auto start = stallStart_[std::make_pair(sid, mid)] = std::make_pair(now(), false);
528 
529  if (validFile_) {
530  auto const t = duration_cast<duration_t>(start.first - beginTime_).count();
531  auto msg = assembleMessage<step::postModuleEventPrefetching>(sid, mid, t);
533  }
534 }
535 
537  auto const preModEventAcquire = now();
538  auto const sid = stream_id(sc);
539  auto const mid = module_id(mcc);
540  auto& start = stallStart_[std::make_pair(sid, mid)];
541  auto startT = start.first.time_since_epoch();
542  start.second = true; // record so the preModuleEvent knows that acquire was called
543  if (validFile_) {
544  auto t = duration_cast<duration_t>(preModEventAcquire - beginTime_).count();
545  auto msg = assembleMessage<step::preModuleEventAcquire>(sid, mid, t);
547  }
548  // Check for stalls if prefetch was called
549  if (duration_t::duration::zero() != startT) {
550  auto const preFetch_to_preModEventAcquire = duration_cast<duration_t>(preModEventAcquire - start.first);
551  if (preFetch_to_preModEventAcquire < stallThreshold_)
552  return;
553  moduleStats_[mid].update(preFetch_to_preModEventAcquire);
554  }
555 }
556 
558  auto const postModEventAcquire = duration_cast<duration_t>(now() - beginTime_).count();
559  auto msg = assembleMessage<step::postModuleEventAcquire>(stream_id(sc), module_id(mcc), postModEventAcquire);
561 }
562 
564  auto const preModEvent = now();
565  auto const sid = stream_id(sc);
566  auto const mid = module_id(mcc);
567  auto const& start = stallStart_[std::make_pair(sid, mid)];
568  auto startT = start.first.time_since_epoch();
569  if (validFile_) {
570  auto t = duration_cast<duration_t>(preModEvent - beginTime_).count();
571  auto msg =
572  assembleMessage<step::preModuleEvent>(sid, mid, static_cast<std::underlying_type_t<Phase>>(Phase::Event), t);
574  }
575  // Check for stalls if prefetch was called and we did not already check before acquire
576  if (duration_t::duration::zero() != startT && !start.second) {
577  auto const preFetch_to_preModEvent = duration_cast<duration_t>(preModEvent - start.first);
578  if (preFetch_to_preModEvent < stallThreshold_)
579  return;
580  moduleStats_[mid].update(preFetch_to_preModEvent);
581  }
582 }
583 
585  auto const tNow = now();
586  auto const sid = stream_id(sc);
587  auto const mid = module_id(mcc);
588  auto t = duration_cast<duration_t>(tNow - beginTime_).count();
589  auto msg = assembleMessage<step::preModuleEvent>(sid, mid, toTransition(sc), t);
591 }
592 
594  auto const t = duration_cast<duration_t>(now() - beginTime_).count();
595  auto msg = assembleMessage<step::postModuleEvent>(stream_id(sc), module_id(mcc), toTransition(sc), t);
597 }
598 
600  auto t = duration_cast<duration_t>(now() - beginTime_).count();
601  auto msg = assembleMessage<step::preModuleEvent>(numStreams_, module_id(mcc), toTransition(gc), t);
603 }
604 
606  auto const postModTime = duration_cast<duration_t>(now() - beginTime_).count();
607  auto msg = assembleMessage<step::postModuleEvent>(numStreams_, module_id(mcc), toTransition(gc), postModTime);
609 }
610 
612  auto const t = duration_cast<duration_t>(now() - beginTime_).count();
613  auto msg = assembleMessage<step::preEventReadFromSource>(stream_id(sc), module_id(mcc), t);
615 }
616 
618  auto const t = duration_cast<duration_t>(now() - beginTime_).count();
619  auto msg = assembleMessage<step::postEventReadFromSource>(stream_id(sc), module_id(mcc), t);
621 }
622 
624  auto const postModEvent = duration_cast<duration_t>(now() - beginTime_).count();
625  auto msg = assembleMessage<step::postModuleEvent>(
626  stream_id(sc), module_id(mcc), static_cast<std::underlying_type_t<Phase>>(Phase::Event), postModEvent);
628 }
629 
631  auto const t = duration_cast<duration_t>(now() - beginTime_).count();
632  auto const& eid = sc.eventID();
633  auto msg = assembleMessage<step::postEvent>(stream_id(sc), eid.run(), eid.luminosityBlock(), eid.event(), t);
635 }
636 
638  // Prepare summary
639  std::size_t width{};
640  edm::for_all(moduleStats_, [&width](auto const& stats) {
641  if (stats.numberOfStalls() == 0u)
642  return;
643  width = std::max(width, stats.label().size());
644  });
645 
646  OStreamColumn tag{"StallMonitor>"};
647  OStreamColumn col1{"Module label", width};
648  OStreamColumn col2{"# of stalls"};
649  OStreamColumn col3{"Total stalled time"};
650  OStreamColumn col4{"Max stalled time"};
651 
652  LogAbsolute out{"StallMonitor"};
653  out << '\n';
654  out << tag << space << col1 << space << col2 << space << col3 << space << col4 << '\n';
655 
656  out << tag << space << std::setfill('-') << col1(std::string{}) << space << col2(std::string{}) << space
657  << col3(std::string{}) << space << col4(std::string{}) << '\n';
658 
659  using seconds_d = duration<double>;
660 
661  auto to_seconds_str = [](auto const& duration) {
662  std::ostringstream oss;
663  auto const time = duration_cast<seconds_d>(duration).count();
664  oss << time << " s";
665  return oss.str();
666  };
667 
668  out << std::setfill(' ');
669  for (auto const& stats : moduleStats_) {
670  if (stats.label().empty() || // See comment in filling of moduleLabels_;
671  stats.numberOfStalls() == 0u)
672  continue;
673  out << std::left << tag << space << col1(stats.label()) << space << std::right << col2(stats.numberOfStalls())
674  << space << col3(to_seconds_str(stats.totalStalledTime())) << space
675  << col4(to_seconds_str(stats.maxStalledTime())) << '\n';
676  }
677 }
678 
Definition: start.py:1
void preModuleEvent(StreamContext const &, ModuleCallingContext const &)
auto module_id(edm::ModuleCallingContext const &mcc)
void preSourceEvent(StreamID)
ThreadSafeOutputFileStream file_
void postEvent(StreamContext const &)
decltype(now()) beginTime_
std::vector< std::string > esModuleLabels_
Phase
assert(be >=bs)
void preModuleConstruction(edm::ModuleDescription const &)
void swap(Association< C > &lhs, Association< C > &rhs)
Definition: Association.h:112
void preModuleStreamTransition(StreamContext const &, ModuleCallingContext const &)
void preModuleDestruction(edm::ModuleDescription const &)
Func for_all(ForwardSequence &s, Func f)
wrapper for std::for_each
Definition: Algorithms.h:14
T getUntrackedParameter(std::string const &, T const &) const
std::vector< std::string > moduleLabels_
char const * label
void postModuleEventPrefetching(StreamContext const &, ModuleCallingContext const &)
unsigned int id() const
void preEventReadFromSource(StreamContext const &, ModuleCallingContext const &)
duration_t const stallThreshold_
void preEvent(StreamContext const &)
Transition transition() const
Definition: StreamContext.h:56
StallMonitor(ParameterSet const &, ActivityRegistry &)
std::ostream & operator<<(std::ostream &os, SimpleMemoryCheck::SignificantEvent const &se)
rep
Definition: cuy.py:1189
void preModuleEventAcquire(StreamContext const &, ModuleCallingContext const &)
#define DEFINE_FWK_SERVICE(type)
Definition: ServiceMaker.h:97
void setComment(std::string const &value)
void moduleIdToLabel(std::ostream &oStream, std::vector< std::string > const &iModuleLabels, char moduleIdSymbol, std::string const &iIDHeader, std::string const &iLabelHeader)
void postModuleEventAcquire(StreamContext const &, ModuleCallingContext const &)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
void postEventReadFromSource(StreamContext const &, ModuleCallingContext const &)
void postModuleGlobalTransition(GlobalContext const &, ModuleCallingContext const &)
void concatenate(std::ostream &os, T const t)
void add(std::string const &label, ParameterSetDescription const &psetDescription)
tuple msg
Definition: mps_check.py:286
oneapi::tbb::concurrent_unordered_map< std::pair< StreamID_value, ModuleID >, std::pair< decltype(beginTime_), bool >, edm::StdPairHasher > stallStart_
Transition transition() const
Definition: GlobalContext.h:59
void postModuleStreamTransition(StreamContext const &, ModuleCallingContext const &)
void postSourceEvent(StreamID)
void postModuleEvent(StreamContext const &, ModuleCallingContext const &)
decltype(std::declval< StreamID >().value()) StreamID_value
HLT enums.
#define update(a, b)
EventID const & eventID() const
Definition: StreamContext.h:60
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
std::vector< StallStatistics > moduleStats_
step
Definition: StallMonitor.cc:83
unsigned int value() const
Definition: StreamID.h:43
decltype(std::declval< ModuleDescription >().id()) ModuleID
std::string const & moduleLabel() const
auto stream_id(edm::StreamContext const &cs)
def move(src, dest)
Definition: eostools.py:511
void preModuleGlobalTransition(GlobalContext const &, ModuleCallingContext const &)
EventID const & max(EventID const &lh, EventID const &rh)
Definition: EventID.h:118