CMS 3D CMS Logo

InitRootHandlers.cc
Go to the documentation of this file.
2 
4 
20 
21 #include "tbb/concurrent_unordered_set.h"
22 #include "tbb/task.h"
23 #include "tbb/task_scheduler_observer.h"
24 #include <memory>
25 
26 #include <thread>
27 #include <sys/wait.h>
28 #include <sstream>
29 #include <cstring>
30 #include <poll.h>
31 #include <atomic>
32 #include <algorithm>
33 #include <vector>
34 #include <string>
35 #include <array>
36 
37 // WORKAROUND: At CERN, execv is replaced with a non-async-signal safe
38 // version. This can break our stack trace printer. Avoid this by
39 // invoking the syscall directly.
40 #ifdef __linux__
41 #include <syscall.h>
42 #endif
43 
44 #include "TROOT.h"
45 #include "TError.h"
46 #include "TFile.h"
47 #include "TInterpreter.h"
48 #include "TH1.h"
49 #include "TSystem.h"
50 #include "TUnixSystem.h"
51 #include "TTree.h"
52 #include "TVirtualStreamerInfo.h"
53 
54 #include "TClassTable.h"
55 
56 #include <memory>
57 
// File-local constant used by the signal handlers and by InitRootHandlers::moduleListBuffers_.
58 namespace {
59  // size of static buffer allocated for listing module names following a
60  // stacktrace abort
// It is both the element size of the std::array<char, N> buffers and the bound
// handed to strlcpy/strlcat when those buffers are filled.
61  constexpr std::size_t moduleBufferSize = 128;
62 } // namespace
63 
// Declarations for the InitRootHandlers service.
// NOTE(review): a few member declarations are absent from this listing (the
// embedded line numbers skip), so only the visible members are described.
64 namespace edm {
66  class ParameterSet;
67  class ActivityRegistry;
68 
69  namespace service {
// Service described by fillDescriptions() as the "Centralized interface to ROOT.".
// The visible members cover the ROOT error handler, the fatal-signal handlers and
// the pipes / helper thread used to produce a stack trace.
70  class InitRootHandlers : public RootHandlers {
// cmssw_stacktrace is the entry point executed in the cloned/forked child.
71  friend int cmssw_stacktrace(void*);
72 
73  public:
// TBB scheduler observer that records the pthread id of every thread entering
// the scheduler; the fatal-signal handler iterates over that set.
74  class ThreadTracker : public tbb::task_scheduler_observer {
75  public:
76  typedef tbb::concurrent_unordered_set<pthread_t> Container_type;
77 
// Observation is switched on immediately at construction.
78  ThreadTracker() : tbb::task_scheduler_observer() { observe(true); }
79  void on_scheduler_entry(bool) override {
80  // ensure thread local has been allocated; not necessary on Linux with
81  // the current cmsRun linkage, but could be an issue if the platform
82  // or linkage leads to "lazy" allocation of the thread local. By
83  // referencing it here we make sure it has been allocated and can be
84  // accessed safely from our signal handler.
86  threadIDs_.insert(pthread_self());
87  }
// Read-only view of the thread ids recorded so far.
88  const Container_type& IDs() { return threadIDs_; }
89 
90  private:
92  };
93 
94  explicit InitRootHandlers(ParameterSet const& pset, ActivityRegistry& iReg);
95  ~InitRootHandlers() override;
96 
97  static void fillDescriptions(ConfigurationDescriptions& descriptions);
98  static void stacktraceFromThread();
// Value of the "StackTracePauseTime" parameter, stored by the constructor.
100  static int stackTracePause() { return stackTracePause_; }
101 
// One fixed-size text buffer per thread; resized in the preallocate callback
// and filled by the pause signal handler.
102  static std::vector<std::array<char, moduleBufferSize>> moduleListBuffers_;
// Counters used by the signal handlers while filling / printing moduleListBuffers_.
103  static std::atomic<std::size_t> nextModule_, doneModules_;
104 
105  private:
106  static char* const* getPstackArgv();
107  void enableWarnings_() override;
109  void willBeUsingThreads() override;
110 
111  void cachePidInfo();
112  static void stacktraceHelperThread();
113 
// Bound used by cachePidInfo() when formatting the gdb command line.
114  static const int pidStringLength_ = 200;
116  static char* const pstackArgv_[];
// Pipe file descriptors created by cachePidInfo(); both start out as {-1, -1}.
117  static int parentToChild_[2];
118  static int childToParent_[2];
119  static std::unique_ptr<std::thread> helperThread_;
121  static int stackTracePause_;
122 
// Handles set by the constructor when "AbortOnSignal" is enabled: each holds a
// null pointer plus a deleter that changes the handler of the matching signal.
127  std::shared_ptr<const void> sigBusHandler_;
128  std::shared_ptr<const void> sigSegvHandler_;
129  std::shared_ptr<const void> sigIllHandler_;
130  std::shared_ptr<const void> sigTermHandler_;
131  std::shared_ptr<const void> sigAbrtHandler_;
132  };
133 
// Always reports true for this service type.
134  inline bool isProcessWideService(InitRootHandlers const*) { return true; }
135 
136  } // end of namespace service
137 } // end of namespace edm
138 
139 namespace edm {
140  namespace service {
141  int cmssw_stacktrace(void*);
142  }
143 } // namespace edm
144 
145 namespace {
147 
// File-local flag tested first in RootErrorHandlerImpl's "ignore" condition.
// NOTE(review): nothing in the visible code ever sets it to true — confirm where it is toggled.
148  bool s_ignoreEverything = false;
149 
// Report whether any entry of `substrs` occurs somewhere inside `search`.
//   search  - text to scan
//   substrs - fixed-size table of NUL-terminated needles
// Returns true on the first needle found, false when none matches
// (which includes an empty table).
template <std::size_t SIZE>
bool find_if_string(const std::string& search, const std::array<const char* const, SIZE>& substrs) {
  for (const char* const needle : substrs) {
    if (search.find(needle) != std::string::npos) {
      return true;
    }
  }
  return false;
}
156 
// Message fragments: a ROOT message containing any of these takes the
// "downgrade the severity" branch of RootErrorHandlerImpl.
157  constexpr std::array<const char* const, 8> in_message{
158  {"no dictionary for class",
159  "already in TClassTable",
160  "matrix not positive definite",
161  "not a TStreamerInfo object",
162  "Problems declaring payload",
163  "Announced number of args different from the real number of argument passed", // Always printed if gDebug>0 - regardless of whether warning message is real.
164  "nbins is <=0 - set to nbins = 1",
165  "nbinsy is <=0 - set to nbinsy = 1"}};
166 
// Location fragments: a message whose location string contains any of these
// takes the same "downgrade" branch.
167  constexpr std::array<const char* const, 6> in_location{{"Fit",
168  "TDecompChol::Solve",
169  "THistPainter::PaintInit",
170  "TUnixSystem::SetDisplay",
171  "TGClient::GetFontByName",
172  "Inverter::Dinv"}};
173 
// Message fragments that are logged immediately as "Root_Error" by
// RootErrorHandlerImpl (the "special case" branch there).
174  constexpr std::array<const char* const, 3> in_message_print{{"number of iterations was insufficient",
175  "bad integrand behavior",
176  "integral is divergent, or slowly convergent"}};
177 
// Core ROOT error handler: classifies one ROOT message and then either throws an
// edm::Exception or forwards the text to the MessageLogger.
//   level    - ROOT severity, compared against kFatal / kSysError / kError / kWarning
//   location - ROOT-supplied origin of the message; may be nullptr
//   message  - message text; may be nullptr
// Throws edm::Exception(edm::errors::FatalRootError) when the message is treated as fatal.
// NOTE(review): the el_severity declaration, its assignments and the bodies of
// several branches are absent from this listing; the comments describe only what
// is visible.
178  void RootErrorHandlerImpl(int level, char const* location, char const* message) {
179  bool die = false;
180 
181  // Translate ROOT severity level to MessageLogger severity level
182 
184 
185  if (level >= kFatal) {
187  } else if (level >= kSysError) {
189  } else if (level >= kError) {
191  } else if (level >= kWarning) {
193  }
194 
// Early "ignore" test; the body of this branch is not visible in this listing.
195  if (s_ignoreEverything || el_severity <= s_ignoreWarnings) {
197  }
198 
199  // Adapt C-strings to std::strings
200  // Arrange to report the error location as furnished by Root
201 
// Null inputs fall back to "@SUB=?" and "?".
202  std::string el_location = "@SUB=?";
203  if (location != nullptr)
204  el_location = std::string("@SUB=") + std::string(location);
205 
206  std::string el_message = "?";
207  if (message != nullptr)
208  el_message = message;
209 
210  // Try to create a meaningful id string using knowledge of ROOT error messages
211  //
212  // id == "ROOT-ClassName" where ClassName is the affected class
213  // else "ROOT/ClassName" where ClassName is the error-declaring class
214  // else "ROOT"
215 
216  std::string el_identifier = "ROOT";
217 
// Case 1: the message contains "class <Name>" followed by a space or colon.
218  std::string precursor("class ");
219  size_t index1 = el_message.find(precursor);
220  if (index1 != std::string::npos) {
221  size_t index2 = index1 + precursor.length();
222  size_t index3 = el_message.find_first_of(" :", index2);
223  if (index3 != std::string::npos) {
224  size_t substrlen = index3 - index2;
225  el_identifier += "-";
226  el_identifier += el_message.substr(index2, substrlen);
227  }
228  } else {
// Case 2: take everything before the first "::" of the location.
// NOTE(review): el_location already starts with "@SUB=", so that prefix ends up
// in the identifier; el_identifier is not read again in the visible code.
229  index1 = el_location.find("::");
230  if (index1 != std::string::npos) {
231  el_identifier += "/";
232  el_identifier += el_location.substr(0, index1);
233  }
234  }
235 
236  // Intercept some messages and upgrade the severity
237 
238  if ((el_location.find("TBranchElement::Fill") != std::string::npos) &&
239  (el_message.find("fill branch") != std::string::npos) && (el_message.find("address") != std::string::npos) &&
240  (el_message.find("not set") != std::string::npos)) {
242  }
243 
244  if ((el_message.find("Tree branches") != std::string::npos) &&
245  (el_message.find("different numbers of entries") != std::string::npos)) {
247  }
248 
249  // Intercept some messages and downgrade the severity
250 
// Matches the in_message / in_location tables, plus one CINTTypedefBuilder
// message that is only considered below kError.
251  if (find_if_string(el_message, in_message) || find_if_string(el_location, in_location) ||
252  (level < kError and (el_location.find("CINTTypedefBuilder::Setup") != std::string::npos) and
253  (el_message.find("possible entries are in use!") != std::string::npos))) {
255  }
256 
257  // These are a special case because we do not want them to
258  // be fatal, but we do want an error to print.
259  bool alreadyPrinted = false;
260  if (find_if_string(el_message, in_message_print)) {
262  edm::LogError("Root_Error") << el_location << el_message;
263  alreadyPrinted = true;
264  }
265 
// Anything whose final severity is not kInfo is treated as fatal.
266  if (el_severity == edm::RootHandlers::SeverityLevel::kInfo) {
267  // Don't throw if the message is just informational.
268  die = false;
269  } else {
270  die = true;
271  }
272 
273  // Feed the message to the MessageLogger and let it choose to suppress or not.
274 
275  // Root has declared a fatal error. Throw an EDMException unless the
276  // message corresponds to a pending signal. In that case, do not throw
277  // but let the OS deal with the signal in the usual way.
278  if (die && (el_location != std::string("@SUB=TUnixSystem::DispatchSignals"))) {
279  std::ostringstream sstr;
280  sstr << "Fatal Root Error: " << el_location << "\n" << el_message << '\n';
281  edm::Exception except(edm::errors::FatalRootError, sstr.str());
// The formatted text is moved from the exception message into its additional info.
282  except.addAdditionalInfo(except.message());
283  except.clearMessage();
284  throw except;
285  }
286 
287  // Typically, we get here only for informational messages,
288  // but we leave the other code in just in case we change
289  // the criteria for throwing.
290  if (!alreadyPrinted) {
291  if (el_severity == edm::RootHandlers::SeverityLevel::kFatal) {
292  edm::LogError("Root_Fatal") << el_location << el_message;
293  } else if (el_severity == edm::RootHandlers::SeverityLevel::kSysError) {
294  edm::LogError("Root_Severe") << el_location << el_message;
295  } else if (el_severity == edm::RootHandlers::SeverityLevel::kError) {
296  edm::LogError("Root_Error") << el_location << el_message;
297  } else if (el_severity == edm::RootHandlers::SeverityLevel::kWarning) {
298  edm::LogWarning("Root_Warning") << el_location << el_message;
299  } else if (el_severity == edm::RootHandlers::SeverityLevel::kInfo) {
300  edm::LogInfo("Root_Information") << el_location << el_message;
301  }
302  }
303  }
304 
// Adapter handed to SetErrorHandler() by the constructor; it drops the unnamed
// bool argument and forwards everything else to RootErrorHandlerImpl.
305  void RootErrorHandler(int level, bool, char const* location, char const* message) {
306  RootErrorHandlerImpl(level, location, message);
307  }
308 
309  extern "C" {
// Put the default disposition back on the five signals this file installs
// custom handlers for (SIGILL, SIGSEGV, SIGBUS, SIGTERM, SIGABRT), in that order.
void set_default_signals() {
  static constexpr int kHandledSignals[] = {SIGILL, SIGSEGV, SIGBUS, SIGTERM, SIGABRT};
  for (const int signo : kHandledSignals) {
    signal(signo, SIG_DFL);
  }
}
317 
// Write the whole NUL-terminated string `text` to `fd`, resuming after short
// writes and retrying when write() is interrupted (EINTR).
// Returns 0 once every byte has been written, otherwise the negated errno of
// the failing write().
static int full_write(int fd, const char* text) {
  const char* cursor = text;
  for (size_t remaining = strlen(text); remaining > 0;) {
    const ssize_t rc = write(fd, cursor, remaining);
    if (rc == -1) {
      if (errno != EINTR) {
        return -errno;
      }
      continue;  // interrupted before anything was written: try again
    }
    remaining -= rc;
    cursor += rc;
  }
  return 0;
}
336 
// Read exactly `len` bytes from `fd` into `inbuf`.
//   timeout_s - negative (the default): no poll() and no fcntl() changes are made;
//               otherwise an overall deadline; the descriptor is switched to
//               O_NONBLOCK for the duration and its flags restored before returning.
// Returns 0 on success, -ETIMEDOUT when the deadline passes, or another negated errno.
// NOTE(review): the initializer of end_time is absent from this listing.
// NOTE(review): a read() result of 0 (end of file) is not treated specially, so
// `count` stays unchanged and the loop simply repeats — confirm this cannot occur.
337  static int full_read(int fd, char* inbuf, size_t len, int timeout_s = -1) {
338  char* buf = inbuf;
339  size_t count = len;
340  ssize_t complete = 0;
341  std::chrono::time_point<std::chrono::steady_clock> end_time =
343  int flags;
344  if (timeout_s < 0) {
345  flags = O_NONBLOCK; // Prevents us from trying to set / restore flags later.
346  } else if ((-1 == (flags = fcntl(fd, F_GETFL)))) {
347  return -errno;
348  }
349  if ((flags & O_NONBLOCK) != O_NONBLOCK) {
350  if (-1 == fcntl(fd, F_SETFL, flags | O_NONBLOCK)) {
351  return -errno;
352  }
353  }
354  while (count) {
355  if (timeout_s >= 0) {
356  struct pollfd poll_info {
357  fd, POLLIN, 0
358  };
359  int ms_remaining =
360  std::chrono::duration_cast<std::chrono::milliseconds>(end_time - std::chrono::steady_clock::now()).count();
// Time left: wait for readability. rc == 0 means poll() timed out; EINTR/EAGAIN retry.
361  if (ms_remaining > 0) {
362  int rc = poll(&poll_info, 1, ms_remaining);
363  if (rc <= 0) {
364  if (rc < 0) {
365  if (errno == EINTR || errno == EAGAIN) {
366  continue;
367  }
368  rc = -errno;
369  } else {
370  rc = -ETIMEDOUT;
371  }
372  if ((flags & O_NONBLOCK) != O_NONBLOCK) {
373  fcntl(fd, F_SETFL, flags);
374  }
375  return rc;
376  }
// Deadline already passed. Exactly zero remaining falls through to one more read().
377  } else if (ms_remaining < 0) {
378  if ((flags & O_NONBLOCK) != O_NONBLOCK) {
379  fcntl(fd, F_SETFL, flags);
380  }
381  return -ETIMEDOUT;
382  }
383  }
384  complete = read(fd, buf, count);
385  if (complete == -1) {
386  if (errno == EINTR) {
387  continue;
388  } else if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
389  continue;
390  } else {
// errno is saved first because the fcntl() below may overwrite it.
391  int orig_errno = errno;
392  if ((flags & O_NONBLOCK) != O_NONBLOCK) {
393  fcntl(fd, F_SETFL, flags);
394  }
395  return -orig_errno;
396  }
397  }
398  count -= complete;
399  buf += complete;
400  }
401  if ((flags & O_NONBLOCK) != O_NONBLOCK) {
402  fcntl(fd, F_SETFL, flags);
403  }
404  return 0;
405  }
406 
407  static int full_cerr_write(const char* text) { return full_write(2, text); }
408 
// these signals are only used inside the stacktrace signal handler,
// so common signals can be used. They do have to be different, since
// we do not set SA_NODEFER, and RESUME must be a signal that will
// cause sleep() to return early.
// RESUME_SIGNAL is parenthesized so that it stays a single operand when the
// macro is used inside a larger expression.
#if defined(SIGRTMAX)
#define PAUSE_SIGNAL SIGRTMAX
#define RESUME_SIGNAL (SIGRTMAX - 1)
#elif defined(SIGINFO)  // macOS/BSD
#define PAUSE_SIGNAL SIGINFO
#define RESUME_SIGNAL SIGALRM
#endif
420 
421  // does nothing, here only to interrupt the sleep() in the pause handler
// Installed for RESUME_SIGNAL by sig_dostack_then_abort.
422  void sig_resume_handler(int sig, siginfo_t*, void*) {}
423 
424  // pause a thread so that a (slow) stacktrace will capture the current state
// Handler installed for PAUSE_SIGNAL by sig_dostack_then_abort. After waking it
// writes "\nModule: <name>:<label>" (or "none") into one moduleListBuffers_ slot.
// NOTE(review): the sleep call, the slot-index computation, the current-module
// null test and the doneModules_ update are absent from this listing.
425  void sig_pause_for_stacktrace(int sig, siginfo_t*, void*) {
426  using namespace edm::service;
427 
428 #ifdef RESUME_SIGNAL
// Unblock the resume signal in this thread so it can cut the pause short.
429  sigset_t sigset;
430  sigemptyset(&sigset);
431  sigaddset(&sigset, RESUME_SIGNAL);
432  pthread_sigmask(SIG_UNBLOCK, &sigset, nullptr);
433 #endif
434  // sleep interrupts on a handled delivery of the resume signal
436 
// The module name is recorded only when both atomic counters are lock-free.
437  if (InitRootHandlers::doneModules_.is_lock_free() && InitRootHandlers::nextModule_.is_lock_free()) {
440  char* buff = InitRootHandlers::moduleListBuffers_[i].data();
441 
442  strlcpy(buff, "\nModule: ", moduleBufferSize);
444  strlcat(buff,
445  edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleName().c_str(),
446  moduleBufferSize);
447  strlcat(buff, ":", moduleBufferSize);
448  strlcat(buff,
449  edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleLabel().c_str(),
450  moduleBufferSize);
451  } else {
452  strlcat(buff, "none", moduleBufferSize);
453  }
455  }
456  }
457  }
458 
// Fatal-signal handler installed by the constructor for SIGBUS, SIGSEGV, SIGILL,
// SIGTERM and SIGABRT. In order it: pauses the other tracked threads, prints a
// banner, resumes those threads so they record their current module, prints the
// module list, then re-raises the signal with the default disposition.
// NOTE(review): the call that actually produces the stack trace and the
// current-module null test are absent from this listing.
459  void sig_dostack_then_abort(int sig, siginfo_t*, void*) {
460  using namespace edm::service;
461 
462  const auto& tids = InitRootHandlers::threadIDs();
463 
464  const auto self = pthread_self();
465 #ifdef PAUSE_SIGNAL
// Pausing is skipped when the configured pause time is not positive or when at
// most one thread has been recorded.
466  if (InitRootHandlers::stackTracePause() > 0 && tids.size() > 1) {
467  // install the "pause" handler
// NOTE(review): the handler is assigned through sa_sigaction while sa_flags is 0
// (SA_SIGINFO not set) — confirm this is intended.
468  struct sigaction act;
469  act.sa_sigaction = sig_pause_for_stacktrace;
470  act.sa_flags = 0;
471  sigemptyset(&act.sa_mask);
472  sigaction(PAUSE_SIGNAL, &act, nullptr);
473 
474  // unblock pause signal globally, resume is unblocked in the pause handler
475  sigset_t pausesigset;
476  sigemptyset(&pausesigset);
477  sigaddset(&pausesigset, PAUSE_SIGNAL);
478  sigprocmask(SIG_UNBLOCK, &pausesigset, nullptr);
479 
480  // send a pause signal to all CMSSW/TBB threads other than self
481  for (auto id : tids) {
482  if (self != id) {
483  pthread_kill(id, PAUSE_SIGNAL);
484  }
485  }
486 
487 #ifdef RESUME_SIGNAL
488  // install the "resume" handler
489  act.sa_sigaction = sig_resume_handler;
490  sigaction(RESUME_SIGNAL, &act, nullptr);
491 #endif
492  }
493 #endif
494 
// Human-readable name for the banner; any other signal stays "unknown".
495  const char* signalname = "unknown";
496  switch (sig) {
497  case SIGBUS: {
498  signalname = "bus error";
499  break;
500  }
501  case SIGSEGV: {
502  signalname = "segmentation violation";
503  break;
504  }
505  case SIGILL: {
506  signalname = "illegal instruction";
507  break;
508  }
509  case SIGTERM: {
510  signalname = "external termination request";
511  break;
512  }
513  case SIGABRT: {
514  signalname = "abort signal";
515  break;
516  }
517  default:
518  break;
519  }
520  full_cerr_write("\n\nA fatal system signal has occurred: ");
521  full_cerr_write(signalname);
522  full_cerr_write("\nThe following is the call stack containing the origin of the signal.\n\n");
523 
525 
526  // resume the signal handlers to store the current module; we are not guaranteed they
527  // will have time to store their modules, so there is a race condition; this could be
528  // avoided by storing the module information before sleeping, a change that may be
529  // made when we're convinced accessing the thread-local current module is safe.
530 #ifdef RESUME_SIGNAL
// `notified` counts the threads whose pthread_kill succeeded; it bounds the wait below.
531  std::size_t notified = 0;
532  if (InitRootHandlers::stackTracePause() > 0 && tids.size() > 1) {
533  for (auto id : tids) {
534  if (self != id) {
535  if (pthread_kill(id, RESUME_SIGNAL) == 0)
536  ++notified;
537  }
538  }
539  }
540 #endif
541 
542  full_cerr_write("\nCurrent Modules:\n");
543 
544  // Checking tids.count(self) ensures that we only try to access the current module in
545  // CMSSW/TBB threads. Those threads access the thread-local current module at the same
546  // time the thread is registered, so any lazy allocation will have been done at that
547  // point. Not necessary on Linux with the current cmsRun linkage, as the thread-local
548  // is allocated at exec time, not lazily.
549  if (tids.count(self) > 0) {
550  char buff[moduleBufferSize] = "\nModule: ";
552  strlcat(buff,
553  edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleName().c_str(),
554  moduleBufferSize);
555  strlcat(buff, ":", moduleBufferSize);
556  strlcat(buff,
557  edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleLabel().c_str(),
558  moduleBufferSize);
559  } else {
560  strlcat(buff, "none", moduleBufferSize);
561  }
562  strlcat(buff, " (crashed)", moduleBufferSize);
563  full_cerr_write(buff);
564  } else {
565  full_cerr_write("\nModule: non-CMSSW (crashed)");
566  }
567 
568 #ifdef PAUSE_SIGNAL
569  // wait a short interval for the paused threads to resume and fill in their module
570  // information, then print
// Bounded wait: fewer than 1000 iterations of a 1000 ns nanosleep.
571  if (InitRootHandlers::doneModules_.is_lock_free()) {
572  int spincount = 0;
573  timespec t = {0, 1000};
574  while (++spincount < 1000 && InitRootHandlers::doneModules_ < notified) {
575  nanosleep(&t, nullptr);
576  }
577  for (std::size_t i = 0; i < InitRootHandlers::doneModules_; ++i) {
578  full_cerr_write(InitRootHandlers::moduleListBuffers_[i].data());
579  }
580  }
581 #endif
582 
583  full_cerr_write("\n\nA fatal system signal has occurred: ");
584  full_cerr_write(signalname);
585  full_cerr_write("\n");
586 
587  // For these five known cases, re-raise the signal to get the correct
588  // exit code.
589  if ((sig == SIGILL) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig == SIGTERM) || (sig == SIGABRT)) {
590  signal(sig, SIG_DFL);
591  raise(sig);
592  } else {
593  set_default_signals();
594  ::abort();
595  }
596  }
597 
// Minimal fatal-signal handler: no stack trace, only a message and a re-raise.
// The constructor registers it through the deleters of the sig*Handler_ members
// for SIGBUS, SIGSEGV, SIGILL and SIGTERM.
598  void sig_abort(int sig, siginfo_t*, void*) {
599  full_cerr_write("\n\nFatal system signal has occurred during exit\n");
600 
601  // re-raise the signal to get the correct exit code
602  signal(sig, SIG_DFL);
603  raise(sig);
604 
605  // shouldn't get here
// Fallback if raise() returns: reset all handlers, wait 10 seconds, then abort.
606  set_default_signals();
607  ::sleep(10);
608  ::abort();
609  }
610  }
611 } // end of unnamed namespace
612 
613 namespace edm {
614  namespace service {
615 
616  /*
617  * We've run into issues where GDB fails to print the thread which calls clone().
618  * To avoid this problem, we have an alternate approach below where the signal handler
619  * only reads/writes to a dedicated thread via pipes. The helper thread does the clone()
620  * invocation; we don't care if that thread is missing from the traceback in this case.
621  */
622  static void cmssw_stacktrace_fork();
623 
// Body of the traceback helper thread.
// NOTE(review): the signature line is absent from this listing; presumably this
// is InitRootHandlers::stacktraceHelperThread, which cachePidInfo() starts.
// It reads one-character commands from parentToChild_[0]:
//   '1' - reset signal handlers, then write the same byte back on childToParent_[1]
//         (the statement between those two steps is not visible here)
//   '2' - close both descriptors and reload them from the static arrays
//   '3' - leave the loop
// Any other byte, or a read failure, prints a message and aborts.
625  int toParent = childToParent_[1];
626  int fromParent = parentToChild_[0];
// Two-byte buffer so the command can also be printed as a C string.
627  char buf[2];
628  buf[1] = '\0';
629 
630  while (true) {
// No timeout argument is passed, so full_read uses its default of -1.
631  int result = full_read(fromParent, buf, 1);
632  if (result < 0) {
633  // To avoid a deadlock (this function is NOT re-entrant), reset signals
634  // We never set them back to the CMSSW handler because we assume the parent
635  // thread will abort for us.
636  set_default_signals();
637  close(toParent);
638  full_cerr_write("\n\nTraceback helper thread failed to read from parent: ");
639  full_cerr_write(strerror(-result));
640  full_cerr_write("\n");
641  ::abort();
642  }
643  if (buf[0] == '1') {
644  set_default_signals();
646  full_write(toParent, buf);
647  } else if (buf[0] == '2') {
648  // We have just finished forking. Reload the file descriptors for thread
649  // communication.
650  close(toParent);
651  close(fromParent);
652  toParent = childToParent_[1];
653  fromParent = parentToChild_[0];
654  } else if (buf[0] == '3') {
655  break;
656  } else {
657  set_default_signals();
658  close(toParent);
659  full_cerr_write("\n\nTraceback helper thread got unknown command from parent: ");
660  full_cerr_write(buf);
661  full_cerr_write("\n");
662  ::abort();
663  }
664  }
665  }
666 
// Requests a stack trace from the helper thread and waits for it to finish.
// NOTE(review): the signature line is absent from this listing; presumably this
// is InitRootHandlers::stacktraceFromThread.
// Sends command "1" on parentToChild_[1], then waits up to 5 * 60 seconds for a
// one-byte reply on childToParent_[0]. Failures are reported on stderr only.
668  int result = full_write(parentToChild_[1], "1");
669  if (result < 0) {
670  full_cerr_write("\n\nAttempt to request stacktrace failed: ");
671  full_cerr_write(strerror(-result));
672  full_cerr_write("\n");
673  return;
674  }
675  char buf[2];
676  buf[1] = '\0';
677  if ((result = full_read(childToParent_[0], buf, 1, 5 * 60)) < 0) {
678  full_cerr_write("\n\nWaiting for stacktrace completion failed: ");
// A timeout gets its own wording; every other failure is printed via strerror.
679  if (result == -ETIMEDOUT) {
680  full_cerr_write("timed out waiting for GDB to complete.");
681  } else {
682  full_cerr_write(strerror(-result));
683  }
684  full_cerr_write("\n");
685  return;
686  }
687  }
688 
// Spawns the child that runs the stack-trace command and waits for it.
// NOTE(review): the signature line is absent from this listing; presumably this
// is the cmssw_stacktrace_fork() declared just above.
// The 4 KiB local array is the child's stack for clone(); the pointer passed is
// its end.
690  char child_stack[4 * 1024];
691  char* child_stack_ptr = child_stack + 4 * 1024;
692  // On Linux, we currently use jemalloc. This registers pthread_atfork handlers; these
693  // handlers are *not* async-signal safe. Hence, a deadlock is possible if we invoke
694  // fork() from our signal handlers. Accordingly, we use clone (not POSIX, but AS-safe)
695  // as that is closer to the 'raw metal' syscall and avoids pthread_atfork handlers.
696  int pid =
697 #ifdef __linux__
698  clone(edm::service::cmssw_stacktrace, child_stack_ptr, CLONE_VM | CLONE_FS | SIGCHLD, nullptr);
699 #else
700  fork();
701  if (child_stack_ptr) {
702  } // Suppress 'unused variable' warning on non-Linux
// NOTE(review): the statement executed in the forked child is absent from this listing.
703  if (pid == 0) {
705  }
706 #endif
707  if (pid == -1) {
708  full_cerr_write("(Attempt to perform stack dump failed.)\n");
709  } else {
// NOTE(review): `status` is uninitialized and is still tested below when
// waitpid() fails — confirm whether the second message should be skipped then.
710  int status;
711  if (waitpid(pid, &status, 0) == -1) {
712  full_cerr_write("(Failed to wait on stack dump output.)\n");
713  }
714  if (status) {
715  full_cerr_write("(GDB stack trace failed unexpectedly)\n");
716  }
717  }
718  }
719 
// Entry point run in the cloned/forked child: resets the signal handlers and
// replaces the process image with /bin/sh. It aborts if the exec returns, so the
// final `return 1` is never reached.
// NOTE(review): the line defining `argv` is absent from this listing.
720  int cmssw_stacktrace(void* /*arg*/) {
721  set_default_signals();
722 
724  // NOTE: this is NOT async-signal-safe at CERN's lxplus service.
725  // CERN uses LD_PRELOAD to replace execv with a function from libsnoopy which
726  // calls dlsym.
727 #ifdef __linux__
728  syscall(SYS_execve, "/bin/sh", argv, __environ);
729 #else
730  execv("/bin/sh", argv);
731 #endif
732  ::abort();
733  return 1;
734  }
735 
// Storage for the static data used by the stack-trace machinery.
// NOTE(review): some definitions are absent from this listing (the embedded
// line numbers skip).
736  static char pstackName[] = "(CMSSW stack trace helper)";
737  static char dashC[] = "-c";
// Both pipes start as {-1, -1}; cachePidInfo() replaces them with real descriptors.
740  int InitRootHandlers::parentToChild_[2] = {-1, -1};
741  int InitRootHandlers::childToParent_[2] = {-1, -1};
742  std::unique_ptr<std::thread> InitRootHandlers::helperThread_;
// Starts empty; resized in the constructor's preallocate callback.
744  std::vector<std::array<char, moduleBufferSize>> InitRootHandlers::moduleListBuffers_;
745  std::atomic<std::size_t> InitRootHandlers::nextModule_(0), InitRootHandlers::doneModules_(0);
747 
747 
// Constructor body: applies the untracked configuration parameters to ROOT and
// installs the signal and error handlers.
// NOTE(review): the signature line is absent from this listing; `pset` and `iReg`
// are the constructor parameters declared in the class.
749  : RootHandlers(),
750  unloadSigHandler_(pset.getUntrackedParameter<bool>("UnloadRootSigHandler")),
751  resetErrHandler_(pset.getUntrackedParameter<bool>("ResetRootErrHandler")),
752  loadAllDictionaries_(pset.getUntrackedParameter<bool>("LoadAllDictionaries")),
// Requesting all dictionaries also turns automatic library loading on.
753  autoLibraryLoader_(loadAllDictionaries_ or pset.getUntrackedParameter<bool>("AutoLibraryLoader")) {
754  stackTracePause_ = pset.getUntrackedParameter<int>("StackTracePauseTime");
755 
756  if (unloadSigHandler_) {
757  // Deactivate all the Root signal handlers and restore the system defaults
758  gSystem->ResetSignal(kSigChild);
759  gSystem->ResetSignal(kSigBus);
760  gSystem->ResetSignal(kSigSegmentationViolation);
761  gSystem->ResetSignal(kSigIllegalInstruction);
762  gSystem->ResetSignal(kSigSystem);
763  gSystem->ResetSignal(kSigPipe);
764  gSystem->ResetSignal(kSigAlarm);
765  gSystem->ResetSignal(kSigUrgent);
766  gSystem->ResetSignal(kSigFloatingException);
767  gSystem->ResetSignal(kSigWindowChanged);
768  } else if (pset.getUntrackedParameter<bool>("AbortOnSignal")) {
// Prepare the gdb command line, pipes and helper thread before any signal can arrive.
769  cachePidInfo();
770 
771  //NOTE: ROOT can also be told to abort on these kinds of problems BUT
772  // it requires an TApplication to be instantiated which causes problems
773  gSystem->ResetSignal(kSigBus);
774  gSystem->ResetSignal(kSigSegmentationViolation);
775  gSystem->ResetSignal(kSigIllegalInstruction);
// For each signal: install the stack-trace handler now, and store a null-pointer
// shared_ptr whose deleter swaps in sig_abort (SIG_DFL for SIGABRT) when run.
776  installCustomHandler(SIGBUS, sig_dostack_then_abort);
777  sigBusHandler_ = std::shared_ptr<const void>(nullptr, [](void*) { installCustomHandler(SIGBUS, sig_abort); });
778  installCustomHandler(SIGSEGV, sig_dostack_then_abort);
779  sigSegvHandler_ = std::shared_ptr<const void>(nullptr, [](void*) { installCustomHandler(SIGSEGV, sig_abort); });
780  installCustomHandler(SIGILL, sig_dostack_then_abort);
781  sigIllHandler_ = std::shared_ptr<const void>(nullptr, [](void*) { installCustomHandler(SIGILL, sig_abort); });
782  installCustomHandler(SIGTERM, sig_dostack_then_abort);
783  sigTermHandler_ = std::shared_ptr<const void>(nullptr, [](void*) { installCustomHandler(SIGTERM, sig_abort); });
784  installCustomHandler(SIGABRT, sig_dostack_then_abort);
785  sigAbrtHandler_ = std::shared_ptr<const void>(nullptr, [](void*) {
786  signal(SIGABRT, SIG_DFL); // release SIGABRT to default
787  });
788  }
789 
// Grow (never shrink) the module-name buffers to one per thread.
790  iReg.watchPreallocate([](edm::service::SystemBounds const& iBounds) {
791  if (iBounds.maxNumberOfThreads() > moduleListBuffers_.size()) {
792  moduleListBuffers_.resize(iBounds.maxNumberOfThreads());
793  }
794  });
795 
796  if (resetErrHandler_) {
797  // Replace the Root error handler with one that uses the MessageLogger
798  SetErrorHandler(RootErrorHandler);
799  }
800 
801  // Enable automatic Root library loading.
802  if (autoLibraryLoader_) {
803  gInterpreter->SetClassAutoloading(1);
804  }
805 
806  // Set ROOT parameters.
807  TTree::SetMaxTreeSize(kMaxLong64);
808  TH1::AddDirectory(kFALSE);
809  //G__SetCatchException(0);
810 
811  // Set custom streamers
813 
814  // Load the library containing dictionaries for std:: classes, if not already loaded.
815  if (!hasDictionary(typeid(std::vector<std::vector<unsigned int>>))) {
816  TypeWithDict::byName("std::vector<std::vector<unsigned int> >");
817  }
818 
// gDebug is only changed for a positive "DebugLevel".
819  int debugLevel = pset.getUntrackedParameter<int>("DebugLevel");
820  if (debugLevel > 0) {
821  gDebug = debugLevel;
822  }
823 
824  // Enable Root implicit multi-threading
825  bool imt = pset.getUntrackedParameter<bool>("EnableIMT");
826  if (imt && not ROOT::IsImplicitMTEnabled()) {
827  ROOT::EnableImplicitMT();
828  }
829  }
830 
// NOTE(review): the signature line is absent from this listing; presumably this
// is the destructor body.
832  // close all open ROOT files
833  TIter iter(gROOT->GetListOfFiles());
834  TObject* obj = nullptr;
835  while (nullptr != (obj = iter.Next())) {
// Entries that are not TFile objects are skipped.
836  TFile* f = dynamic_cast<TFile*>(obj);
837  if (f) {
838  // We get a new iterator each time,
839  // because closing a file can invalidate the iterator
840  f->Close();
841  iter = TIter(gROOT->GetListOfFiles());
842  }
843  }
844  }
845 
// NOTE(review): the signature line is absent from this listing; presumably this
// is the body of willBeUsingThreads(). It makes three ROOT configuration calls
// for multi-threaded running.
847  //Tell Root we want to be multi-threaded
848  ROOT::EnableThreadSafety();
849 
850  //When threading, also have to keep ROOT from logging all TObjects into a list
851  TObject::SetObjectStat(false);
852 
853  //Have to avoid having Streamers modify themselves after they have been used
854  TVirtualStreamerInfo::Optimize(false);
855  }
856 
// Declares the untracked parameters of this service and their defaults, then
// registers the description under the label "InitRootHandlers".
// NOTE(review): the signature line and the declaration of `desc` are absent from
// this listing.
859  desc.setComment("Centralized interface to ROOT.");
// NOTE(review): with this flag set, the visible constructor only calls
// gSystem->ResetSignal(...) and installs no handler of its own; the help text
// below may overstate what the service does — confirm.
860  desc.addUntracked<bool>("UnloadRootSigHandler", false)
861  ->setComment("If True, signals are handled by this service, rather than by ROOT.");
862  desc.addUntracked<bool>("ResetRootErrHandler", true)
863  ->setComment(
864  "If True, ROOT messages (e.g. errors, warnings) are handled by this service, rather than by ROOT.");
865  desc.addUntracked<bool>("AutoLibraryLoader", true)
866  ->setComment("If True, enables automatic loading of data dictionaries.");
867  desc.addUntracked<bool>("LoadAllDictionaries", false)->setComment("If True, loads all ROOT dictionaries.");
868  desc.addUntracked<bool>("EnableIMT", true)->setComment("If True, calls ROOT::EnableImplicitMT().");
869  desc.addUntracked<bool>("AbortOnSignal", true)
870  ->setComment(
871  "If True, do an abort when a signal occurs that causes a crash. If False, ROOT will do an exit which "
872  "attempts to do a clean shutdown.");
873  desc.addUntracked<int>("DebugLevel", 0)->setComment("Sets ROOT's gDebug value.");
874  desc.addUntracked<int>("StackTracePauseTime", 300)
875  ->setComment("Seconds to pause other threads during stack trace.");
876  descriptions.add("InitRootHandlers", desc);
877  }
878 
// Accessor for the static pstackArgv_ array.
879  char* const* InitRootHandlers::getPstackArgv() { return pstackArgv_; }
880 
882 
884 
// One-time setup for stack-trace generation: formats the gdb command line with
// this process's pid, (re)creates the two pipes and starts the detached helper
// thread. Throws edm::Exception(edm::errors::OtherCMS) on failure.
// NOTE(review): the signature line is absent from this listing; presumably this
// is InitRootHandlers::cachePidInfo().
886  if (helperThread_) {
887  //Another InitRootHandlers was initialized in this job, possibly
888  // because multiple EventProcessors are being used.
889  //In that case, we are already all setup
890  return;
891  }
// NOTE(review): the size passed to snprintf is pidStringLength_ - 1, but the
// truncation test compares against pidStringLength_; a result of exactly
// pidStringLength_ - 1 characters would be truncated without being detected.
892  if (snprintf(pidString_,
893  pidStringLength_ - 1,
894  "date; gdb -quiet -p %d 2>&1 <<EOF |\n"
895  "set width 0\n"
896  "set height 0\n"
897  "set pagination no\n"
898  "thread apply all bt\n"
899  "EOF\n"
900  "/bin/sed -n -e 's/^\\((gdb) \\)*//' -e '/^#/p' -e '/^Thread/p'",
901  getpid()) >= pidStringLength_) {
902  std::ostringstream sstr;
903  sstr << "Unable to pre-allocate stacktrace handler information";
904  edm::Exception except(edm::errors::OtherCMS, sstr.str());
905  throw except;
906  }
907 
908  // These are initialized to -1; harmless to close an invalid FD.
909  // If this is called post-fork, we don't want to be communicating on
910  // these FDs as they are used internally by the parent.
911  close(childToParent_[0]);
912  close(childToParent_[1]);
913  childToParent_[0] = -1;
914  childToParent_[1] = -1;
915  close(parentToChild_[0]);
916  close(parentToChild_[1]);
917  parentToChild_[0] = -1;
918  parentToChild_[1] = -1;
919 
920  if (-1 == pipe2(childToParent_, O_CLOEXEC)) {
921  std::ostringstream sstr;
922  sstr << "Failed to create child-to-parent pipes (errno=" << errno << "): " << strerror(errno);
923  edm::Exception except(edm::errors::OtherCMS, sstr.str());
924  throw except;
925  }
926 
927  if (-1 == pipe2(parentToChild_, O_CLOEXEC)) {
// Undo the first pipe so no descriptors leak when the second one fails.
// NOTE(review): the close() calls run before the message is built, so errno may
// no longer be the pipe2() error when it is read below.
928  close(childToParent_[0]);
929  close(childToParent_[1]);
930  childToParent_[0] = -1;
931  childToParent_[1] = -1;
932  std::ostringstream sstr;
// NOTE(review): this branch handles the parentToChild_ pipe, yet the text says
// "child-to-parent" — looks copied from the block above; confirm.
933  sstr << "Failed to create child-to-parent pipes (errno=" << errno << "): " << strerror(errno);
934  edm::Exception except(edm::errors::OtherCMS, sstr.str());
935  throw except;
936  }
937 
// The helper thread is detached; it is never joined in the visible code.
938  helperThread_ = std::make_unique<std::thread>(stacktraceHelperThread);
939  helperThread_->detach();
940  }
941 
942  } // end of namespace service
943 } // end of namespace edm
944 
personalPlayback.level
level
Definition: personalPlayback.py:22
ConfigurationDescriptions.h
edm::service::InitRootHandlers::childToParent_
static int childToParent_[2]
Definition: InitRootHandlers.cc:118
service
Definition: service.py:1
edm::TypeWithDict::byName
static TypeWithDict byName(std::string const &name)
Definition: TypeWithDict.cc:74
edm::service::InitRootHandlers::threadIDs
static const ThreadTracker::Container_type & threadIDs()
Definition: InitRootHandlers.cc:99
ModuleCallingContext.h
cmsBatch.argv
argv
Definition: cmsBatch.py:279
electrons_cff.bool
bool
Definition: electrons_cff.py:393
mps_fire.i
i
Definition: mps_fire.py:428
edm::service::InitRootHandlers::helperThread_
static std::unique_ptr< std::thread > helperThread_
Definition: InitRootHandlers.cc:119
MessageLogger.h
edm::service::InitRootHandlers::sigIllHandler_
std::shared_ptr< const void > sigIllHandler_
Definition: InitRootHandlers.cc:129
edm::service::InitRootHandlers::sigTermHandler_
std::shared_ptr< const void > sigTermHandler_
Definition: InitRootHandlers.cc:130
edm::RootHandlers::SeverityLevel::kSysError
ActivityRegistry
f
double f[11][100]
Definition: MuScleFitUtils.cc:78
edm::service::InitRootHandlers::parentToChild_
static int parentToChild_[2]
Definition: InitRootHandlers.cc:117
edm::service::InitRootHandlers::threadTracker_
static ThreadTracker threadTracker_
Definition: InitRootHandlers.cc:120
submitPVValidationJobs.now
now
Definition: submitPVValidationJobs.py:639
mps_update.status
status
Definition: mps_update.py:69
edm::service::InitRootHandlers::ThreadTracker::Container_type
tbb::concurrent_unordered_set< pthread_t > Container_type
Definition: InitRootHandlers.cc:76
edm
HLT enums.
Definition: AlignableModifier.h:19
edm::service::InitRootHandlers::willBeUsingThreads
void willBeUsingThreads() override
Definition: InitRootHandlers.cc:846
InitRootHandlers_cfi.InitRootHandlers
InitRootHandlers
Definition: InitRootHandlers_cfi.py:5
edm::ParameterSetDescription
Definition: ParameterSetDescription.h:52
edm::RootHandlers::SeverityLevel::kInfo
edm::service::InitRootHandlers::InitRootHandlers
InitRootHandlers(ParameterSet const &pset, ActivityRegistry &iReg)
Definition: InitRootHandlers.cc:748
PluginCapabilities.h
CurrentModuleOnThread.h
edm::service::InitRootHandlers::fillDescriptions
static void fillDescriptions(ConfigurationDescriptions &descriptions)
Definition: InitRootHandlers.cc:857
HLT_FULL_cff.debugLevel
debugLevel
Definition: HLT_FULL_cff.py:88237
TypeWithDict.h
edm::RootHandlers::SeverityLevel::kWarning
edm::service::isProcessWideService
bool isProcessWideService(TFileService const *)
Definition: TFileService.h:98
edm::LogInfo
Log< level::Info, false > LogInfo
Definition: MessageLogger.h:125
edm::service::InitRootHandlers::moduleListBuffers_
static std::vector< std::array< char, moduleBufferSize > > moduleListBuffers_
Definition: InitRootHandlers.cc:102
edm::LogWarning
Log< level::Warning, false > LogWarning
Definition: MessageLogger.h:122
edm::service::InitRootHandlers::stackTracePause
static int stackTracePause()
Definition: InitRootHandlers.cc:100
edm::Exception
Definition: EDMException.h:77
edm::setRefCoreStreamerInTClass
void setRefCoreStreamerInTClass()
Definition: RefCoreStreamer.cc:68
edmScanValgrind.buffer
buffer
Definition: edmScanValgrind.py:171
ModuleDescription.h
ActivityRegistry.h
EDMException.h
edm::service::cmssw_stacktrace_fork
static void cmssw_stacktrace_fork()
Definition: InitRootHandlers.cc:689
ztee.fd
fd
Definition: ztee.py:136
RootHandlersMaker
edm::serviceregistry::AllArgsMaker< edm::RootHandlers, InitRootHandlers > RootHandlersMaker
Definition: InitRootHandlers.cc:946
alignCSCRings.s
s
Definition: alignCSCRings.py:92
edm::service::InitRootHandlers::ThreadTracker::threadIDs_
Container_type threadIDs_
Definition: InitRootHandlers.cc:91
edm::service::dashC
static char dashC[]
Definition: InitRootHandlers.cc:737
edm::ConfigurationDescriptions::add
void add(std::string const &label, ParameterSetDescription const &psetDescription)
Definition: ConfigurationDescriptions.cc:57
edm::RootHandlers::SeverityLevel::kError
clone
TEveGeoShape * clone(const TEveElement *element, TEveElement *parent)
Definition: eve_macros.cc:135
RefCoreStreamer.h
edm::serviceregistry::AllArgsMaker
Definition: ServiceMaker.h:47
edm::errors::FatalRootError
Definition: EDMException.h:51
seconds
double seconds()
edm::service::InitRootHandlers::pidStringLength_
static const int pidStringLength_
Definition: InitRootHandlers.cc:114
edm::ActivityRegistry
Definition: ActivityRegistry.h:133
edm::service::InitRootHandlers::stacktraceFromThread
static void stacktraceFromThread()
Definition: InitRootHandlers.cc:667
DEFINE_FWK_SERVICE_MAKER
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
Definition: ServiceMaker.h:109
submitPVResolutionJobs.count
count
Definition: submitPVResolutionJobs.py:352
cond::persistency::search
std::vector< T >::const_iterator search(const cond::Time_t &val, const std::vector< T > &container)
Definition: IOVProxy.cc:21
ParameterSetDescription.h
edm::service::InitRootHandlers::stacktraceHelperThread
static void stacktraceHelperThread()
Definition: InitRootHandlers.cc:624
edm::service::InitRootHandlers::~InitRootHandlers
~InitRootHandlers() override
Definition: InitRootHandlers.cc:831
ServiceMaker.h
getGTfromDQMFile.obj
obj
Definition: getGTfromDQMFile.py:32
edm::service::InitRootHandlers::getPstackArgv
static char *const * getPstackArgv()
Definition: InitRootHandlers.cc:879
edm::ConfigurationDescriptions
Definition: ConfigurationDescriptions.h:28
AlCaHLTBitMon_QueryRunRegistry.string
string
Definition: AlCaHLTBitMon_QueryRunRegistry.py:256
edm::service::InitRootHandlers
Definition: InitRootHandlers.cc:70
UnixSignalHandlers.h
edm::service::SystemBounds
Definition: SystemBounds.h:29
edm::ParameterSet
Definition: ParameterSet.h:47
edm::service::SystemBounds::maxNumberOfThreads
unsigned int maxNumberOfThreads() const
Definition: SystemBounds.h:38
edm::service::InitRootHandlers::loadAllDictionaries_
bool loadAllDictionaries_
Definition: InitRootHandlers.cc:125
ParameterSet
Definition: Functions.h:16
cms::Exception::addAdditionalInfo
void addAdditionalInfo(std::string const &info)
Definition: Exception.cc:169
edm::service::InitRootHandlers::cachePidInfo
void cachePidInfo()
Definition: InitRootHandlers.cc:885
edm::RootHandlers::SeverityLevel::kFatal
edm::hasDictionary
bool hasDictionary(std::type_info const &)
Definition: TypeWithDict.cc:809
watchdog.SIGTERM
SIGTERM
Definition: watchdog.py:83
trackerHitRTTI::vector
Definition: trackerHitRTTI.h:21
fwlog::kWarning
Definition: fwLog.h:35
edm::service::InitRootHandlers::sigBusHandler_
std::shared_ptr< const void > sigBusHandler_
Definition: InitRootHandlers.cc:127
edm::service
Definition: TFileService.h:95
edm::LogError
Log< level::Error, false > LogError
Definition: MessageLogger.h:123
edm::service::InitRootHandlers::ThreadTracker::IDs
const Container_type & IDs()
Definition: InitRootHandlers.cc:88
edm::CurrentModuleOnThread::getCurrentModuleOnThread
static ModuleCallingContext const * getCurrentModuleOnThread()
Definition: CurrentModuleOnThread.h:17
ELseverityLevel.h
edm::service::InitRootHandlers::stackTracePause_
static int stackTracePause_
Definition: InitRootHandlers.cc:121
edm::service::InitRootHandlers::ignoreWarnings_
void ignoreWarnings_(edm::RootHandlers::SeverityLevel level) override
Definition: InitRootHandlers.cc:883
visDQMUpload.buf
buf
Definition: visDQMUpload.py:154
edm::service::InitRootHandlers::ThreadTracker::on_scheduler_entry
void on_scheduler_entry(bool) override
Definition: InitRootHandlers.cc:79
edm::service::InitRootHandlers::ThreadTracker::ThreadTracker
ThreadTracker()
Definition: InitRootHandlers.cc:78
edm::service::InitRootHandlers::resetErrHandler_
bool resetErrHandler_
Definition: InitRootHandlers.cc:124
readEcalDQMStatus.read
read
Definition: readEcalDQMStatus.py:38
writeEcalDQMStatus.write
write
Definition: writeEcalDQMStatus.py:48
edm::ActivityRegistry::watchPreallocate
void watchPreallocate(Preallocate::slot_type const &iSlot)
Definition: ActivityRegistry.h:143
edm::RootHandlers::SeverityLevel
SeverityLevel
Definition: RootHandlers.h:9
edm::service::InitRootHandlers::sigAbrtHandler_
std::shared_ptr< const void > sigAbrtHandler_
Definition: InitRootHandlers.cc:131
edm::service::InitRootHandlers::enableWarnings_
void enableWarnings_() override
Definition: InitRootHandlers.cc:881
edm::service::InitRootHandlers::pstackArgv_
static char *const pstackArgv_[]
Definition: InitRootHandlers.cc:116
submitPVResolutionJobs.desc
string desc
Definition: submitPVResolutionJobs.py:251
edm::service::InitRootHandlers::pidString_
static char pidString_[pidStringLength_]
Definition: InitRootHandlers.cc:115
edm::RootHandlers
Definition: RootHandlers.h:7
edm::service::InitRootHandlers::cmssw_stacktrace
friend int cmssw_stacktrace(void *)
Definition: InitRootHandlers.cc:720
ConfigurationDescriptions
edm::service::InitRootHandlers::doneModules_
static std::atomic< std::size_t > doneModules_
Definition: InitRootHandlers.cc:103
edm::errors::OtherCMS
Definition: EDMException.h:27
edm::service::InitRootHandlers::unloadSigHandler_
bool unloadSigHandler_
Definition: InitRootHandlers.cc:123
or
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
edm::service::InitRootHandlers::nextModule_
static std::atomic< std::size_t > nextModule_
Definition: InitRootHandlers.cc:103
O_NONBLOCK
#define O_NONBLOCK
Definition: SysFile.h:21
RootHandlers.h
data
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:79
edm::service::pstackName
static char pstackName[]
Definition: InitRootHandlers.cc:736
fwlog::kError
Definition: fwLog.h:35
edm::service::cmssw_stacktrace
int cmssw_stacktrace(void *)
Definition: InitRootHandlers.cc:720
EcalCondDBWriter_cfi.location
location
Definition: EcalCondDBWriter_cfi.py:63
mps_fire.result
result
Definition: mps_fire.py:311
edm::installCustomHandler
void installCustomHandler(int signum, CFUNC func)
Definition: UnixSignalHandlers.cc:93
ParameterSet.h
runonSM.text
text
Definition: runonSM.py:43
HerwigMaxPtPartonFilter_cfi.moduleLabel
moduleLabel
Definition: HerwigMaxPtPartonFilter_cfi.py:4
edm::moduleName
std::string moduleName(Provenance const &provenance, ProcessHistory const &history)
Definition: Provenance.cc:27
HLT_FULL_cff.flags
flags
Definition: HLT_FULL_cff.py:13216
edm::service::InitRootHandlers::autoLibraryLoader_
bool autoLibraryLoader_
Definition: InitRootHandlers.cc:126
submitPVValidationJobs.t
string t
Definition: submitPVValidationJobs.py:644
SystemBounds.h
edm::service::InitRootHandlers::sigSegvHandler_
std::shared_ptr< const void > sigSegvHandler_
Definition: InitRootHandlers.cc:128
edm::service::InitRootHandlers::ThreadTracker
Definition: InitRootHandlers.cc:74
muonDTDigis_cfi.pset
pset
Definition: muonDTDigis_cfi.py:27
findQualityFiles.size
size
Write out results.
Definition: findQualityFiles.py:443