Go to the documentation of this file.
20 #include "tbb/task.h"
21 #include "tbb/task_scheduler_observer.h"
22 #include "tbb/concurrent_unordered_set.h"
23 #include <thread>
24 #include <sys/wait.h>
25 #include <sstream>
26 #include <string.h>
27 #include <poll.h>
28 #include <atomic>
30 // WORKAROUND: At CERN, execv is replaced with a non-async-signal safe
31 // version. This can break our stack trace printer. Avoid this by
32 // invoking the syscall directly.
33 #ifdef __linux__
34 #include <syscall.h>
35 #endif
37 #include "TROOT.h"
38 #include "TError.h"
39 #include "TFile.h"
40 #include "TInterpreter.h"
41 #include "TH1.h"
42 #include "TSystem.h"
43 #include "TUnixSystem.h"
44 #include "TTree.h"
45 #include "TVirtualStreamerInfo.h"
47 #include "TThread.h"
48 #include "TClassTable.h"
50 #include <memory>
52 namespace {
53  // size of static buffer allocated for listing module names following a
54  // stacktrace abort
55  constexpr std::size_t moduleBufferSize = 128;
56 }
58 namespace edm {
60  class ParameterSet;
61  class ActivityRegistry;
63  namespace service {
64  class InitRootHandlers : public RootHandlers {
66  friend int cmssw_stacktrace(void *);
68  public:
69  class ThreadTracker : public tbb::task_scheduler_observer {
70  public:
71  typedef tbb::concurrent_unordered_set<pthread_t> Container_type;
73  ThreadTracker() : tbb::task_scheduler_observer() {
74  observe(true);
75  }
76  void on_scheduler_entry(bool) {
77  // ensure thread local has been allocated; not necessary on Linux with
78  // the current cmsRun linkage, but could be an issue if the platform
79  // or linkage leads to "lazy" allocation of the thread local. By
80  // referencing it here we make sure it has been allocated and can be
81  // accessed safely from our signal handler.
83  threadIDs_.insert(pthread_self());
84  }
85  const Container_type& IDs() { return threadIDs_; }
87  private:
88  Container_type threadIDs_;
89  };
91  explicit InitRootHandlers(ParameterSet const& pset, ActivityRegistry& iReg);
92  virtual ~InitRootHandlers();
94  static void fillDescriptions(ConfigurationDescriptions& descriptions);
95  static void stacktraceFromThread();
97  static int stackTracePause() { return stackTracePause_; }
99  static std::vector<std::array<char,moduleBufferSize>> moduleListBuffers_;
100  static std::atomic<std::size_t> nextModule_, doneModules_;
101  private:
102  static char *const *getPstackArgv();
103  virtual void enableWarnings_() override;
104  virtual void ignoreWarnings_() override;
105  virtual void willBeUsingThreads() override;
106  virtual void initializeThisThreadForUse() override;
108  void cachePidInfoHandler(unsigned int, unsigned int) {cachePidInfo();}
109  void cachePidInfo();
110  static void stacktraceHelperThread();
112  static const int pidStringLength_ = 200;
114  static char * const pstackArgv_[];
115  static int parentToChild_[2];
116  static int childToParent_[2];
117  static std::unique_ptr<std::thread> helperThread_;
119  static int stackTracePause_;
125  std::shared_ptr<const void> sigBusHandler_;
126  std::shared_ptr<const void> sigSegvHandler_;
127  std::shared_ptr<const void> sigIllHandler_;
128  std::shared_ptr<const void> sigTermHandler_;
129  };
131  inline
133  return true;
134  }
136  } // end of namespace service
137 } // end of namespace edm
139 namespace edm {
140  namespace service {
141  int cmssw_stacktrace(void *);
142  }
143 }
145 namespace {
146  enum class SeverityLevel {
147  kInfo,
148  kWarning,
149  kError,
150  kSysError,
151  kFatal
152  };
154  static thread_local bool s_ignoreWarnings = false;
156  static bool s_ignoreEverything = false;
158  void RootErrorHandlerImpl(int level, char const* location, char const* message) {
160  bool die = false;
162  // Translate ROOT severity level to MessageLogger severity level
164  SeverityLevel el_severity = SeverityLevel::kInfo;
166  if (level >= kFatal) {
167  el_severity = SeverityLevel::kFatal;
168  } else if (level >= kSysError) {
169  el_severity = SeverityLevel::kSysError;
170  } else if (level >= kError) {
171  el_severity = SeverityLevel::kError;
172  } else if (level >= kWarning) {
173  el_severity = s_ignoreWarnings ? SeverityLevel::kInfo : SeverityLevel::kWarning;
174  }
176  if(s_ignoreEverything) {
177  el_severity = SeverityLevel::kInfo;
178  }
180  // Adapt C-strings to std::strings
181  // Arrange to report the error location as furnished by Root
183  std::string el_location = "@SUB=?";
184  if (location != 0) el_location = std::string("@SUB=")+std::string(location);
186  std::string el_message = "?";
187  if (message != 0) el_message = message;
189  // Try to create a meaningful id string using knowledge of ROOT error messages
190  //
191  // id == "ROOT-ClassName" where ClassName is the affected class
192  // else "ROOT/ClassName" where ClassName is the error-declaring class
193  // else "ROOT"
195  std::string el_identifier = "ROOT";
197  std::string precursor("class ");
198  size_t index1 = el_message.find(precursor);
199  if (index1 != std::string::npos) {
200  size_t index2 = index1 + precursor.length();
201  size_t index3 = el_message.find_first_of(" :", index2);
202  if (index3 != std::string::npos) {
203  size_t substrlen = index3-index2;
204  el_identifier += "-";
205  el_identifier += el_message.substr(index2,substrlen);
206  }
207  } else {
208  index1 = el_location.find("::");
209  if (index1 != std::string::npos) {
210  el_identifier += "/";
211  el_identifier += el_location.substr(0, index1);
212  }
213  }
215  // Intercept some messages and upgrade the severity
217  if ((el_location.find("TBranchElement::Fill") != std::string::npos)
218  && (el_message.find("fill branch") != std::string::npos)
219  && (el_message.find("address") != std::string::npos)
220  && (el_message.find("not set") != std::string::npos)) {
221  el_severity = SeverityLevel::kFatal;
222  }
224  if ((el_message.find("Tree branches") != std::string::npos)
225  && (el_message.find("different numbers of entries") != std::string::npos)) {
226  el_severity = SeverityLevel::kFatal;
227  }
230  // Intercept some messages and downgrade the severity
232  if ((el_message.find("no dictionary for class") != std::string::npos) ||
233  (el_message.find("already in TClassTable") != std::string::npos) ||
234  (el_message.find("matrix not positive definite") != std::string::npos) ||
235  (el_message.find("not a TStreamerInfo object") != std::string::npos) ||
236  (el_message.find("Problems declaring payload") != std::string::npos) ||
237  (el_message.find("Announced number of args different from the real number of argument passed") != std::string::npos) || // Always printed if gDebug>0 - regardless of whether warning message is real.
238  (el_location.find("Fit") != std::string::npos) ||
239  (el_location.find("TDecompChol::Solve") != std::string::npos) ||
240  (el_location.find("THistPainter::PaintInit") != std::string::npos) ||
241  (el_location.find("TUnixSystem::SetDisplay") != std::string::npos) ||
242  (el_location.find("TGClient::GetFontByName") != std::string::npos) ||
243  (el_location.find("Inverter::Dinv") != std::string::npos) ||
244  (el_message.find("nbins is <=0 - set to nbins = 1") != std::string::npos) ||
245  (el_message.find("nbinsy is <=0 - set to nbinsy = 1") != std::string::npos) ||
246  (level < kError and
247  (el_location.find("CINTTypedefBuilder::Setup")!= std::string::npos) and
248  (el_message.find("possible entries are in use!") != std::string::npos))) {
249  el_severity = SeverityLevel::kInfo;
250  }
252  if (el_severity == SeverityLevel::kInfo) {
253  // Don't throw if the message is just informational.
254  die = false;
255  } else {
256  die = true;
257  }
259  // Feed the message to the MessageLogger and let it choose to suppress or not.
261  // Root has declared a fatal error. Throw an EDMException unless the
262  // message corresponds to a pending signal. In that case, do not throw
263  // but let the OS deal with the signal in the usual way.
264  if (die && (el_location != std::string("@SUB=TUnixSystem::DispatchSignals"))) {
265  std::ostringstream sstr;
266  sstr << "Fatal Root Error: " << el_location << "\n" << el_message << '\n';
267  edm::Exception except(edm::errors::FatalRootError, sstr.str());
268  except.addAdditionalInfo(except.message());
269  except.clearMessage();
270  throw except;
272  }
274  // Typically, we get here only for informational messages,
275  // but we leave the other code in just in case we change
276  // the criteria for throwing.
277  if (el_severity == SeverityLevel::kFatal) {
278  edm::LogError("Root_Fatal") << el_location << el_message;
279  } else if (el_severity == SeverityLevel::kSysError) {
280  edm::LogError("Root_Severe") << el_location << el_message;
281  } else if (el_severity == SeverityLevel::kError) {
282  edm::LogError("Root_Error") << el_location << el_message;
283  } else if (el_severity == SeverityLevel::kWarning) {
284  edm::LogWarning("Root_Warning") << el_location << el_message ;
285  } else if (el_severity == SeverityLevel::kInfo) {
286  edm::LogInfo("Root_Information") << el_location << el_message ;
287  }
288  }
290  void RootErrorHandler(int level, bool, char const* location, char const* message) {
291  RootErrorHandlerImpl(level, location, message);
292  }
294  extern "C" {
/// Write the entire NUL-terminated string `text` to `fd`.
///
/// Short writes are continued and EINTR is retried.
/// @return 0 once everything has been written, otherwise -errno of the failing write().
static int full_write(int fd, const char *text)
{
  const char *cursor = text;
  for (size_t remaining = strlen(text); remaining != 0; )
  {
    const ssize_t rc = write(fd, cursor, remaining);
    if (rc == -1)
    {
      if (errno == EINTR) {continue;}
      return -errno;
    }
    remaining -= rc;
    cursor += rc;
  }
  return 0;
}
/// Read exactly `len` bytes from `fd` into `inbuf`.
///
/// @param fd         Descriptor to read from.
/// @param inbuf      Destination buffer of at least `len` bytes.
/// @param len        Number of bytes required.
/// @param timeout_s  Overall deadline in seconds; a negative value means "no deadline",
///                   in which case the descriptor's flags are never touched.
/// @return 0 on success; -ETIMEDOUT if the deadline passes; -EPIPE if end-of-file is
///         reached before `len` bytes arrive; otherwise -errno of the failing call.
///
/// When a deadline is given, the descriptor is switched to non-blocking mode for the
/// duration of the call and its original flags are restored on every exit path.
/// EOF handling is new: previously a read() returning 0 made no progress and the loop
/// span (forever when no deadline was set).
static int full_read(int fd, char *inbuf, size_t len, int timeout_s=-1)
{
  char *cursor = inbuf;
  size_t remaining = len;
  const std::chrono::time_point<std::chrono::steady_clock> deadline =
      std::chrono::steady_clock::now() + std::chrono::seconds(timeout_s);

  int flags;
  if (timeout_s < 0)
  {
    flags = O_NONBLOCK; // Prevents us from trying to set / restore flags later.
  }
  else if ((flags = fcntl(fd, F_GETFL)) == -1)
  {
    return -errno;
  }

  const bool must_restore = (flags & O_NONBLOCK) != O_NONBLOCK;
  if (must_restore && fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1)
  {
    return -errno;
  }

  // Put the descriptor back the way we found it, then hand back `rc`.
  auto finish = [&](int rc) {
    if (must_restore)
    {
      fcntl(fd, F_SETFL, flags);
    }
    return rc;
  };

  while (remaining)
  {
    if (timeout_s >= 0)
    {
      struct pollfd poll_info{fd, POLLIN, 0};
      const int ms_remaining = std::chrono::duration_cast<std::chrono::milliseconds>(
          deadline - std::chrono::steady_clock::now()).count();
      if (ms_remaining > 0)
      {
        // A poll() error falls through to read(), which reports the real problem.
        if (poll(&poll_info, 1, ms_remaining) == 0)
        {
          return finish(-ETIMEDOUT);
        }
      }
      else if (ms_remaining < 0)
      {
        return finish(-ETIMEDOUT);
      }
    }

    const ssize_t got = read(fd, cursor, remaining);
    if (got == -1)
    {
      if ((errno == EINTR) || (errno == EAGAIN) || (errno == EWOULDBLOCK)) {continue;}
      const int orig_errno = errno; // fcntl() in finish() may overwrite errno
      return finish(-orig_errno);
    }
    if (got == 0)
    {
      // End-of-file: the writer is gone, so the remaining bytes can never arrive.
      return finish(-EPIPE);
    }
    remaining -= got;
    cursor += got;
  }
  return finish(0);
}
387  static int full_cerr_write(const char *text)
388  {
389  return full_write(2, text);
390  }
392 // these signals are only used inside the stacktrace signal handler,
393 // so common signals can be used. They do have to be different, since
394 // we do not set SA_NODEFER, and RESUME must be a signal that will
395 // cause sleep() to return early.
396 #if defined(SIGRTMAX)
399 #elif defined(SIGINFO) // macOS/BSD
402 #endif
// Deliberately empty: it exists only so that the resume signal has a handler,
// which is what interrupts the sleep() in the pause handler.
void sig_resume_handler(int /*sig*/, siginfo_t* /*info*/, void* /*context*/) {}
407  // pause a thread so that a (slow) stacktrace will capture the current state
408  void sig_pause_for_stacktrace(int sig, siginfo_t*, void*) {
409  using namespace edm::service;
411 #ifdef RESUME_SIGNAL
412  sigset_t sigset;
413  sigemptyset(&sigset);
414  sigaddset(&sigset, RESUME_SIGNAL);
415  pthread_sigmask(SIG_UNBLOCK, &sigset, 0);
416 #endif
417  // sleep is interrupted by a handled delivery of the resume signal
420  if (InitRootHandlers::doneModules_.is_lock_free() && InitRootHandlers::nextModule_.is_lock_free()) {
423  char* buff = InitRootHandlers::moduleListBuffers_[i].data();
425  strlcpy(buff, "\nModule: ", moduleBufferSize);
427  strlcat(buff, edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleName().c_str(), moduleBufferSize);
428  } else {
429  strlcat(buff, "none", moduleBufferSize);
430  }
432  }
433  }
434  }
436  void sig_dostack_then_abort(int sig, siginfo_t*, void*) {
437  using namespace edm::service;
439  const auto& tids = InitRootHandlers::threadIDs();
441  const auto self = pthread_self();
442 #ifdef PAUSE_SIGNAL
443  if (InitRootHandlers::stackTracePause() > 0 && tids.size() > 1) {
444  // install the "pause" handler
445  struct sigaction act;
446  act.sa_sigaction = sig_pause_for_stacktrace;
447  act.sa_flags = 0;
448  sigemptyset(&act.sa_mask);
449  sigaction(PAUSE_SIGNAL, &act, NULL);
451  // unblock pause signal globally, resume is unblocked in the pause handler
452  sigset_t pausesigset;
453  sigemptyset(&pausesigset);
454  sigaddset(&pausesigset, PAUSE_SIGNAL);
455  sigprocmask(SIG_UNBLOCK, &pausesigset, 0);
457  // send a pause signal to all CMSSW/TBB threads other than self
458  for (auto id : tids) {
459  if (self != id) {
460  pthread_kill(id, PAUSE_SIGNAL);
461  }
462  }
464 #ifdef RESUME_SIGNAL
465  // install the "resume" handler
466  act.sa_sigaction = sig_resume_handler;
467  sigaction(RESUME_SIGNAL, &act, NULL);
468 #endif
469  }
470 #endif
472  const char* signalname = "unknown";
473  switch (sig) {
474  case SIGBUS:
475  {
476  signalname = "bus error";
477  break;
478  }
479  case SIGSEGV:
480  {
481  signalname = "segmentation violation";
482  break;
483  }
484  case SIGILL:
485  {
486  signalname = "illegal instruction";
487  break;
488  }
489  case SIGTERM:
490  {
491  signalname = "external termination request";
492  break;
493  }
494  default:
495  break;
496  }
497  full_cerr_write("\n\nA fatal system signal has occurred: ");
498  full_cerr_write(signalname);
499  full_cerr_write("\nThe following is the call stack containing the origin of the signal.\n\n");
503  // resume the signal handlers to store the current module; we are not guaranteed they
504  // will have time to store their modules, so there is a race condition; this could be
505  // avoided by storing the module information before sleeping, a change that may be
506  // made when we're convinced accessing the thread-local current module is safe.
507 #ifdef RESUME_SIGNAL
508  std::size_t notified = 0;
509  if (InitRootHandlers::stackTracePause() > 0 && tids.size() > 1) {
510  for (auto id : tids) {
511  if (self != id) {
512  if (pthread_kill(id, RESUME_SIGNAL) == 0) ++notified;
513  }
514  }
515  }
516 #endif
518  full_cerr_write("\nCurrent Modules:\n");
520  // Checking tids.count(self) ensures that we only try to access the current module in
521  // CMSSW/TBB threads. Those threads access the thread-local current module at the same
522  // time the thread is registered, so any lazy allocation will have been done at that
523  // point. Not necessary on Linux with the current cmsRun linkage, as the thread-local
524  // is allocated at exec time, not lazily.
525  if (tids.count(self) > 0) {
526  char buff[moduleBufferSize] = "\nModule: ";
528  strlcat(buff, edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleName().c_str(), moduleBufferSize);
529  } else {
530  strlcat(buff, "none", moduleBufferSize);
531  }
532  strlcat(buff, " (crashed)", moduleBufferSize);
533  full_cerr_write(buff);
534  } else {
535  full_cerr_write("\nModule: non-CMSSW (crashed)");
536  }
538 #ifdef PAUSE_SIGNAL
539  // wait a short interval for the paused threads to resume and fill in their module
540  // information, then print
541  if (InitRootHandlers::doneModules_.is_lock_free()) {
542  int spincount = 0;
543  timespec t = { 0, 1000 };
544  while (++spincount < 1000 && InitRootHandlers::doneModules_ < notified) { nanosleep(&t, nullptr); }
545  for (std::size_t i = 0; i < InitRootHandlers::doneModules_; ++i) {
546  full_cerr_write(InitRootHandlers::moduleListBuffers_[i].data());
547  }
548  }
549 #endif
551  full_cerr_write("\n\nA fatal system signal has occurred: ");
552  full_cerr_write(signalname);
553  full_cerr_write("\n");
555  // For these four known cases, re-raise the signal to get the correct
556  // exit code.
557  if ((sig == SIGILL) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig == SIGTERM))
558  {
559  signal(sig, SIG_DFL);
560  raise(sig);
561  }
562  else
563  {
564  ::abort();
565  }
566  }
568  void sig_abort(int sig, siginfo_t*, void*) {
569  full_cerr_write("\n\nFatal system signal has occurred during exit\n");
571  // re-raise the signal to get the correct exit code
572  signal(sig, SIG_DFL);
573  raise(sig);
575  // shouldn't get here
576  ::sleep(10);
577  ::abort();
578  }
579  }
// Restore the default disposition of the four signals this file installs
// custom handlers for (SIGILL, SIGSEGV, SIGBUS, SIGTERM), in that order.
void set_default_signals() {
  constexpr int handledSignals[] = {SIGILL, SIGSEGV, SIGBUS, SIGTERM};
  for (const int signum : handledSignals) {
    signal(signum, SIG_DFL);
  }
}
588 } // end of unnamed namespace
590 namespace edm {
591  namespace service {
593  /*
594  * We've run into issues where GDB fails to print the thread which calls clone().
595  * To avoid this problem, we have an alternate approach below where the signal handler
596  * only reads/writes to a dedicated thread via pipes. The helper thread does the clone()
597  * invocation; we don't care if that thread is missing from the traceback in this case.
598  */
599  static void cmssw_stacktrace_fork();
602  {
603  int toParent = childToParent_[1];
604  int fromParent = parentToChild_[0];
605  char buf[2]; buf[1] = '\0';
607  while(true)
608  {
609  int result = full_read(fromParent, buf, 1);
610  if (result < 0)
611  {
612  // To avoid a deadlock (this function is NOT re-entrant), reset signals
613  // We never set them back to the CMSSW handler because we assume the parent
614  // thread will abort for us.
615  set_default_signals();
616  close(toParent);
617  full_cerr_write("\n\nTraceback helper thread failed to read from parent: ");
618  full_cerr_write(strerror(-result));
619  full_cerr_write("\n");
620  ::abort();
621  }
622  if (buf[0] == '1')
623  {
624  set_default_signals();
626  full_write(toParent, buf);
627  }
628  else if (buf[0] == '2')
629  {
630  // We have just finished forking. Reload the file descriptors for thread
631  // communication.
632  close(toParent);
633  close(fromParent);
634  toParent = childToParent_[1];
635  fromParent = parentToChild_[0];
636  }
637  else if (buf[0] == '3')
638  {
639  break;
640  }
641  else
642  {
643  set_default_signals();
644  close(toParent);
645  full_cerr_write("\n\nTraceback helper thread got unknown command from parent: ");
646  full_cerr_write(buf);
647  full_cerr_write("\n");
648  ::abort();
649  }
650  }
651  }
654  {
655  int result = full_write(parentToChild_[1], "1");
656  if (result < 0)
657  {
658  full_cerr_write("\n\nAttempt to request stacktrace failed: ");
659  full_cerr_write(strerror(-result));
660  full_cerr_write("\n");
661  return;
662  }
663  char buf[2]; buf[1] = '\0';
664  if ((result = full_read(childToParent_[0], buf, 1, 5*60)) < 0)
665  {
666  full_cerr_write("\n\nWaiting for stacktrace completion failed: ");
667  if (result == -ETIMEDOUT)
668  {
669  full_cerr_write("timed out waiting for GDB to complete.");
670  }
671  else
672  {
673  full_cerr_write(strerror(-result));
674  }
675  full_cerr_write("\n");
676  return;
677  }
678  }
681  {
682  char child_stack[4*1024];
683  char *child_stack_ptr = child_stack + 4*1024;
684  // On Linux, we currently use jemalloc. This registers pthread_atfork handlers; these
685  // handlers are *not* async-signal safe. Hence, a deadlock is possible if we invoke
686  // fork() from our signal handlers. Accordingly, we use clone (not POSIX, but AS-safe)
687  // as that is closer to the 'raw metal' syscall and avoids pthread_atfork handlers.
688  int pid =
689 #ifdef __linux__
690  clone(edm::service::cmssw_stacktrace, child_stack_ptr, CLONE_VM|CLONE_FS|SIGCHLD, nullptr);
691 #else
692  fork();
693  if (child_stack_ptr) {} // Suppress 'unused variable' warning on non-Linux
694  if (pid == 0) {edm::service::cmssw_stacktrace(nullptr); ::abort();}
695 #endif
696  if (pid == -1)
697  {
698  full_cerr_write("(Attempt to perform stack dump failed.)\n");
699  }
700  else
701  {
702  int status;
703  if (waitpid(pid, &status, 0) == -1)
704  {
705  full_cerr_write("(Failed to wait on stack dump output.)\n");
706  }
707  if (status)
708  {
709  full_cerr_write("(GDB stack trace failed unexpectedly)\n");
710  }
711  }
712  }
714  int cmssw_stacktrace(void * /*arg*/)
715  {
717  // NOTE: this is NOT async-signal-safe at CERN's lxplus service.
718  // CERN uses LD_PRELOAD to replace execv with a function from libsnoopy which
719  // calls dlsym.
720 #ifdef __linux__
721  syscall(SYS_execve, "/bin/sh", argv, __environ);
722 #else
723  execv("/bin/sh", argv);
724 #endif
725  ::abort();
726  return 1;
727  }
729  namespace {
731  void localInitializeThisThreadForUse() {
732  static thread_local TThread guard;
733  }
735  class InitializeThreadTask : public tbb::task {
736  public:
737  InitializeThreadTask(std::atomic<unsigned int>* counter,
738  tbb::task* waitingTask):
739  threadsLeft_(counter),
740  waitTask_(waitingTask) {}
742  tbb::task* execute() override {
743  //For each tbb thread, setup the initialization
744  // required by ROOT and then wait until all
745  // threads have done so in order to guarantee the all get setup
747  localInitializeThisThreadForUse();
748  (*threadsLeft_)--;
749  while(0 != threadsLeft_->load());
750  waitTask_->decrement_ref_count();
751  return nullptr;
752  }
753  private:
754  std::atomic<unsigned int>* threadsLeft_;
755  tbb::task* waitTask_;
756  };
757  }
759  static char pstackName[] = "(CMSSW stack trace helper)";
760  static char dashC[] = "-c";
763  int InitRootHandlers::parentToChild_[2] = {-1, -1};
764  int InitRootHandlers::childToParent_[2] = {-1, -1};
765  std::unique_ptr<std::thread> InitRootHandlers::helperThread_;
767  std::vector<std::array<char,moduleBufferSize>> InitRootHandlers::moduleListBuffers_;
768  std::atomic<std::size_t> InitRootHandlers::nextModule_(0), InitRootHandlers::doneModules_(0);
773  : RootHandlers(),
774  unloadSigHandler_(pset.getUntrackedParameter<bool> ("UnloadRootSigHandler")),
775  resetErrHandler_(pset.getUntrackedParameter<bool> ("ResetRootErrHandler")),
776  loadAllDictionaries_(pset.getUntrackedParameter<bool>("LoadAllDictionaries")),
777  autoLibraryLoader_(loadAllDictionaries_ or pset.getUntrackedParameter<bool> ("AutoLibraryLoader"))
778  {
779  stackTracePause_ = pset.getUntrackedParameter<int> ("StackTracePauseTime");
781  if(unloadSigHandler_) {
782  // Deactivate all the Root signal handlers and restore the system defaults
783  gSystem->ResetSignal(kSigChild);
784  gSystem->ResetSignal(kSigBus);
785  gSystem->ResetSignal(kSigSegmentationViolation);
786  gSystem->ResetSignal(kSigIllegalInstruction);
787  gSystem->ResetSignal(kSigSystem);
788  gSystem->ResetSignal(kSigPipe);
789  gSystem->ResetSignal(kSigAlarm);
790  gSystem->ResetSignal(kSigUrgent);
791  gSystem->ResetSignal(kSigFloatingException);
792  gSystem->ResetSignal(kSigWindowChanged);
793  } else if(pset.getUntrackedParameter<bool>("AbortOnSignal")){
794  cachePidInfo();
796  //NOTE: ROOT can also be told to abort on these kinds of problems BUT
797  // it requires an TApplication to be instantiated which causes problems
798  gSystem->ResetSignal(kSigBus);
799  gSystem->ResetSignal(kSigSegmentationViolation);
800  gSystem->ResetSignal(kSigIllegalInstruction);
801  installCustomHandler(SIGBUS,sig_dostack_then_abort);
802  sigBusHandler_ = std::shared_ptr<const void>(nullptr,[](void*) {
803  installCustomHandler(SIGBUS,sig_abort);
804  });
805  installCustomHandler(SIGSEGV,sig_dostack_then_abort);
806  sigSegvHandler_ = std::shared_ptr<const void>(nullptr,[](void*) {
807  installCustomHandler(SIGSEGV,sig_abort);
808  });
809  installCustomHandler(SIGILL,sig_dostack_then_abort);
810  sigIllHandler_ = std::shared_ptr<const void>(nullptr,[](void*) {
811  installCustomHandler(SIGILL,sig_abort);
812  });
813  installCustomHandler(SIGTERM,sig_dostack_then_abort);
814  sigTermHandler_ = std::shared_ptr<const void>(nullptr,[](void*) {
815  installCustomHandler(SIGTERM,sig_abort);
816  });
818  }
820  //Initialize each TBB thread so ROOT knows about them
821  iReg.watchPreallocate( [](service::SystemBounds const& iBounds) {
822  auto const nThreads =iBounds.maxNumberOfThreads();
823  if(nThreads > 1) {
824  std::atomic<unsigned int> threadsLeft{nThreads};
826  std::shared_ptr<tbb::empty_task> waitTask{new (tbb::task::allocate_root()) tbb::empty_task{},
827  [](tbb::empty_task* iTask){tbb::task::destroy(*iTask);} };
829  waitTask->set_ref_count(1+nThreads);
830  for(unsigned int i=0; i<nThreads;++i) {
831  tbb::task::spawn( *( new(tbb::task::allocate_root()) InitializeThreadTask(&threadsLeft, waitTask.get())));
832  }
834  waitTask->wait_for_all();
836  }
837  }
838  );
840  iReg.watchPreallocate([this](edm::service::SystemBounds const& iBounds){
841  if (iBounds.maxNumberOfThreads() > moduleListBuffers_.size()) {
842  moduleListBuffers_.resize(iBounds.maxNumberOfThreads());
843  }
844  });
846  if(resetErrHandler_) {
848  // Replace the Root error handler with one that uses the MessageLogger
849  SetErrorHandler(RootErrorHandler);
850  }
852  // Enable automatic Root library loading.
853  if(autoLibraryLoader_) {
854  gInterpreter->SetClassAutoloading(1);
855  }
857  // Set ROOT parameters.
858  TTree::SetMaxTreeSize(kMaxLong64);
859  TH1::AddDirectory(kFALSE);
860  //G__SetCatchException(0);
862  // Set custom streamers
865  // Load the library containing dictionaries for std:: classes, if not already loaded.
866  if (!hasDictionary(typeid(std::vector<std::vector<unsigned int> >))) {
867  TypeWithDict::byName("std::vector<std::vector<unsigned int> >");
868  }
870  int debugLevel = pset.getUntrackedParameter<int>("DebugLevel");
871  if(debugLevel >0) {
872  gDebug = debugLevel;
873  }
874  }
877  // close all open ROOT files
878  TIter iter(gROOT->GetListOfFiles());
879  TObject *obj = nullptr;
880  while(nullptr != (obj = iter.Next())) {
881  TFile* f = dynamic_cast<TFile*>(obj);
882  if(f) {
883  // We get a new iterator each time,
884  // because closing a file can invalidate the iterator
885  f->Close();
886  iter = TIter(gROOT->GetListOfFiles());
887  }
888  }
889  }
892  //Tell Root we want to be multi-threaded
893  TThread::Initialize();
894  //When threading, also have to keep ROOT from logging all TObjects into a list
895  TObject::SetObjectStat(false);
897  //Have to avoid having Streamers modify themselves after they have been used
898  TVirtualStreamerInfo::Optimize(false);
899  }
902  localInitializeThisThreadForUse();
903  }
907  desc.setComment("Centralized interface to ROOT.");
908  desc.addUntracked<bool>("UnloadRootSigHandler", false)
909  ->setComment("If True, signals are handled by this service, rather than by ROOT.");
910  desc.addUntracked<bool>("ResetRootErrHandler", true)
911  ->setComment("If True, ROOT messages (e.g. errors, warnings) are handled by this service, rather than by ROOT.");
912  desc.addUntracked<bool>("AutoLibraryLoader", true)
913  ->setComment("If True, enables automatic loading of data dictionaries.");
914  desc.addUntracked<bool>("LoadAllDictionaries",false)
915  ->setComment("If True, loads all ROOT dictionaries.");
916  desc.addUntracked<bool>("AbortOnSignal",true)
917  ->setComment("If True, do an abort when a signal occurs that causes a crash. If False, ROOT will do an exit which attempts to do a clean shutdown.");
918  desc.addUntracked<int>("DebugLevel",0)
919  ->setComment("Sets ROOT's gDebug value.");
920  desc.addUntracked<int>("StackTracePauseTime", 300)
921  ->setComment("Seconds to pause other threads during stack trace.");
922  descriptions.add("InitRootHandlers", desc);
923  }
925  char *const *
927  return pstackArgv_;
928  }
930  void
932  s_ignoreWarnings =false;
933  }
935  void
937  s_ignoreWarnings = true;
938  }
940  void
942  {
943  if (snprintf(pidString_, pidStringLength_-1, "gdb -quiet -p %d 2>&1 <<EOF |\n"
944  "set width 0\n"
945  "set height 0\n"
946  "set pagination no\n"
947  "thread apply all bt\n"
948  "EOF\n"
949  "/bin/sed -n -e 's/^\\((gdb) \\)*//' -e '/^#/p' -e '/^Thread/p'", getpid()) >= pidStringLength_)
950  {
951  std::ostringstream sstr;
952  sstr << "Unable to pre-allocate stacktrace handler information";
953  edm::Exception except(edm::errors::OtherCMS, sstr.str());
954  throw except;
955  }
957  // These are initialized to -1; harmless to close an invalid FD.
958  // If this is called post-fork, we don't want to be communicating on
959  // these FDs as they are used internally by the parent.
960  close(childToParent_[0]);
961  close(childToParent_[1]);
962  childToParent_[0] = -1; childToParent_[1] = -1;
963  close(parentToChild_[0]);
964  close(parentToChild_[1]);
965  parentToChild_[0] = -1; parentToChild_[1] = -1;
967  if (-1 == pipe2(childToParent_, O_CLOEXEC))
968  {
969  std::ostringstream sstr;
970  sstr << "Failed to create child-to-parent pipes (errno=" << errno << "): " << strerror(errno);
971  edm::Exception except(edm::errors::OtherCMS, sstr.str());
972  throw except;
973  }
975  if (-1 == pipe2(parentToChild_, O_CLOEXEC))
976  {
977  close(childToParent_[0]); close(childToParent_[1]);
978  childToParent_[0] = -1; childToParent_[1] = -1;
979  std::ostringstream sstr;
980  sstr << "Failed to create child-to-parent pipes (errno=" << errno << "): " << strerror(errno);
981  edm::Exception except(edm::errors::OtherCMS, sstr.str());
982  throw except;
983  }
985  helperThread_.reset(new std::thread(stacktraceHelperThread));
986  helperThread_->detach();
987  }
989  } // end of namespace service
990 } // end of namespace edm
Write out results.
unsigned int maxNumberOfThreads() const
Definition: SystemBounds.h:46
T getUntrackedParameter(std::string const &, T const &) const
int i
virtual void enableWarnings_() override
static void cmssw_stacktrace_fork()
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
Definition: ServiceMaker.h:117
double seconds()
void watchPreallocate(Preallocate::slot_type const &iSlot)
static char *const pstackArgv_[]
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
def destroy(e)
bool isProcessWideService(TFileService const *)
Definition: TFileService.h:99
static ModuleCallingContext const * getCurrentModuleOnThread()
void setRefCoreStreamer(bool resetAll=false)
std::vector< Variable::Flags > flags
#define NULL
Definition: scimark2.h:8
void installCustomHandler(int signum, CFUNC func)
std::shared_ptr< const void > sigSegvHandler_
#define constexpr
std::shared_ptr< const void > sigBusHandler_
static TypeWithDict byName(std::string const &name)
static std::atomic< std::size_t > doneModules_
static const ThreadTracker::Container_type & threadIDs()
void cachePidInfoHandler(unsigned int, unsigned int)
std::atomic< unsigned int > * threadsLeft_
void setComment(std::string const &value)
std::string moduleName(Provenance const &provenance)
static std::atomic< std::size_t > nextModule_
static char pidString_[pidStringLength_]
static ThreadTracker threadTracker_
edm::serviceregistry::AllArgsMaker< edm::RootHandlers, InitRootHandlers > RootHandlersMaker
static char *const * getPstackArgv()
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
std::shared_ptr< const void > sigIllHandler_
virtual void initializeThisThreadForUse() override
virtual void ignoreWarnings_() override
std::shared_ptr< const void > sigTermHandler_
void addAdditionalInfo(std::string const &info)
double f[11][100]
int cmssw_stacktrace(void *)
tbb::task * waitTask_
static std::unique_ptr< std::thread > helperThread_
static std::vector< std::array< char, moduleBufferSize > > moduleListBuffers_
static char pstackName[]
InitRootHandlers(ParameterSet const &pset, ActivityRegistry &iReg)
tbb::concurrent_unordered_set< pthread_t > Container_type
void add(std::string const &label, ParameterSetDescription const &psetDescription)
TEveGeoShape * clone(const TEveElement *element, TEveElement *parent)
void watchPostForkReacquireResources(PostForkReacquireResources::slot_type const &iSlot)
virtual void willBeUsingThreads() override
static char dashC[]
HLT enums.
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:82
static void fillDescriptions(ConfigurationDescriptions &descriptions)
bool hasDictionary(std::type_info const &)
#define O_NONBLOCK
Definition: SysFile.h:21
def write(self, setup)