CMS 3D CMS Logo

InitRootHandlers.cc
Go to the documentation of this file.
2 
4 
20 
21 #include "tbb/task.h"
22 #include "tbb/task_scheduler_observer.h"
23 #include "tbb/concurrent_unordered_set.h"
24 #include <thread>
25 #include <sys/wait.h>
26 #include <sstream>
27 #include <string.h>
28 #include <poll.h>
29 #include <atomic>
30 
31 // WORKAROUND: At CERN, execv is replaced with a non-async-signal safe
32 // version. This can break our stack trace printer. Avoid this by
33 // invoking the syscall directly.
34 #ifdef __linux__
35 #include <syscall.h>
36 #endif
37 
38 #include "TROOT.h"
39 #include "TError.h"
40 #include "TFile.h"
41 #include "TInterpreter.h"
42 #include "TH1.h"
43 #include "TSystem.h"
44 #include "TUnixSystem.h"
45 #include "TTree.h"
46 #include "TVirtualStreamerInfo.h"
47 
48 #include "TClassTable.h"
49 
50 #include <memory>
51 
namespace {
  /// Capacity, in bytes, of each statically allocated buffer that holds one
  /// module name for the listing printed after a stack-trace abort.
  constexpr std::size_t moduleBufferSize{128};
}  // unnamed namespace
57 
58 namespace edm {
60  class ParameterSet;
61  class ActivityRegistry;
62 
63  namespace service {
64  class InitRootHandlers : public RootHandlers {
65 
66  friend int cmssw_stacktrace(void *);
67 
68  public:
69  class ThreadTracker : public tbb::task_scheduler_observer {
70  public:
71  typedef tbb::concurrent_unordered_set<pthread_t> Container_type;
72 
73  ThreadTracker() : tbb::task_scheduler_observer() {
74  observe(true);
75  }
76  void on_scheduler_entry(bool) {
77  // ensure thread local has been allocated; not necessary on Linux with
78  // the current cmsRun linkage, but could be an issue if the platform
79  // or linkage leads to "lazy" allocation of the thread local. By
80  // referencing it here we make sure it has been allocated and can be
81  // accessed safely from our signal handler.
83  threadIDs_.insert(pthread_self());
84  }
85  const Container_type& IDs() { return threadIDs_; }
86 
87  private:
88  Container_type threadIDs_;
89  };
90 
91  explicit InitRootHandlers(ParameterSet const& pset, ActivityRegistry& iReg);
92  virtual ~InitRootHandlers();
93 
94  static void fillDescriptions(ConfigurationDescriptions& descriptions);
95  static void stacktraceFromThread();
97  static int stackTracePause() { return stackTracePause_; }
98 
99  static std::vector<std::array<char,moduleBufferSize>> moduleListBuffers_;
100  static std::atomic<std::size_t> nextModule_, doneModules_;
101  private:
102  static char *const *getPstackArgv();
103  virtual void enableWarnings_() override;
104  virtual void ignoreWarnings_() override;
105  virtual void willBeUsingThreads() override;
106 
107  void cachePidInfoHandler(unsigned int, unsigned int) {
108  //this is called only on a fork, so the thread doesn't
109  // actually exist anymore
110  helperThread_.reset();
111  cachePidInfo();}
112  void cachePidInfo();
113  static void stacktraceHelperThread();
114 
115  static const int pidStringLength_ = 200;
117  static char * const pstackArgv_[];
118  static int parentToChild_[2];
119  static int childToParent_[2];
120  static std::unique_ptr<std::thread> helperThread_;
122  static int stackTracePause_;
123 
128  std::shared_ptr<const void> sigBusHandler_;
129  std::shared_ptr<const void> sigSegvHandler_;
130  std::shared_ptr<const void> sigIllHandler_;
131  std::shared_ptr<const void> sigTermHandler_;
132  };
133 
134  inline
136  return true;
137  }
138 
139  } // end of namespace service
140 } // end of namespace edm
141 
142 namespace edm {
143  namespace service {
144  int cmssw_stacktrace(void *);
145  }
146 }
147 
148 namespace {
// Severity scale used when reporting ROOT messages. Enumerators are declared
// from least (kInfo) to most (kFatal) severe.
enum class SeverityLevel { kInfo, kWarning, kError, kSysError, kFatal };
156 
// Per-thread switch: while true, ROOT warnings are reported at Info severity.
// (Already file-local by virtue of the enclosing unnamed namespace.)
thread_local bool s_ignoreWarnings{false};

// Process-wide switch: while true, every ROOT message is reported at Info severity.
bool s_ignoreEverything{false};
160 
161  void RootErrorHandlerImpl(int level, char const* location, char const* message) {
162 
163  bool die = false;
164 
165  // Translate ROOT severity level to MessageLogger severity level
166 
167  SeverityLevel el_severity = SeverityLevel::kInfo;
168 
169  if (level >= kFatal) {
170  el_severity = SeverityLevel::kFatal;
171  } else if (level >= kSysError) {
172  el_severity = SeverityLevel::kSysError;
173  } else if (level >= kError) {
174  el_severity = SeverityLevel::kError;
175  } else if (level >= kWarning) {
176  el_severity = s_ignoreWarnings ? SeverityLevel::kInfo : SeverityLevel::kWarning;
177  }
178 
179  if(s_ignoreEverything) {
180  el_severity = SeverityLevel::kInfo;
181  }
182 
183  // Adapt C-strings to std::strings
184  // Arrange to report the error location as furnished by Root
185 
186  std::string el_location = "@SUB=?";
187  if (location != 0) el_location = std::string("@SUB=")+std::string(location);
188 
189  std::string el_message = "?";
190  if (message != 0) el_message = message;
191 
192  // Try to create a meaningful id string using knowledge of ROOT error messages
193  //
194  // id == "ROOT-ClassName" where ClassName is the affected class
195  // else "ROOT/ClassName" where ClassName is the error-declaring class
196  // else "ROOT"
197 
198  std::string el_identifier = "ROOT";
199 
200  std::string precursor("class ");
201  size_t index1 = el_message.find(precursor);
202  if (index1 != std::string::npos) {
203  size_t index2 = index1 + precursor.length();
204  size_t index3 = el_message.find_first_of(" :", index2);
205  if (index3 != std::string::npos) {
206  size_t substrlen = index3-index2;
207  el_identifier += "-";
208  el_identifier += el_message.substr(index2,substrlen);
209  }
210  } else {
211  index1 = el_location.find("::");
212  if (index1 != std::string::npos) {
213  el_identifier += "/";
214  el_identifier += el_location.substr(0, index1);
215  }
216  }
217 
218  // Intercept some messages and upgrade the severity
219 
220  if ((el_location.find("TBranchElement::Fill") != std::string::npos)
221  && (el_message.find("fill branch") != std::string::npos)
222  && (el_message.find("address") != std::string::npos)
223  && (el_message.find("not set") != std::string::npos)) {
224  el_severity = SeverityLevel::kFatal;
225  }
226 
227  if ((el_message.find("Tree branches") != std::string::npos)
228  && (el_message.find("different numbers of entries") != std::string::npos)) {
229  el_severity = SeverityLevel::kFatal;
230  }
231 
232 
233  // Intercept some messages and downgrade the severity
234 
235  if ((el_message.find("no dictionary for class") != std::string::npos) ||
236  (el_message.find("already in TClassTable") != std::string::npos) ||
237  (el_message.find("matrix not positive definite") != std::string::npos) ||
238  (el_message.find("not a TStreamerInfo object") != std::string::npos) ||
239  (el_message.find("Problems declaring payload") != std::string::npos) ||
240  (el_message.find("Announced number of args different from the real number of argument passed") != std::string::npos) || // Always printed if gDebug>0 - regardless of whether warning message is real.
241  (el_location.find("Fit") != std::string::npos) ||
242  (el_location.find("TDecompChol::Solve") != std::string::npos) ||
243  (el_location.find("THistPainter::PaintInit") != std::string::npos) ||
244  (el_location.find("TUnixSystem::SetDisplay") != std::string::npos) ||
245  (el_location.find("TGClient::GetFontByName") != std::string::npos) ||
246  (el_location.find("Inverter::Dinv") != std::string::npos) ||
247  (el_message.find("nbins is <=0 - set to nbins = 1") != std::string::npos) ||
248  (el_message.find("nbinsy is <=0 - set to nbinsy = 1") != std::string::npos) ||
249  (level < kError and
250  (el_location.find("CINTTypedefBuilder::Setup")!= std::string::npos) and
251  (el_message.find("possible entries are in use!") != std::string::npos))) {
252  el_severity = SeverityLevel::kInfo;
253  }
254 
255  if (el_severity == SeverityLevel::kInfo) {
256  // Don't throw if the message is just informational.
257  die = false;
258  } else {
259  die = true;
260  }
261 
262  // Feed the message to the MessageLogger and let it choose to suppress or not.
263 
264  // Root has declared a fatal error. Throw an EDMException unless the
265  // message corresponds to a pending signal. In that case, do not throw
266  // but let the OS deal with the signal in the usual way.
267  if (die && (el_location != std::string("@SUB=TUnixSystem::DispatchSignals"))) {
268  std::ostringstream sstr;
269  sstr << "Fatal Root Error: " << el_location << "\n" << el_message << '\n';
270  edm::Exception except(edm::errors::FatalRootError, sstr.str());
271  except.addAdditionalInfo(except.message());
272  except.clearMessage();
273  throw except;
274 
275  }
276 
277  // Typically, we get here only for informational messages,
278  // but we leave the other code in just in case we change
279  // the criteria for throwing.
280  if (el_severity == SeverityLevel::kFatal) {
281  edm::LogError("Root_Fatal") << el_location << el_message;
282  } else if (el_severity == SeverityLevel::kSysError) {
283  edm::LogError("Root_Severe") << el_location << el_message;
284  } else if (el_severity == SeverityLevel::kError) {
285  edm::LogError("Root_Error") << el_location << el_message;
286  } else if (el_severity == SeverityLevel::kWarning) {
287  edm::LogWarning("Root_Warning") << el_location << el_message ;
288  } else if (el_severity == SeverityLevel::kInfo) {
289  edm::LogInfo("Root_Information") << el_location << el_message ;
290  }
291  }
292 
293  void RootErrorHandler(int level, bool, char const* location, char const* message) {
294  RootErrorHandlerImpl(level, location, message);
295  }
296 
297  extern "C" {
298 
// Write the whole NUL-terminated string `text` to `fd`, resuming after short
// writes and after interruption by a signal (EINTR).
// Returns 0 once everything has been written, or -errno on the first
// unrecoverable write() failure.
static int full_write(int fd, const char *text)
{
  const char *cursor = text;
  for (size_t remaining = strlen(text); remaining > 0; )
  {
    const ssize_t n = write(fd, cursor, remaining);
    if (n != -1)
    {
      // Partial or complete write: advance past what was accepted.
      cursor += n;
      remaining -= n;
      continue;
    }
    if (errno != EINTR)
    {
      return -errno;
    }
    // EINTR: simply try again.
  }
  return 0;
}
317 
// Read exactly `len` bytes from `fd` into `inbuf`.
//
// Parameters:
//   fd        - descriptor to read from.
//   inbuf     - destination; must have room for `len` bytes.
//   len       - number of bytes to read; 0 returns immediately with success.
//   timeout_s - overall deadline in seconds; a negative value (the default)
//               means no deadline, and the descriptor's flags are left alone.
//
// Returns 0 on success, or a negated errno value:
//   -ETIMEDOUT  the deadline passed before `len` bytes arrived,
//   -EPIPE      end-of-file was reached first (the writing side was closed),
//   -errno      from a failing fcntl() or read().
//
// When a deadline is given and the descriptor is blocking, it is switched to
// O_NONBLOCK for the duration of the call and its original flags are put back
// on every exit path.
static int full_read(int fd, char *inbuf, size_t len, int timeout_s=-1)
{
  char *buf = inbuf;
  size_t count = len;
  const std::chrono::time_point<std::chrono::steady_clock> end_time =
      std::chrono::steady_clock::now() + std::chrono::seconds(timeout_s);

  int flags;
  if (timeout_s < 0)
  {
    flags = O_NONBLOCK; // Prevents us from trying to set / restore flags later.
  }
  else if ((-1 == (flags = fcntl(fd, F_GETFL))))
  {
    return -errno;
  }
  const bool restore_flags = ((flags & O_NONBLOCK) != O_NONBLOCK);
  if (restore_flags && (-1 == fcntl(fd, F_SETFL, flags | O_NONBLOCK)))
  {
    return -errno;
  }

  int result = 0;
  while (count)
  {
    if (timeout_s >= 0)
    {
      struct pollfd poll_info{fd, POLLIN, 0};
      const int ms_remaining = std::chrono::duration_cast<std::chrono::milliseconds>(end_time-std::chrono::steady_clock::now()).count();
      if (ms_remaining > 0)
      {
        if (poll(&poll_info, 1, ms_remaining) == 0)
        {
          result = -ETIMEDOUT;
          break;
        }
      }
      else if (ms_remaining < 0)
      {
        result = -ETIMEDOUT;
        break;
      }
    }
    const ssize_t complete = read(fd, buf, count);
    if (complete == -1)
    {
      if ((errno == EINTR) || (errno == EAGAIN) || (errno == EWOULDBLOCK)) {continue;}
      result = -errno;  // captured before fcntl() below can overwrite errno
      break;
    }
    if (complete == 0)
    {
      // End-of-file: no more data will ever arrive. Without this check the
      // loop would never make progress and would spin instead of returning.
      result = -EPIPE;
      break;
    }
    count -= complete;
    buf += complete;
  }

  if (restore_flags)
  {
    fcntl(fd, F_SETFL, flags);
  }
  return result;
}
389 
390  static int full_cerr_write(const char *text)
391  {
392  return full_write(2, text);
393  }
394 
395 // these signals are only used inside the stacktrace signal handler,
396 // so common signals can be used. They do have to be different, since
397 // we do not set SA_NODEFER, and RESUME must be a signal that will
398 // cause sleep() to return early.
399 #if defined(SIGRTMAX)
400 #define PAUSE_SIGNAL SIGRTMAX
401 #define RESUME_SIGNAL SIGRTMAX-1
402 #elif defined(SIGINFO) // macOS/BSD
403 #define PAUSE_SIGNAL SIGINFO
404 #define RESUME_SIGNAL SIGALRM
405 #endif
406 
// Deliberately empty. It exists only so that delivery of the resume signal is
// "handled", which interrupts the sleep() in the pause handler.
void sig_resume_handler(int /*sig*/, siginfo_t* /*info*/, void* /*context*/) {
}
409 
410  // pause a thread so that a (slow) stacktrace will capture the current state
411  void sig_pause_for_stacktrace(int sig, siginfo_t*, void*) {
412  using namespace edm::service;
413 
414 #ifdef RESUME_SIGNAL
415  sigset_t sigset;
416  sigemptyset(&sigset);
417  sigaddset(&sigset, RESUME_SIGNAL);
418  pthread_sigmask(SIG_UNBLOCK, &sigset, 0);
419 #endif
420 // sleep is interrupted by a handled delivery of the resume signal
422 
423  if (InitRootHandlers::doneModules_.is_lock_free() && InitRootHandlers::nextModule_.is_lock_free()) {
426  char* buff = InitRootHandlers::moduleListBuffers_[i].data();
427 
428  strlcpy(buff, "\nModule: ", moduleBufferSize);
430  strlcat(buff, edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleName().c_str(), moduleBufferSize);
431  } else {
432  strlcat(buff, "none", moduleBufferSize);
433  }
435  }
436  }
437  }
438 
439  void sig_dostack_then_abort(int sig, siginfo_t*, void*) {
440  using namespace edm::service;
441 
442  const auto& tids = InitRootHandlers::threadIDs();
443 
444  const auto self = pthread_self();
445 #ifdef PAUSE_SIGNAL
446  if (InitRootHandlers::stackTracePause() > 0 && tids.size() > 1) {
447  // install the "pause" handler
448  struct sigaction act;
449  act.sa_sigaction = sig_pause_for_stacktrace;
450  act.sa_flags = 0;
451  sigemptyset(&act.sa_mask);
452  sigaction(PAUSE_SIGNAL, &act, NULL);
453 
454  // unblock pause signal globally, resume is unblocked in the pause handler
455  sigset_t pausesigset;
456  sigemptyset(&pausesigset);
457  sigaddset(&pausesigset, PAUSE_SIGNAL);
458  sigprocmask(SIG_UNBLOCK, &pausesigset, 0);
459 
460  // send a pause signal to all CMSSW/TBB threads other than self
461  for (auto id : tids) {
462  if (self != id) {
463  pthread_kill(id, PAUSE_SIGNAL);
464  }
465  }
466 
467 #ifdef RESUME_SIGNAL
468  // install the "resume" handler
469  act.sa_sigaction = sig_resume_handler;
470  sigaction(RESUME_SIGNAL, &act, NULL);
471 #endif
472  }
473 #endif
474 
475  const char* signalname = "unknown";
476  switch (sig) {
477  case SIGBUS:
478  {
479  signalname = "bus error";
480  break;
481  }
482  case SIGSEGV:
483  {
484  signalname = "segmentation violation";
485  break;
486  }
487  case SIGILL:
488  {
489  signalname = "illegal instruction";
490  break;
491  }
492  case SIGTERM:
493  {
494  signalname = "external termination request";
495  break;
496  }
497  default:
498  break;
499  }
500  full_cerr_write("\n\nA fatal system signal has occurred: ");
501  full_cerr_write(signalname);
502  full_cerr_write("\nThe following is the call stack containing the origin of the signal.\n\n");
503 
505 
506  // resume the signal handlers to store the current module; we are not guaranteed they
507  // will have time to store their modules, so there is a race condition; this could be
508  // avoided by storing the module information before sleeping, a change that may be
509  // made when we're convinced accessing the thread-local current module is safe.
510 #ifdef RESUME_SIGNAL
511  std::size_t notified = 0;
512  if (InitRootHandlers::stackTracePause() > 0 && tids.size() > 1) {
513  for (auto id : tids) {
514  if (self != id) {
515  if (pthread_kill(id, RESUME_SIGNAL) == 0) ++notified;
516  }
517  }
518  }
519 #endif
520 
521  full_cerr_write("\nCurrent Modules:\n");
522 
523  // Checking tids.count(self) ensures that we only try to access the current module in
524  // CMSSW/TBB threads. Those threads access the thread-local current module at the same
525  // time the thread is registered, so any lazy allocation will have been done at that
526  // point. Not necessary on Linux with the current cmsRun linkage, as the thread-local
527  // is allocated at exec time, not lazily.
528  if (tids.count(self) > 0) {
529  char buff[moduleBufferSize] = "\nModule: ";
531  strlcat(buff, edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleName().c_str(), moduleBufferSize);
532  } else {
533  strlcat(buff, "none", moduleBufferSize);
534  }
535  strlcat(buff, " (crashed)", moduleBufferSize);
536  full_cerr_write(buff);
537  } else {
538  full_cerr_write("\nModule: non-CMSSW (crashed)");
539  }
540 
541 #ifdef PAUSE_SIGNAL
542  // wait a short interval for the paused threads to resume and fill in their module
543  // information, then print
544  if (InitRootHandlers::doneModules_.is_lock_free()) {
545  int spincount = 0;
546  timespec t = { 0, 1000 };
547  while (++spincount < 1000 && InitRootHandlers::doneModules_ < notified) { nanosleep(&t, nullptr); }
548  for (std::size_t i = 0; i < InitRootHandlers::doneModules_; ++i) {
549  full_cerr_write(InitRootHandlers::moduleListBuffers_[i].data());
550  }
551  }
552 #endif
553 
554  full_cerr_write("\n\nA fatal system signal has occurred: ");
555  full_cerr_write(signalname);
556  full_cerr_write("\n");
557 
558 // For these four known cases, re-raise the signal so that we get the
559 // correct exit code.
560  if ((sig == SIGILL) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig == SIGTERM))
561  {
562  signal(sig, SIG_DFL);
563  raise(sig);
564  }
565  else
566  {
567  ::abort();
568  }
569  }
570 
571  void sig_abort(int sig, siginfo_t*, void*) {
572  full_cerr_write("\n\nFatal system signal has occurred during exit\n");
573 
574  // re-raise the signal to get the correct exit code
575  signal(sig, SIG_DFL);
576  raise(sig);
577 
578  // shouldn't get here
579  ::sleep(10);
580  ::abort();
581  }
582  }
583 
// Restore the default disposition of SIGILL, SIGSEGV, SIGBUS and SIGTERM
// (in that order).
void set_default_signals() {
  const int fatalSignals[] = {SIGILL, SIGSEGV, SIGBUS, SIGTERM};
  for (const int signum : fatalSignals) {
    signal(signum, SIG_DFL);
  }
}
590 
591 } // end of unnamed namespace
592 
593 namespace edm {
594  namespace service {
595 
596  /*
597  * We've run into issues where GDB fails to print the thread which calls clone().
598  * To avoid this problem, we have an alternate approach below where the signal handler
599  * only reads/writes to a dedicated thread via pipes. The helper thread does the clone()
600  * invocation; we don't care if that thread is missing from the traceback in this case.
601  */
602  static void cmssw_stacktrace_fork();
603 
605  {
606  int toParent = childToParent_[1];
607  int fromParent = parentToChild_[0];
608  char buf[2]; buf[1] = '\0';
609 
610  while(true)
611  {
612  int result = full_read(fromParent, buf, 1);
613  if (result < 0)
614  {
615  // To avoid a deadlock (this function is NOT re-entrant), reset signals
616  // We never set them back to the CMSSW handler because we assume the parent
617  // thread will abort for us.
618  set_default_signals();
619  close(toParent);
620  full_cerr_write("\n\nTraceback helper thread failed to read from parent: ");
621  full_cerr_write(strerror(-result));
622  full_cerr_write("\n");
623  ::abort();
624  }
625  if (buf[0] == '1')
626  {
627  set_default_signals();
629  full_write(toParent, buf);
630  }
631  else if (buf[0] == '2')
632  {
633  // We have just finished forking. Reload the file descriptors for thread
634  // communication.
635  close(toParent);
636  close(fromParent);
637  toParent = childToParent_[1];
638  fromParent = parentToChild_[0];
639  }
640  else if (buf[0] == '3')
641  {
642  break;
643  }
644  else
645  {
646  set_default_signals();
647  close(toParent);
648  full_cerr_write("\n\nTraceback helper thread got unknown command from parent: ");
649  full_cerr_write(buf);
650  full_cerr_write("\n");
651  ::abort();
652  }
653  }
654  }
655 
657  {
658  int result = full_write(parentToChild_[1], "1");
659  if (result < 0)
660  {
661  full_cerr_write("\n\nAttempt to request stacktrace failed: ");
662  full_cerr_write(strerror(-result));
663  full_cerr_write("\n");
664  return;
665  }
666  char buf[2]; buf[1] = '\0';
667  if ((result = full_read(childToParent_[0], buf, 1, 5*60)) < 0)
668  {
669  full_cerr_write("\n\nWaiting for stacktrace completion failed: ");
670  if (result == -ETIMEDOUT)
671  {
672  full_cerr_write("timed out waiting for GDB to complete.");
673  }
674  else
675  {
676  full_cerr_write(strerror(-result));
677  }
678  full_cerr_write("\n");
679  return;
680  }
681  }
682 
684  {
685  char child_stack[4*1024];
686  char *child_stack_ptr = child_stack + 4*1024;
687  // On Linux, we currently use jemalloc. This registers pthread_atfork handlers; these
688  // handlers are *not* async-signal safe. Hence, a deadlock is possible if we invoke
689  // fork() from our signal handlers. Accordingly, we use clone (not POSIX, but AS-safe)
690  // as that is closer to the 'raw metal' syscall and avoids pthread_atfork handlers.
691  int pid =
692 #ifdef __linux__
693  clone(edm::service::cmssw_stacktrace, child_stack_ptr, CLONE_VM|CLONE_FS|SIGCHLD, nullptr);
694 #else
695  fork();
696  if (child_stack_ptr) {} // Suppress 'unused variable' warning on non-Linux
697  if (pid == 0) {edm::service::cmssw_stacktrace(nullptr); ::abort();}
698 #endif
699  if (pid == -1)
700  {
701  full_cerr_write("(Attempt to perform stack dump failed.)\n");
702  }
703  else
704  {
705  int status;
706  if (waitpid(pid, &status, 0) == -1)
707  {
708  full_cerr_write("(Failed to wait on stack dump output.)\n");
709  }
710  if (status)
711  {
712  full_cerr_write("(GDB stack trace failed unexpectedly)\n");
713  }
714  }
715  }
716 
717  int cmssw_stacktrace(void * /*arg*/)
718  {
720  // NOTE: this is NOT async-signal-safe at CERN's lxplus service.
721  // CERN uses LD_PRELOAD to replace execv with a function from libsnoopy which
722  // calls dlsym.
723 #ifdef __linux__
724  syscall(SYS_execve, "/bin/sh", argv, __environ);
725 #else
726  execv("/bin/sh", argv);
727 #endif
728  ::abort();
729  return 1;
730  }
731 
732  static char pstackName[] = "(CMSSW stack trace helper)";
733  static char dashC[] = "-c";
736  int InitRootHandlers::parentToChild_[2] = {-1, -1};
737  int InitRootHandlers::childToParent_[2] = {-1, -1};
738  std::unique_ptr<std::thread> InitRootHandlers::helperThread_;
740  std::vector<std::array<char,moduleBufferSize>> InitRootHandlers::moduleListBuffers_;
741  std::atomic<std::size_t> InitRootHandlers::nextModule_(0), InitRootHandlers::doneModules_(0);
743 
744 
746  : RootHandlers(),
747  unloadSigHandler_(pset.getUntrackedParameter<bool> ("UnloadRootSigHandler")),
748  resetErrHandler_(pset.getUntrackedParameter<bool> ("ResetRootErrHandler")),
749  loadAllDictionaries_(pset.getUntrackedParameter<bool>("LoadAllDictionaries")),
750  autoLibraryLoader_(loadAllDictionaries_ or pset.getUntrackedParameter<bool> ("AutoLibraryLoader"))
751  {
752  stackTracePause_ = pset.getUntrackedParameter<int> ("StackTracePauseTime");
753 
754  if(unloadSigHandler_) {
755  // Deactivate all the Root signal handlers and restore the system defaults
756  gSystem->ResetSignal(kSigChild);
757  gSystem->ResetSignal(kSigBus);
758  gSystem->ResetSignal(kSigSegmentationViolation);
759  gSystem->ResetSignal(kSigIllegalInstruction);
760  gSystem->ResetSignal(kSigSystem);
761  gSystem->ResetSignal(kSigPipe);
762  gSystem->ResetSignal(kSigAlarm);
763  gSystem->ResetSignal(kSigUrgent);
764  gSystem->ResetSignal(kSigFloatingException);
765  gSystem->ResetSignal(kSigWindowChanged);
766  } else if(pset.getUntrackedParameter<bool>("AbortOnSignal")){
767  cachePidInfo();
768 
769  //NOTE: ROOT can also be told to abort on these kinds of problems BUT
770  // it requires an TApplication to be instantiated which causes problems
771  gSystem->ResetSignal(kSigBus);
772  gSystem->ResetSignal(kSigSegmentationViolation);
773  gSystem->ResetSignal(kSigIllegalInstruction);
774  installCustomHandler(SIGBUS,sig_dostack_then_abort);
775  sigBusHandler_ = std::shared_ptr<const void>(nullptr,[](void*) {
776  installCustomHandler(SIGBUS,sig_abort);
777  });
778  installCustomHandler(SIGSEGV,sig_dostack_then_abort);
779  sigSegvHandler_ = std::shared_ptr<const void>(nullptr,[](void*) {
780  installCustomHandler(SIGSEGV,sig_abort);
781  });
782  installCustomHandler(SIGILL,sig_dostack_then_abort);
783  sigIllHandler_ = std::shared_ptr<const void>(nullptr,[](void*) {
784  installCustomHandler(SIGILL,sig_abort);
785  });
786  installCustomHandler(SIGTERM,sig_dostack_then_abort);
787  sigTermHandler_ = std::shared_ptr<const void>(nullptr,[](void*) {
788  installCustomHandler(SIGTERM,sig_abort);
789  });
791  }
792 
793  iReg.watchPreallocate([this](edm::service::SystemBounds const& iBounds){
794  if (iBounds.maxNumberOfThreads() > moduleListBuffers_.size()) {
795  moduleListBuffers_.resize(iBounds.maxNumberOfThreads());
796  }
797  });
798 
799  if(resetErrHandler_) {
800 
801  // Replace the Root error handler with one that uses the MessageLogger
802  SetErrorHandler(RootErrorHandler);
803  }
804 
805  // Enable automatic Root library loading.
806  if(autoLibraryLoader_) {
807  gInterpreter->SetClassAutoloading(1);
808  }
809 
810  // Set ROOT parameters.
811  TTree::SetMaxTreeSize(kMaxLong64);
812  TH1::AddDirectory(kFALSE);
813  //G__SetCatchException(0);
814 
815  // Set custom streamers
817 
818  // Load the library containing dictionaries for std:: classes, if not already loaded.
819  if (!hasDictionary(typeid(std::vector<std::vector<unsigned int> >))) {
820  TypeWithDict::byName("std::vector<std::vector<unsigned int> >");
821  }
822 
823  int debugLevel = pset.getUntrackedParameter<int>("DebugLevel");
824  if(debugLevel >0) {
825  gDebug = debugLevel;
826  }
827 
828  // Enable Root implicit multi-threading
829  bool imt = pset.getUntrackedParameter<bool>("EnableIMT");
830  if (imt) ROOT::EnableImplicitMT();
831  }
832 
834  // close all open ROOT files
835  TIter iter(gROOT->GetListOfFiles());
836  TObject *obj = nullptr;
837  while(nullptr != (obj = iter.Next())) {
838  TFile* f = dynamic_cast<TFile*>(obj);
839  if(f) {
840  // We get a new iterator each time,
841  // because closing a file can invalidate the iterator
842  f->Close();
843  iter = TIter(gROOT->GetListOfFiles());
844  }
845  }
846  }
847 
849  //Tell Root we want to be multi-threaded
850  ROOT::EnableThreadSafety();
851 
852  //When threading, also have to keep ROOT from logging all TObjects into a list
853  TObject::SetObjectStat(false);
854 
855  //Have to avoid having Streamers modify themselves after they have been used
856  TVirtualStreamerInfo::Optimize(false);
857  }
858 
861  desc.setComment("Centralized interface to ROOT.");
862  desc.addUntracked<bool>("UnloadRootSigHandler", false)
863  ->setComment("If True, signals are handled by this service, rather than by ROOT.");
864  desc.addUntracked<bool>("ResetRootErrHandler", true)
865  ->setComment("If True, ROOT messages (e.g. errors, warnings) are handled by this service, rather than by ROOT.");
866  desc.addUntracked<bool>("AutoLibraryLoader", true)
867  ->setComment("If True, enables automatic loading of data dictionaries.");
868  desc.addUntracked<bool>("LoadAllDictionaries",false)
869  ->setComment("If True, loads all ROOT dictionaries.");
870  desc.addUntracked<bool>("EnableIMT",false)
871  ->setComment("If True, calls ROOT::EnableImplicitMT().");
872  desc.addUntracked<bool>("AbortOnSignal",true)
873  ->setComment("If True, do an abort when a signal occurs that causes a crash. If False, ROOT will do an exit which attempts to do a clean shutdown.");
874  desc.addUntracked<int>("DebugLevel",0)
875  ->setComment("Sets ROOT's gDebug value.");
876  desc.addUntracked<int>("StackTracePauseTime", 300)
877  ->setComment("Seconds to pause other threads during stack trace.");
878  descriptions.add("InitRootHandlers", desc);
879  }
880 
881  char *const *
883  return pstackArgv_;
884  }
885 
886  void
888  s_ignoreWarnings =false;
889  }
890 
891  void
893  s_ignoreWarnings = true;
894  }
895 
896  void
898  {
899  if(helperThread_) {
900  //Another InitRootHandlers was initialized in this job, possibly
901  // because multiple EventProcessors are being used.
902  //In that case, we are already all setup
903  return;
904  }
905  if (snprintf(pidString_, pidStringLength_-1, "gdb -quiet -p %d 2>&1 <<EOF |\n"
906  "set width 0\n"
907  "set height 0\n"
908  "set pagination no\n"
909  "thread apply all bt\n"
910  "EOF\n"
911  "/bin/sed -n -e 's/^\\((gdb) \\)*//' -e '/^#/p' -e '/^Thread/p'", getpid()) >= pidStringLength_)
912  {
913  std::ostringstream sstr;
914  sstr << "Unable to pre-allocate stacktrace handler information";
915  edm::Exception except(edm::errors::OtherCMS, sstr.str());
916  throw except;
917  }
918 
919  // These are initialized to -1; harmless to close an invalid FD.
920  // If this is called post-fork, we don't want to be communicating on
921  // these FDs as they are used internally by the parent.
922  close(childToParent_[0]);
923  close(childToParent_[1]);
924  childToParent_[0] = -1; childToParent_[1] = -1;
925  close(parentToChild_[0]);
926  close(parentToChild_[1]);
927  parentToChild_[0] = -1; parentToChild_[1] = -1;
928 
929  if (-1 == pipe2(childToParent_, O_CLOEXEC))
930  {
931  std::ostringstream sstr;
932  sstr << "Failed to create child-to-parent pipes (errno=" << errno << "): " << strerror(errno);
933  edm::Exception except(edm::errors::OtherCMS, sstr.str());
934  throw except;
935  }
936 
937  if (-1 == pipe2(parentToChild_, O_CLOEXEC))
938  {
939  close(childToParent_[0]); close(childToParent_[1]);
940  childToParent_[0] = -1; childToParent_[1] = -1;
941  std::ostringstream sstr;
942  sstr << "Failed to create child-to-parent pipes (errno=" << errno << "): " << strerror(errno);
943  edm::Exception except(edm::errors::OtherCMS, sstr.str());
944  throw except;
945  }
946 
947  helperThread_.reset(new std::thread(stacktraceHelperThread));
948  helperThread_->detach();
949  }
950 
951  } // end of namespace service
952 } // end of namespace edm
953 
957 
size
Write out results.
unsigned int maxNumberOfThreads() const
Definition: SystemBounds.h:46
T getUntrackedParameter(std::string const &, T const &) const
virtual void enableWarnings_() override
static void cmssw_stacktrace_fork()
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
Definition: ServiceMaker.h:117
double seconds()
void watchPreallocate(Preallocate::slot_type const &iSlot)
static char *const pstackArgv_[]
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
bool isProcessWideService(TFileService const *)
Definition: TFileService.h:99
static ModuleCallingContext const * getCurrentModuleOnThread()
void setRefCoreStreamer(bool resetAll=false)
std::vector< Variable::Flags > flags
Definition: MVATrainer.cc:135
#define NULL
Definition: scimark2.h:8
void installCustomHandler(int signum, CFUNC func)
std::shared_ptr< const void > sigSegvHandler_
#define constexpr
std::shared_ptr< const void > sigBusHandler_
static TypeWithDict byName(std::string const &name)
Definition: TypeWithDict.cc:60
static std::atomic< std::size_t > doneModules_
static const ThreadTracker::Container_type & threadIDs()
void cachePidInfoHandler(unsigned int, unsigned int)
void setComment(std::string const &value)
std::string moduleName(Provenance const &provenance)
Definition: Provenance.cc:27
static std::atomic< std::size_t > nextModule_
static char pidString_[pidStringLength_]
static ThreadTracker threadTracker_
edm::serviceregistry::AllArgsMaker< edm::RootHandlers, InitRootHandlers > RootHandlersMaker
static char *const * getPstackArgv()
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
std::shared_ptr< const void > sigIllHandler_
virtual void ignoreWarnings_() override
std::shared_ptr< const void > sigTermHandler_
void addAdditionalInfo(std::string const &info)
Definition: Exception.cc:235
double f[11][100]
int cmssw_stacktrace(void *)
static std::unique_ptr< std::thread > helperThread_
static std::vector< std::array< char, moduleBufferSize > > moduleListBuffers_
static char pstackName[]
InitRootHandlers(ParameterSet const &pset, ActivityRegistry &iReg)
tbb::concurrent_unordered_set< pthread_t > Container_type
void add(std::string const &label, ParameterSetDescription const &psetDescription)
TEveGeoShape * clone(const TEveElement *element, TEveElement *parent)
Definition: eve_macros.cc:135
void watchPostForkReacquireResources(PostForkReacquireResources::slot_type const &iSlot)
virtual void willBeUsingThreads() override
static char dashC[]
HLT enums.
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:82
static void fillDescriptions(ConfigurationDescriptions &descriptions)
SeverityLevel
bool hasDictionary(std::type_info const &)
#define O_NONBLOCK
Definition: SysFile.h:21
def write(self, setup)