CMS 3D CMS Logo

InitRootHandlers.cc
Go to the documentation of this file.
2 
4 
20 
21 #include "tbb/task.h"
22 #include "tbb/task_scheduler_observer.h"
23 #include "tbb/concurrent_unordered_set.h"
24 #include <thread>
25 #include <sys/wait.h>
26 #include <sstream>
27 #include <cstring>
28 #include <poll.h>
29 #include <atomic>
30 
31 // WORKAROUND: At CERN, execv is replaced with a non-async-signal safe
32 // version. This can break our stack trace printer. Avoid this by
33 // invoking the syscall directly.
34 #ifdef __linux__
35 #include <syscall.h>
36 #endif
37 
38 #include "TROOT.h"
39 #include "TError.h"
40 #include "TFile.h"
41 #include "TInterpreter.h"
42 #include "TH1.h"
43 #include "TSystem.h"
44 #include "TUnixSystem.h"
45 #include "TTree.h"
46 #include "TVirtualStreamerInfo.h"
47 
48 #include "TClassTable.h"
49 
50 #include <memory>
51 
namespace {
  /// Capacity, in bytes, of each fixed-size character buffer used to list a
  /// module name after a stack-trace abort.
  constexpr std::size_t moduleBufferSize{128};
}  // unnamed namespace
57 
58 namespace edm {
60  class ParameterSet;
61  class ActivityRegistry;
62 
63  namespace service {
  // Service class derived from RootHandlers. Besides the ROOT-related
  // overrides it owns the static state used by the crash stack-trace
  // machinery in this file (thread registry, pipe descriptors, per-thread
  // module-name buffers).
64  class InitRootHandlers : public RootHandlers {
65 
  // cmssw_stacktrace() is granted access to the private static members.
66  friend int cmssw_stacktrace(void *);
67 
68  public:
  // TBB scheduler observer that records the pthread id of every thread
  // entering the scheduler, so the signal handler knows which threads to
  // signal.
69  class ThreadTracker : public tbb::task_scheduler_observer {
70  public:
71  typedef tbb::concurrent_unordered_set<pthread_t> Container_type;
72 
  // Start observing immediately on construction.
73  ThreadTracker() : tbb::task_scheduler_observer() {
74  observe(true);
75  }
76  void on_scheduler_entry(bool) override {
77  // ensure thread local has been allocated; not necessary on Linux with
78  // the current cmsRun linkage, but could be an issue if the platform
79  // or linkage leads to "lazy" allocation of the thread local. By
80  // referencing it here we make sure it has been allocated and can be
81  // accessed safely from our signal handler.
83  threadIDs_.insert(pthread_self());
84  }
  // Read-only view of the ids collected so far.
85  const Container_type& IDs() { return threadIDs_; }
86 
87  private:
88  Container_type threadIDs_;
89  };
90 
91  explicit InitRootHandlers(ParameterSet const& pset, ActivityRegistry& iReg);
92  ~InitRootHandlers() override;
93 
94  static void fillDescriptions(ConfigurationDescriptions& descriptions);
95  static void stacktraceFromThread();
  // Value of the "StackTracePauseTime" configuration parameter.
97  static int stackTracePause() { return stackTracePause_; }
98 
  // One fixed-size text buffer per thread (resized in the preallocate
  // callback); filled by the pause handler and printed by the crash handler.
99  static std::vector<std::array<char,moduleBufferSize>> moduleListBuffers_;
  // Counters coordinating use of moduleListBuffers_ between signal handlers.
100  static std::atomic<std::size_t> nextModule_, doneModules_;
101  private:
102  static char *const *getPstackArgv();
103  void enableWarnings_() override;
104  void ignoreWarnings_() override;
105  void willBeUsingThreads() override;
106 
107  void cachePidInfo();
108  static void stacktraceHelperThread();
109 
  // Size of the buffer holding the pre-formatted gdb command line.
110  static const int pidStringLength_ = 200;
112  static char * const pstackArgv_[];
  // Pipe descriptor pairs used to exchange one-byte commands/replies with
  // the stack-trace helper thread ([0] = read end, [1] = write end).
113  static int parentToChild_[2];
114  static int childToParent_[2];
115  static std::unique_ptr<std::thread> helperThread_;
117  static int stackTracePause_;
118 
  // Guards whose deleters, set up in the constructor, swap the corresponding
  // signal handler to sig_abort when the guard is released.
123  std::shared_ptr<const void> sigBusHandler_;
124  std::shared_ptr<const void> sigSegvHandler_;
125  std::shared_ptr<const void> sigIllHandler_;
126  std::shared_ptr<const void> sigTermHandler_;
127  };
128 
129  inline
131  return true;
132  }
133 
134  } // end of namespace service
135 } // end of namespace edm
136 
137 namespace edm {
138  namespace service {
139  int cmssw_stacktrace(void *);
140  }
141 }
142 
143 namespace {
/// Severity classes used by the ROOT error handler, listed from least to most
/// severe.
enum class SeverityLevel { kInfo, kWarning, kError, kSysError, kFatal };

/// Per-thread switch: while true, ROOT warnings are reported as kInfo.
thread_local bool s_ignoreWarnings{false};

/// Process-wide switch: while true, every ROOT message is reported as kInfo.
bool s_ignoreEverything{false};
155 
156  void RootErrorHandlerImpl(int level, char const* location, char const* message) {
157 
158  bool die = false;
159 
160  // Translate ROOT severity level to MessageLogger severity level
161 
162  SeverityLevel el_severity = SeverityLevel::kInfo;
163 
164  if (level >= kFatal) {
165  el_severity = SeverityLevel::kFatal;
166  } else if (level >= kSysError) {
167  el_severity = SeverityLevel::kSysError;
168  } else if (level >= kError) {
169  el_severity = SeverityLevel::kError;
170  } else if (level >= kWarning) {
171  el_severity = s_ignoreWarnings ? SeverityLevel::kInfo : SeverityLevel::kWarning;
172  }
173 
174  if(s_ignoreEverything) {
175  el_severity = SeverityLevel::kInfo;
176  }
177 
178  // Adapt C-strings to std::strings
179  // Arrange to report the error location as furnished by Root
180 
181  std::string el_location = "@SUB=?";
182  if (location != nullptr) el_location = std::string("@SUB=")+std::string(location);
183 
184  std::string el_message = "?";
185  if (message != nullptr) el_message = message;
186 
187  // Try to create a meaningful id string using knowledge of ROOT error messages
188  //
189  // id == "ROOT-ClassName" where ClassName is the affected class
190  // else "ROOT/ClassName" where ClassName is the error-declaring class
191  // else "ROOT"
192 
193  std::string el_identifier = "ROOT";
194 
195  std::string precursor("class ");
196  size_t index1 = el_message.find(precursor);
197  if (index1 != std::string::npos) {
198  size_t index2 = index1 + precursor.length();
199  size_t index3 = el_message.find_first_of(" :", index2);
200  if (index3 != std::string::npos) {
201  size_t substrlen = index3-index2;
202  el_identifier += "-";
203  el_identifier += el_message.substr(index2,substrlen);
204  }
205  } else {
206  index1 = el_location.find("::");
207  if (index1 != std::string::npos) {
208  el_identifier += "/";
209  el_identifier += el_location.substr(0, index1);
210  }
211  }
212 
213  // Intercept some messages and upgrade the severity
214 
215  if ((el_location.find("TBranchElement::Fill") != std::string::npos)
216  && (el_message.find("fill branch") != std::string::npos)
217  && (el_message.find("address") != std::string::npos)
218  && (el_message.find("not set") != std::string::npos)) {
219  el_severity = SeverityLevel::kFatal;
220  }
221 
222  if ((el_message.find("Tree branches") != std::string::npos)
223  && (el_message.find("different numbers of entries") != std::string::npos)) {
224  el_severity = SeverityLevel::kFatal;
225  }
226 
227 
228  // Intercept some messages and downgrade the severity
229 
230  if ((el_message.find("no dictionary for class") != std::string::npos) ||
231  (el_message.find("already in TClassTable") != std::string::npos) ||
232  (el_message.find("matrix not positive definite") != std::string::npos) ||
233  (el_message.find("not a TStreamerInfo object") != std::string::npos) ||
234  (el_message.find("Problems declaring payload") != std::string::npos) ||
235  (el_message.find("Announced number of args different from the real number of argument passed") != std::string::npos) || // Always printed if gDebug>0 - regardless of whether warning message is real.
236  (el_location.find("Fit") != std::string::npos) ||
237  (el_location.find("TDecompChol::Solve") != std::string::npos) ||
238  (el_location.find("THistPainter::PaintInit") != std::string::npos) ||
239  (el_location.find("TUnixSystem::SetDisplay") != std::string::npos) ||
240  (el_location.find("TGClient::GetFontByName") != std::string::npos) ||
241  (el_location.find("Inverter::Dinv") != std::string::npos) ||
242  (el_message.find("nbins is <=0 - set to nbins = 1") != std::string::npos) ||
243  (el_message.find("nbinsy is <=0 - set to nbinsy = 1") != std::string::npos) ||
244  (level < kError and
245  (el_location.find("CINTTypedefBuilder::Setup")!= std::string::npos) and
246  (el_message.find("possible entries are in use!") != std::string::npos))) {
247  el_severity = SeverityLevel::kInfo;
248  }
249 
250  if (el_severity == SeverityLevel::kInfo) {
251  // Don't throw if the message is just informational.
252  die = false;
253  } else {
254  die = true;
255  }
256 
257  // Feed the message to the MessageLogger and let it choose to suppress or not.
258 
259  // Root has declared a fatal error. Throw an EDMException unless the
260  // message corresponds to a pending signal. In that case, do not throw
261  // but let the OS deal with the signal in the usual way.
262  if (die && (el_location != std::string("@SUB=TUnixSystem::DispatchSignals"))) {
263  std::ostringstream sstr;
264  sstr << "Fatal Root Error: " << el_location << "\n" << el_message << '\n';
265  edm::Exception except(edm::errors::FatalRootError, sstr.str());
266  except.addAdditionalInfo(except.message());
267  except.clearMessage();
268  throw except;
269 
270  }
271 
272  // Typically, we get here only for informational messages,
273  // but we leave the other code in just in case we change
274  // the criteria for throwing.
275  if (el_severity == SeverityLevel::kFatal) {
276  edm::LogError("Root_Fatal") << el_location << el_message;
277  } else if (el_severity == SeverityLevel::kSysError) {
278  edm::LogError("Root_Severe") << el_location << el_message;
279  } else if (el_severity == SeverityLevel::kError) {
280  edm::LogError("Root_Error") << el_location << el_message;
281  } else if (el_severity == SeverityLevel::kWarning) {
282  edm::LogWarning("Root_Warning") << el_location << el_message ;
283  } else if (el_severity == SeverityLevel::kInfo) {
284  edm::LogInfo("Root_Information") << el_location << el_message ;
285  }
286  }
287 
288  void RootErrorHandler(int level, bool, char const* location, char const* message) {
289  RootErrorHandlerImpl(level, location, message);
290  }
291 
292  extern "C" {
293 
/// Write the complete NUL-terminated string `text` to descriptor `fd`.
///
/// Short writes are continued and writes interrupted by a signal (EINTR) are
/// retried. Only write() and strlen() are used, so nothing is allocated.
///
/// @return 0 once every byte has been written, otherwise the negated errno of
///         the failing write().
static int full_write(int fd, const char *text)
{
  const char *cursor = text;
  for (size_t remaining = strlen(text); remaining != 0; )
  {
    const ssize_t sent = write(fd, cursor, remaining);
    if (sent == -1)
    {
      if (errno != EINTR) { return -errno; }
      continue;  // interrupted before anything was written: try again
    }
    remaining -= sent;
    cursor += sent;
  }
  return 0;
}
312 
/// Read exactly `len` bytes from descriptor `fd` into `inbuf`.
///
/// @param fd         Descriptor to read from.
/// @param inbuf      Destination buffer of at least `len` bytes.
/// @param len        Number of bytes that must be read.
/// @param timeout_s  Overall deadline in seconds. A negative value (the
///                   default) means "no deadline": the descriptor's blocking
///                   mode is left untouched and read() is simply repeated.
///                   Otherwise the descriptor is switched to non-blocking
///                   mode for the duration of the call and poll() is used to
///                   wait for data.
///
/// @return 0 when `len` bytes were read; -ETIMEDOUT when the deadline passed;
///         -EPIPE when end-of-file is reached before `len` bytes arrived;
///         otherwise the negated errno of the failing fcntl()/poll()/read().
///
/// Compared with a plain retry loop this version also
///  - stops on end-of-file (read() == 0) instead of spinning forever,
///  - reports poll() failures instead of ignoring them,
///  - probes with a zero poll() timeout when the deadline has just been
///    reached rather than busy-looping on a non-blocking read().
static int full_read(int fd, char *inbuf, size_t len, int timeout_s=-1)
{
  char *buf = inbuf;
  size_t count = len;
  const std::chrono::time_point<std::chrono::steady_clock> end_time =
      std::chrono::steady_clock::now() + std::chrono::seconds(timeout_s);

  int flags;
  if (timeout_s < 0)
  {
    flags = O_NONBLOCK; // Prevents us from trying to set / restore flags later.
  }
  else if (-1 == (flags = fcntl(fd, F_GETFL)))
  {
    return -errno;
  }
  const bool must_restore = (flags & O_NONBLOCK) != O_NONBLOCK;
  if (must_restore && (-1 == fcntl(fd, F_SETFL, flags | O_NONBLOCK)))
  {
    return -errno;
  }

  // Put the descriptor back into its original mode (if we changed it) and
  // hand back `rc`. The argument is evaluated before fcntl() can touch errno.
  const auto finish = [fd, flags, must_restore](int rc) {
    if (must_restore)
    {
      fcntl(fd, F_SETFL, flags);
    }
    return rc;
  };

  while (count)
  {
    if (timeout_s >= 0)
    {
      struct pollfd poll_info{fd, POLLIN, 0};
      const int ms_remaining = std::chrono::duration_cast<std::chrono::milliseconds>(end_time-std::chrono::steady_clock::now()).count();
      if (ms_remaining < 0)
      {
        return finish(-ETIMEDOUT);
      }
      // ms_remaining == 0 makes poll() a non-blocking probe.
      const int ready = poll(&poll_info, 1, ms_remaining);
      if (ready == 0)
      {
        return finish(-ETIMEDOUT);
      }
      if (ready == -1)
      {
        if ((errno == EINTR) || (errno == EAGAIN)) {continue;}
        return finish(-errno);
      }
    }
    const ssize_t complete = read(fd, buf, count);
    if (complete == -1)
    {
      if ((errno == EINTR) || (errno == EAGAIN) || (errno == EWOULDBLOCK)) {continue;}
      return finish(-errno);
    }
    if (complete == 0)
    {
      // End-of-file: the remaining bytes can never arrive.
      return finish(-EPIPE);
    }
    count -= complete;
    buf += complete;
  }
  return finish(0);
}
384 
385  static int full_cerr_write(const char *text)
386  {
387  return full_write(2, text);
388  }
389 
// These signals are only used inside the stacktrace signal handler,
// so common signals can be used. They do have to be different, since
// we do not set SA_NODEFER, and RESUME must be a signal that will
// cause sleep() to return early.
// RESUME_SIGNAL is parenthesized so that the macro stays a single operand
// when it is expanded inside a larger expression.
#if defined(SIGRTMAX)
#define PAUSE_SIGNAL SIGRTMAX
#define RESUME_SIGNAL (SIGRTMAX-1)
#elif defined(SIGINFO) // macOS/BSD
#define PAUSE_SIGNAL SIGINFO
#define RESUME_SIGNAL SIGALRM
#endif
401 
// Deliberately empty: delivering the resume signal to this handler exists
// solely to interrupt the sleep() in the pause handler.
void sig_resume_handler(int /*sig*/, siginfo_t* /*info*/, void* /*context*/) {
}
404 
405  // pause a thread so that a (slow) stacktrace will capture the current state
  // Installed for PAUSE_SIGNAL by sig_dostack_then_abort. After being
  // released it records this thread's current module ("\nModule: name:label",
  // or "none") in one slot of InitRootHandlers::moduleListBuffers_.
406  void sig_pause_for_stacktrace(int sig, siginfo_t*, void*) {
407  using namespace edm::service;
408 
409 #ifdef RESUME_SIGNAL
  // Allow the resume signal to be delivered to this thread while it waits.
410  sigset_t sigset;
411  sigemptyset(&sigset);
412  sigaddset(&sigset, RESUME_SIGNAL);
413  pthread_sigmask(SIG_UNBLOCK, &sigset, nullptr);
414 #endif
415  // sleep is interrupted by a handled delivery of the resume signal
417 
  // The shared counters are only used when they are lock-free atomics.
418  if (InitRootHandlers::doneModules_.is_lock_free() && InitRootHandlers::nextModule_.is_lock_free()) {
421  char* buff = InitRootHandlers::moduleListBuffers_[i].data();
422 
  // Every copy is bounded by moduleBufferSize, the size of one slot.
423  strlcpy(buff, "\nModule: ", moduleBufferSize);
425  strlcat(buff, edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleName().c_str(), moduleBufferSize);
426  strlcat(buff, ":", moduleBufferSize);
427  strlcat(buff, edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleLabel().c_str(), moduleBufferSize);
428  } else {
429  strlcat(buff, "none", moduleBufferSize);
430  }
432  }
433  }
434  }
435 
  // Handler for fatal signals (installed for SIGBUS, SIGSEGV, SIGILL and
  // SIGTERM by the InitRootHandlers constructor). It optionally pauses the
  // other registered threads, writes a report to stderr, resumes the paused
  // threads so they can record their current module, prints the module list
  // and finally terminates the process.
436  void sig_dostack_then_abort(int sig, siginfo_t*, void*) {
437  using namespace edm::service;
438 
439  const auto& tids = InitRootHandlers::threadIDs();
440 
441  const auto self = pthread_self();
442 #ifdef PAUSE_SIGNAL
443  if (InitRootHandlers::stackTracePause() > 0 && tids.size() > 1) {
444  // install the "pause" handler
  // NOTE(review): sa_sigaction is assigned but sa_flags is 0; POSIX selects
  // the three-argument handler form only when SA_SIGINFO is set - confirm.
445  struct sigaction act;
446  act.sa_sigaction = sig_pause_for_stacktrace;
447  act.sa_flags = 0;
448  sigemptyset(&act.sa_mask);
449  sigaction(PAUSE_SIGNAL, &act, nullptr);
450 
451  // unblock pause signal globally, resume is unblocked in the pause handler
452  sigset_t pausesigset;
453  sigemptyset(&pausesigset);
454  sigaddset(&pausesigset, PAUSE_SIGNAL);
455  sigprocmask(SIG_UNBLOCK, &pausesigset, nullptr);
456 
457  // send a pause signal to all CMSSW/TBB threads other than self
458  for (auto id : tids) {
459  if (self != id) {
460  pthread_kill(id, PAUSE_SIGNAL);
461  }
462  }
463 
464 #ifdef RESUME_SIGNAL
465  // install the "resume" handler
466  act.sa_sigaction = sig_resume_handler;
467  sigaction(RESUME_SIGNAL, &act, nullptr);
468 #endif
469  }
470 #endif
471 
  // Human-readable name of the signal for the report.
472  const char* signalname = "unknown";
473  switch (sig) {
474  case SIGBUS:
475  {
476  signalname = "bus error";
477  break;
478  }
479  case SIGSEGV:
480  {
481  signalname = "segmentation violation";
482  break;
483  }
484  case SIGILL:
485  {
486  signalname = "illegal instruction";
487  break;
488  }
489  case SIGTERM:
490  {
491  signalname = "external termination request";
492  break;
493  }
494  default:
495  break;
496  }
497  full_cerr_write("\n\nA fatal system signal has occurred: ");
498  full_cerr_write(signalname);
499  full_cerr_write("\nThe following is the call stack containing the origin of the signal.\n\n");
500 
502 
503  // resume the signal handlers to store the current module; we are not guaranteed they
504  // will have time to store their modules, so there is a race condition; this could be
505  // avoided by storing the module information before sleeping, a change that may be
506  // made when we're convinced accessing the thread-local current module is safe.
507 #ifdef RESUME_SIGNAL
  // Number of threads to which the resume signal was delivered successfully.
  // NOTE(review): declared under RESUME_SIGNAL but read further down under
  // PAUSE_SIGNAL; fine only while both macros are always defined together.
508  std::size_t notified = 0;
509  if (InitRootHandlers::stackTracePause() > 0 && tids.size() > 1) {
510  for (auto id : tids) {
511  if (self != id) {
512  if (pthread_kill(id, RESUME_SIGNAL) == 0) ++notified;
513  }
514  }
515  }
516 #endif
517 
518  full_cerr_write("\nCurrent Modules:\n");
519 
520  // Checking tids.count(self) ensures that we only try to access the current module in
521  // CMSSW/TBB threads. Those threads access the thread-local current module at the same
522  // time the thread is registered, so any lazy allocation will have been done at that
523  // point. Not necessary on Linux with the current cmsRun linkage, as the thread-local
524  // is allocated at exec time, not lazily.
525  if (tids.count(self) > 0) {
526  char buff[moduleBufferSize] = "\nModule: ";
528  strlcat(buff, edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleName().c_str(), moduleBufferSize);
529  strlcat(buff, ":", moduleBufferSize);
530  strlcat(buff, edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleLabel().c_str(), moduleBufferSize);
531  } else {
532  strlcat(buff, "none", moduleBufferSize);
533  }
534  strlcat(buff, " (crashed)", moduleBufferSize);
535  full_cerr_write(buff);
536  } else {
537  full_cerr_write("\nModule: non-CMSSW (crashed)");
538  }
539 
540 #ifdef PAUSE_SIGNAL
541  // wait a short interval for the paused threads to resume and fill in their module
542  // information, then print
543  if (InitRootHandlers::doneModules_.is_lock_free()) {
  // At most 999 naps of 1000 ns each while waiting for the other threads.
544  int spincount = 0;
545  timespec t = { 0, 1000 };
546  while (++spincount < 1000 && InitRootHandlers::doneModules_ < notified) { nanosleep(&t, nullptr); }
547  for (std::size_t i = 0; i < InitRootHandlers::doneModules_; ++i) {
548  full_cerr_write(InitRootHandlers::moduleListBuffers_[i].data());
549  }
550  }
551 #endif
552 
553  full_cerr_write("\n\nA fatal system signal has occurred: ");
554  full_cerr_write(signalname);
555  full_cerr_write("\n");
556 
557  // For these four known cases, re-raise the signal so we get the correct
558  // exit code.
559  if ((sig == SIGILL) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig == SIGTERM))
560  {
561  signal(sig, SIG_DFL);
562  raise(sig);
563  }
564  else
565  {
566  ::abort();
567  }
568  }
569 
570  void sig_abort(int sig, siginfo_t*, void*) {
571  full_cerr_write("\n\nFatal system signal has occurred during exit\n");
572 
573  // re-raise the signal to get the correct exit code
574  signal(sig, SIG_DFL);
575  raise(sig);
576 
577  // shouldn't get here
578  ::sleep(10);
579  ::abort();
580  }
581  }
582 
// Give SIGILL, SIGSEGV, SIGBUS and SIGTERM (in that order) their default
// action again.
void set_default_signals() {
  const int handledSignals[] = {SIGILL, SIGSEGV, SIGBUS, SIGTERM};
  for (const int signum : handledSignals) {
    signal(signum, SIG_DFL);
  }
}
589 
590 } // end of unnamed namespace
591 
592 namespace edm {
593  namespace service {
594 
595  /*
596  * We've run into issues where GDB fails to print the thread which calls clone().
597  * To avoid this problem, we have an alternate approach below where the signal handler
598  * only reads/writes to a dedicated thread via pipes. The helper thread does the clone()
599  * invocation; we don't care if that thread is missing from the traceback in this case.
600  */
601  static void cmssw_stacktrace_fork();
602 
604  {
605  int toParent = childToParent_[1];
606  int fromParent = parentToChild_[0];
607  char buf[2]; buf[1] = '\0';
608 
609  while(true)
610  {
611  int result = full_read(fromParent, buf, 1);
612  if (result < 0)
613  {
614  // To avoid a deadlock (this function is NOT re-entrant), reset signals
615  // We never set them back to the CMSSW handler because we assume the parent
616  // thread will abort for us.
617  set_default_signals();
618  close(toParent);
619  full_cerr_write("\n\nTraceback helper thread failed to read from parent: ");
620  full_cerr_write(strerror(-result));
621  full_cerr_write("\n");
622  ::abort();
623  }
624  if (buf[0] == '1')
625  {
626  set_default_signals();
628  full_write(toParent, buf);
629  }
630  else if (buf[0] == '2')
631  {
632  // We have just finished forking. Reload the file descriptors for thread
633  // communication.
634  close(toParent);
635  close(fromParent);
636  toParent = childToParent_[1];
637  fromParent = parentToChild_[0];
638  }
639  else if (buf[0] == '3')
640  {
641  break;
642  }
643  else
644  {
645  set_default_signals();
646  close(toParent);
647  full_cerr_write("\n\nTraceback helper thread got unknown command from parent: ");
648  full_cerr_write(buf);
649  full_cerr_write("\n");
650  ::abort();
651  }
652  }
653  }
654 
656  {
657  int result = full_write(parentToChild_[1], "1");
658  if (result < 0)
659  {
660  full_cerr_write("\n\nAttempt to request stacktrace failed: ");
661  full_cerr_write(strerror(-result));
662  full_cerr_write("\n");
663  return;
664  }
665  char buf[2]; buf[1] = '\0';
666  if ((result = full_read(childToParent_[0], buf, 1, 5*60)) < 0)
667  {
668  full_cerr_write("\n\nWaiting for stacktrace completion failed: ");
669  if (result == -ETIMEDOUT)
670  {
671  full_cerr_write("timed out waiting for GDB to complete.");
672  }
673  else
674  {
675  full_cerr_write(strerror(-result));
676  }
677  full_cerr_write("\n");
678  return;
679  }
680  }
681 
683  {
684  char child_stack[4*1024];
685  char *child_stack_ptr = child_stack + 4*1024;
686  // On Linux, we currently use jemalloc. This registers pthread_atfork handlers; these
687  // handlers are *not* async-signal safe. Hence, a deadlock is possible if we invoke
688  // fork() from our signal handlers. Accordingly, we use clone (not POSIX, but AS-safe)
689  // as that is closer to the 'raw metal' syscall and avoids pthread_atfork handlers.
690  int pid =
691 #ifdef __linux__
692  clone(edm::service::cmssw_stacktrace, child_stack_ptr, CLONE_VM|CLONE_FS|SIGCHLD, nullptr);
693 #else
694  fork();
695  if (child_stack_ptr) {} // Suppress 'unused variable' warning on non-Linux
696  if (pid == 0) {edm::service::cmssw_stacktrace(nullptr); ::abort();}
697 #endif
698  if (pid == -1)
699  {
700  full_cerr_write("(Attempt to perform stack dump failed.)\n");
701  }
702  else
703  {
704  int status;
705  if (waitpid(pid, &status, 0) == -1)
706  {
707  full_cerr_write("(Failed to wait on stack dump output.)\n");
708  }
709  if (status)
710  {
711  full_cerr_write("(GDB stack trace failed unexpectedly)\n");
712  }
713  }
714  }
715 
  // Entry point run in the child created by clone()/fork(): replaces the
  // child with /bin/sh executing the argument vector `argv` (its definition
  // is not visible in this view). The parameter is unused. If the exec call
  // returns, the child aborts.
716  int cmssw_stacktrace(void * /*arg*/)
717  {
719  // NOTE: this is NOT async-signal-safe at CERN's lxplus service.
720  // CERN uses LD_PRELOAD to replace execv with a function from libsnoopy which
721  // calls dlsym.
722 #ifdef __linux__
  // Invoke the system call directly so a preloaded execv replacement is bypassed.
723  syscall(SYS_execve, "/bin/sh", argv, __environ);
724 #else
725  execv("/bin/sh", argv);
726 #endif
  // Only reached when the exec failed.
727  ::abort();
728  return 1;
729  }
730 
731  static char pstackName[] = "(CMSSW stack trace helper)";
732  static char dashC[] = "-c";
735  int InitRootHandlers::parentToChild_[2] = {-1, -1};
736  int InitRootHandlers::childToParent_[2] = {-1, -1};
737  std::unique_ptr<std::thread> InitRootHandlers::helperThread_;
739  std::vector<std::array<char,moduleBufferSize>> InitRootHandlers::moduleListBuffers_;
740  std::atomic<std::size_t> InitRootHandlers::nextModule_(0), InitRootHandlers::doneModules_(0);
742 
743 
745  : RootHandlers(),
746  unloadSigHandler_(pset.getUntrackedParameter<bool> ("UnloadRootSigHandler")),
747  resetErrHandler_(pset.getUntrackedParameter<bool> ("ResetRootErrHandler")),
748  loadAllDictionaries_(pset.getUntrackedParameter<bool>("LoadAllDictionaries")),
749  autoLibraryLoader_(loadAllDictionaries_ or pset.getUntrackedParameter<bool> ("AutoLibraryLoader"))
750  {
751  stackTracePause_ = pset.getUntrackedParameter<int> ("StackTracePauseTime");
752 
753  if(unloadSigHandler_) {
754  // Deactivate all the Root signal handlers and restore the system defaults
755  gSystem->ResetSignal(kSigChild);
756  gSystem->ResetSignal(kSigBus);
757  gSystem->ResetSignal(kSigSegmentationViolation);
758  gSystem->ResetSignal(kSigIllegalInstruction);
759  gSystem->ResetSignal(kSigSystem);
760  gSystem->ResetSignal(kSigPipe);
761  gSystem->ResetSignal(kSigAlarm);
762  gSystem->ResetSignal(kSigUrgent);
763  gSystem->ResetSignal(kSigFloatingException);
764  gSystem->ResetSignal(kSigWindowChanged);
765  } else if(pset.getUntrackedParameter<bool>("AbortOnSignal")){
766  cachePidInfo();
767 
768  //NOTE: ROOT can also be told to abort on these kinds of problems BUT
769  // it requires an TApplication to be instantiated which causes problems
770  gSystem->ResetSignal(kSigBus);
771  gSystem->ResetSignal(kSigSegmentationViolation);
772  gSystem->ResetSignal(kSigIllegalInstruction);
773  installCustomHandler(SIGBUS,sig_dostack_then_abort);
774  sigBusHandler_ = std::shared_ptr<const void>(nullptr,[](void*) {
775  installCustomHandler(SIGBUS,sig_abort);
776  });
777  installCustomHandler(SIGSEGV,sig_dostack_then_abort);
778  sigSegvHandler_ = std::shared_ptr<const void>(nullptr,[](void*) {
779  installCustomHandler(SIGSEGV,sig_abort);
780  });
781  installCustomHandler(SIGILL,sig_dostack_then_abort);
782  sigIllHandler_ = std::shared_ptr<const void>(nullptr,[](void*) {
783  installCustomHandler(SIGILL,sig_abort);
784  });
785  installCustomHandler(SIGTERM,sig_dostack_then_abort);
786  sigTermHandler_ = std::shared_ptr<const void>(nullptr,[](void*) {
787  installCustomHandler(SIGTERM,sig_abort);
788  });
789  }
790 
791  iReg.watchPreallocate([this](edm::service::SystemBounds const& iBounds){
792  if (iBounds.maxNumberOfThreads() > moduleListBuffers_.size()) {
793  moduleListBuffers_.resize(iBounds.maxNumberOfThreads());
794  }
795  });
796 
797  if(resetErrHandler_) {
798 
799  // Replace the Root error handler with one that uses the MessageLogger
800  SetErrorHandler(RootErrorHandler);
801  }
802 
803  // Enable automatic Root library loading.
804  if(autoLibraryLoader_) {
805  gInterpreter->SetClassAutoloading(1);
806  }
807 
808  // Set ROOT parameters.
809  TTree::SetMaxTreeSize(kMaxLong64);
810  TH1::AddDirectory(kFALSE);
811  //G__SetCatchException(0);
812 
813  // Set custom streamers
815 
816  // Load the library containing dictionaries for std:: classes, if not already loaded.
817  if (!hasDictionary(typeid(std::vector<std::vector<unsigned int> >))) {
818  TypeWithDict::byName("std::vector<std::vector<unsigned int> >");
819  }
820 
821  int debugLevel = pset.getUntrackedParameter<int>("DebugLevel");
822  if(debugLevel >0) {
823  gDebug = debugLevel;
824  }
825 
826  // Enable Root implicit multi-threading
827  bool imt = pset.getUntrackedParameter<bool>("EnableIMT");
828  if (imt) ROOT::EnableImplicitMT();
829  }
830 
832  // close all open ROOT files
833  TIter iter(gROOT->GetListOfFiles());
834  TObject *obj = nullptr;
835  while(nullptr != (obj = iter.Next())) {
836  TFile* f = dynamic_cast<TFile*>(obj);
837  if(f) {
838  // We get a new iterator each time,
839  // because closing a file can invalidate the iterator
840  f->Close();
841  iter = TIter(gROOT->GetListOfFiles());
842  }
843  }
844  }
845 
847  //Tell Root we want to be multi-threaded
848  ROOT::EnableThreadSafety();
849 
850  //When threading, also have to keep ROOT from logging all TObjects into a list
851  TObject::SetObjectStat(false);
852 
853  //Have to avoid having Streamers modify themselves after they have been used
854  TVirtualStreamerInfo::Optimize(false);
855  }
856 
859  desc.setComment("Centralized interface to ROOT.");
860  desc.addUntracked<bool>("UnloadRootSigHandler", false)
861  ->setComment("If True, signals are handled by this service, rather than by ROOT.");
862  desc.addUntracked<bool>("ResetRootErrHandler", true)
863  ->setComment("If True, ROOT messages (e.g. errors, warnings) are handled by this service, rather than by ROOT.");
864  desc.addUntracked<bool>("AutoLibraryLoader", true)
865  ->setComment("If True, enables automatic loading of data dictionaries.");
866  desc.addUntracked<bool>("LoadAllDictionaries",false)
867  ->setComment("If True, loads all ROOT dictionaries.");
868  desc.addUntracked<bool>("EnableIMT",false)
869  ->setComment("If True, calls ROOT::EnableImplicitMT().");
870  desc.addUntracked<bool>("AbortOnSignal",true)
871  ->setComment("If True, do an abort when a signal occurs that causes a crash. If False, ROOT will do an exit which attempts to do a clean shutdown.");
872  desc.addUntracked<int>("DebugLevel",0)
873  ->setComment("Sets ROOT's gDebug value.");
874  desc.addUntracked<int>("StackTracePauseTime", 300)
875  ->setComment("Seconds to pause other threads during stack trace.");
876  descriptions.add("InitRootHandlers", desc);
877  }
878 
879  char *const *
881  return pstackArgv_;
882  }
883 
884  void
886  s_ignoreWarnings =false;
887  }
888 
889  void
891  s_ignoreWarnings = true;
892  }
893 
894  void
896  {
897  if(helperThread_) {
898  //Another InitRootHandlers was initialized in this job, possibly
899  // because multiple EventProcessors are being used.
900  //In that case, we are already all setup
901  return;
902  }
903  if (snprintf(pidString_, pidStringLength_-1, "gdb -quiet -p %d 2>&1 <<EOF |\n"
904  "set width 0\n"
905  "set height 0\n"
906  "set pagination no\n"
907  "thread apply all bt\n"
908  "EOF\n"
909  "/bin/sed -n -e 's/^\\((gdb) \\)*//' -e '/^#/p' -e '/^Thread/p'", getpid()) >= pidStringLength_)
910  {
911  std::ostringstream sstr;
912  sstr << "Unable to pre-allocate stacktrace handler information";
913  edm::Exception except(edm::errors::OtherCMS, sstr.str());
914  throw except;
915  }
916 
917  // These are initialized to -1; harmless to close an invalid FD.
918  // If this is called post-fork, we don't want to be communicating on
919  // these FDs as they are used internally by the parent.
920  close(childToParent_[0]);
921  close(childToParent_[1]);
922  childToParent_[0] = -1; childToParent_[1] = -1;
923  close(parentToChild_[0]);
924  close(parentToChild_[1]);
925  parentToChild_[0] = -1; parentToChild_[1] = -1;
926 
927  if (-1 == pipe2(childToParent_, O_CLOEXEC))
928  {
929  std::ostringstream sstr;
930  sstr << "Failed to create child-to-parent pipes (errno=" << errno << "): " << strerror(errno);
931  edm::Exception except(edm::errors::OtherCMS, sstr.str());
932  throw except;
933  }
934 
935  if (-1 == pipe2(parentToChild_, O_CLOEXEC))
936  {
937  close(childToParent_[0]); close(childToParent_[1]);
938  childToParent_[0] = -1; childToParent_[1] = -1;
939  std::ostringstream sstr;
940  sstr << "Failed to create child-to-parent pipes (errno=" << errno << "): " << strerror(errno);
941  edm::Exception except(edm::errors::OtherCMS, sstr.str());
942  throw except;
943  }
944 
945  helperThread_.reset(new std::thread(stacktraceHelperThread));
946  helperThread_->detach();
947  }
948 
949  } // end of namespace service
950 } // end of namespace edm
951 
955 
size
Write out results.
unsigned int maxNumberOfThreads() const
Definition: SystemBounds.h:46
T getUntrackedParameter(std::string const &, T const &) const
static void cmssw_stacktrace_fork()
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
Definition: ServiceMaker.h:117
double seconds()
void watchPreallocate(Preallocate::slot_type const &iSlot)
static char *const pstackArgv_[]
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
bool isProcessWideService(TFileService const *)
Definition: TFileService.h:99
static ModuleCallingContext const * getCurrentModuleOnThread()
void setRefCoreStreamer(bool resetAll=false)
std::vector< Variable::Flags > flags
Definition: MVATrainer.cc:135
void installCustomHandler(int signum, CFUNC func)
std::shared_ptr< const void > sigSegvHandler_
#define constexpr
std::shared_ptr< const void > sigBusHandler_
static TypeWithDict byName(std::string const &name)
Definition: TypeWithDict.cc:59
static std::atomic< std::size_t > doneModules_
static const ThreadTracker::Container_type & threadIDs()
void setComment(std::string const &value)
std::string moduleName(Provenance const &provenance)
Definition: Provenance.cc:27
static std::atomic< std::size_t > nextModule_
static char pidString_[pidStringLength_]
static ThreadTracker threadTracker_
edm::serviceregistry::AllArgsMaker< edm::RootHandlers, InitRootHandlers > RootHandlersMaker
static char *const * getPstackArgv()
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
std::shared_ptr< const void > sigIllHandler_
std::shared_ptr< const void > sigTermHandler_
void addAdditionalInfo(std::string const &info)
Definition: Exception.cc:235
double f[11][100]
int cmssw_stacktrace(void *)
static std::unique_ptr< std::thread > helperThread_
static std::vector< std::array< char, moduleBufferSize > > moduleListBuffers_
static char pstackName[]
InitRootHandlers(ParameterSet const &pset, ActivityRegistry &iReg)
tbb::concurrent_unordered_set< pthread_t > Container_type
void add(std::string const &label, ParameterSetDescription const &psetDescription)
TEveGeoShape * clone(const TEveElement *element, TEveElement *parent)
Definition: eve_macros.cc:135
Definition: TBBSession.h:67
static char dashC[]
HLT enums.
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:82
static void fillDescriptions(ConfigurationDescriptions &descriptions)
SeverityLevel
bool hasDictionary(std::type_info const &)
#define O_NONBLOCK
Definition: SysFile.h:21
def write(self, setup)