22 #include "tbb/task_scheduler_observer.h" 23 #include "tbb/concurrent_unordered_set.h" 41 #include "TInterpreter.h" 44 #include "TUnixSystem.h" 46 #include "TVirtualStreamerInfo.h" 48 #include "TClassTable.h" 55 constexpr std::size_t moduleBufferSize = 128;
// File-scope switches for the ROOT message handling in this file.
// s_ignoreWarnings is thread_local and is assigned false/true further down in
// this file; s_ignoreEverything is tested inside RootErrorHandlerImpl.
152 thread_local
bool s_ignoreWarnings =
false;
// NOTE(review): listing line 153 is not visible in this view, so whether
// s_ignoreEverything is also thread_local cannot be confirmed from here.
154 bool s_ignoreEverything =
false;
// Core handler for a ROOT diagnostic.
//
// Visible stages: (1) map the numeric ROOT level onto el_severity,
// (2) copy the message and derive el_identifier from message/location text,
// (3) force specific known reports to fatal, (4) match a list of known
// message/location substrings, (5) build a "Fatal Root Error" text when `die`
// is set, (6) dispatch on the final severity.
//
// Parameters:
//   level    - ROOT level, compared against kFatal / kSysError / kError.
//   location - C string naming where the report originated.
//   message  - C string with the report text; null-checked before use.
//
// NOTE(review): this listing omits many original lines (declarations of the
// el_* locals, `precursor`, `die`, and most branch bodies), so the comments
// below describe only the statements that are visible.
156 void RootErrorHandlerImpl(
int level,
char const* location,
char const*
message) {
// Severity mapping, highest threshold first.
164 if (level >= kFatal) {
165 el_severity = SeverityLevel::kFatal;
166 }
else if (level >= kSysError) {
167 el_severity = SeverityLevel::kSysError;
168 }
else if (level >=
kError) {
// Global switch; the statements it guards are not visible in this view.
174 if(s_ignoreEverything) {
// Only copy the message when the pointer is non-null.
185 if (message != 0) el_message =
message;
// If `precursor` occurs in the message, the token right after it (ending at
// the next space or ':') is appended to el_identifier after a "-".
196 size_t index1 = el_message.find(precursor);
197 if (index1 != std::string::npos) {
198 size_t index2 = index1 + precursor.length();
199 size_t index3 = el_message.find_first_of(
" :", index2);
200 if (index3 != std::string::npos) {
201 size_t substrlen = index3-index2;
202 el_identifier +=
"-";
203 el_identifier += el_message.substr(index2,substrlen);
// Otherwise-or-additionally (nesting not visible): if the location contains
// "::", append "/" plus the text before the first "::" to el_identifier.
206 index1 = el_location.find(
"::");
207 if (index1 != std::string::npos) {
208 el_identifier +=
"/";
209 el_identifier += el_location.substr(0, index1);
// Escalate to fatal: a TBranchElement::Fill report whose message mentions
// "fill branch", "address" and "not set" (all four substrings required).
215 if ((el_location.find(
"TBranchElement::Fill") != std::string::npos)
216 && (el_message.find(
"fill branch") != std::string::npos)
217 && (el_message.find(
"address") != std::string::npos)
218 && (el_message.find(
"not set") != std::string::npos)) {
219 el_severity = SeverityLevel::kFatal;
// Escalate to fatal: message reports tree branches with different numbers of
// entries.
222 if ((el_message.find(
"Tree branches") != std::string::npos)
223 && (el_message.find(
"different numbers of entries") != std::string::npos)) {
224 el_severity = SeverityLevel::kFatal;
// List of known message/location substrings, OR-ed together. The last pair is
// joined with `and`, which binds tighter than `||`, so the
// CINTTypedefBuilder::Setup location only matches together with the
// "possible entries are in use!" message.
// NOTE(review): the action taken on a match is not visible here (presumably a
// downgrade of the severity - confirm against the full file). The parentheses
// of the last pair also look unbalanced in this listing.
230 if ((el_message.find(
"no dictionary for class") != std::string::npos) ||
231 (el_message.find(
"already in TClassTable") != std::string::npos) ||
232 (el_message.find(
"matrix not positive definite") != std::string::npos) ||
233 (el_message.find(
"not a TStreamerInfo object") != std::string::npos) ||
234 (el_message.find(
"Problems declaring payload") != std::string::npos) ||
235 (el_message.find(
"Announced number of args different from the real number of argument passed") != std::string::npos) ||
236 (el_location.find(
"Fit") != std::string::npos) ||
237 (el_location.find(
"TDecompChol::Solve") != std::string::npos) ||
238 (el_location.find(
"THistPainter::PaintInit") != std::string::npos) ||
239 (el_location.find(
"TUnixSystem::SetDisplay") != std::string::npos) ||
240 (el_location.find(
"TGClient::GetFontByName") != std::string::npos) ||
241 (el_location.find(
"Inverter::Dinv") != std::string::npos) ||
242 (el_message.find(
"nbins is <=0 - set to nbins = 1") != std::string::npos) ||
243 (el_message.find(
"nbinsy is <=0 - set to nbinsy = 1") != std::string::npos) ||
245 (el_location.find(
"CINTTypedefBuilder::Setup")!= std::string::npos) and
246 (el_message.find(
"possible entries are in use!") != std::string::npos))) {
// When `die` is set, and the location is not exactly the
// TUnixSystem::DispatchSignals marker string, format the fatal-error text.
// NOTE(review): how `die` is computed and how `sstr`/`except` are used
// afterwards is not visible in this view.
262 if (die && (el_location !=
std::string(
"@SUB=TUnixSystem::DispatchSignals"))) {
263 std::ostringstream sstr;
264 sstr <<
"Fatal Root Error: " << el_location <<
"\n" << el_message <<
'\n';
267 except.clearMessage();
// Final dispatch on severity. Most branch bodies are missing from this
// listing; one of them logs location and message under the
// "Root_Information" category.
275 if (el_severity == SeverityLevel::kFatal) {
277 }
else if (el_severity == SeverityLevel::kSysError) {
284 edm::LogInfo(
"Root_Information") << el_location << el_message ;
// Four-argument entry point that is passed to SetErrorHandler() later in this
// file. The unnamed bool argument is ignored; level, location and message are
// forwarded unchanged to RootErrorHandlerImpl.
288 void RootErrorHandler(
int level,
bool,
char const* location,
char const* message) {
289 RootErrorHandlerImpl(level, location, message);
// Writes the NUL-terminated string `text` to file descriptor `fd` using
// write().
//
// Visible behavior: the byte count comes from strlen(text); a failed call with
// errno == EINTR is retried (`continue`), any other failure returns -errno.
// NOTE(review): the loop header, the `buffer` pointer setup and the success
// return value are not visible in this listing.
294 static int full_write(
int fd,
const char *
text)
297 size_t count = strlen(text);
301 written =
write(fd, buffer, count);
// Interrupted by a signal: try again. Anything else is reported to the caller
// as a negated errno value.
304 if (errno == EINTR) {
continue;}
305 else {
return -errno;}
// Reads from `fd` into `inbuf` (up to `len` bytes requested), optionally
// bounded by a timeout.
//
// Parameters:
//   fd        - descriptor to read from.
//   inbuf     - destination buffer.
//   len       - number of bytes wanted.
//   timeout_s - timeout; defaults to -1.
//               NOTE(review): presumably seconds with -1 meaning "no timeout",
//               and presumably a timeout yields -ETIMEDOUT (a caller in this
//               file compares the result against -ETIMEDOUT) - confirm.
//
// Visible behavior: the descriptor's flags are fetched and O_NONBLOCK is added,
// poll() waits for POLLIN for the remaining milliseconds, and on every visible
// exit path the original flags are written back if O_NONBLOCK was not set
// originally. read() failures with EINTR, EAGAIN or EWOULDBLOCK are retried.
// NOTE(review): loop structure, timeout arithmetic and all return statements
// are missing from this listing.
313 static int full_read(
int fd,
char *inbuf,
size_t len,
int timeout_s=-1)
317 ssize_t complete = 0;
// Fetch the current file status flags; -1 signals failure of fcntl.
324 else if ((-1 == (flags = fcntl(fd, F_GETFL))))
// Switch the descriptor to non-blocking mode, keeping the other flags.
330 if (-1 == fcntl(fd, F_SETFL, flags | O_NONBLOCK))
339 struct pollfd poll_info{fd, POLLIN, 0};
341 if (ms_remaining > 0)
// poll() returning 0 means nothing became readable within ms_remaining.
343 if (poll(&poll_info, 1, ms_remaining) == 0)
// Restore the caller's flags only if this function added O_NONBLOCK.
345 if ((flags & O_NONBLOCK) != O_NONBLOCK)
347 fcntl(fd, F_SETFL, flags);
// Negative remaining time: same flag restoration before leaving.
352 else if (ms_remaining < 0)
354 if ((flags & O_NONBLOCK) != O_NONBLOCK)
356 fcntl(fd, F_SETFL, flags);
361 complete = read(fd, buf, count);
// Transient conditions: retry the loop.
364 if (errno == EINTR) {
continue;}
365 else if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
continue;}
// errno is saved before the flag-restoring fcntl call (presumably so that
// call cannot overwrite the value that gets reported - confirm).
368 int orig_errno = errno;
369 if ((flags & O_NONBLOCK) != O_NONBLOCK)
371 fcntl(fd, F_SETFL, flags);
// Final flag restoration on the remaining visible exit path.
379 if ((flags & O_NONBLOCK) != O_NONBLOCK) {
380 fcntl(fd, F_SETFL, flags);
// Convenience wrapper: writes `text` to file descriptor 2 (standard error) via
// full_write and returns that call's result.
385 static int full_cerr_write(
const char *text)
387 return full_write(2, text);
// Signal numbers used to pause and resume threads around the stack trace:
// SIGRTMAX / SIGRTMAX-1 where SIGRTMAX is defined, otherwise SIGINFO / SIGALRM
// where SIGINFO is defined.
// NOTE(review): the remainder of this preprocessor chain (any #else/#endif) is
// not visible in this listing.
//
// sig_resume_handler has an empty body; it is installed for RESUME_SIGNAL in
// sig_dostack_then_abort.
394 #if defined(SIGRTMAX) 395 #define PAUSE_SIGNAL SIGRTMAX 396 #define RESUME_SIGNAL SIGRTMAX-1 397 #elif defined(SIGINFO) // macOS/BSD 398 #define PAUSE_SIGNAL SIGINFO 399 #define RESUME_SIGNAL SIGALRM 403 void sig_resume_handler(
int sig, siginfo_t*,
void*) {}
// Handler installed for PAUSE_SIGNAL by sig_dostack_then_abort.
//
// Visible behavior: unblocks RESUME_SIGNAL for the calling thread, then
// assembles a "\nModule: ..." line in `buff`, with every copy/append bounded
// by moduleBufferSize.
// NOTE(review): the declaration of `buff`, the source of the module name and
// the actual waiting step are not visible in this listing.
406 void sig_pause_for_stacktrace(
int sig, siginfo_t*,
void*) {
// Make sure this thread can receive RESUME_SIGNAL.
411 sigemptyset(&sigset);
412 sigaddset(&sigset, RESUME_SIGNAL);
413 pthread_sigmask(SIG_UNBLOCK, &sigset, 0);
// Bounded string building; "none" is appended on a branch whose condition is
// not visible (presumably when no module is active on this thread - confirm).
423 strlcpy(buff,
"\nModule: ", moduleBufferSize);
426 strlcat(buff,
":", moduleBufferSize);
429 strlcat(buff,
"none", moduleBufferSize);
// Handler for fatal signals that prints diagnostics to stderr.
//
// Visible sequence: install the pause handler and signal the threads in `tids`
// with PAUSE_SIGNAL, install the resume handler, print a banner naming the
// signal, signal the threads with RESUME_SIGNAL, print the module running on
// the crashing thread, and finally reset the disposition of the fatal signal
// to SIG_DFL.
// NOTE(review): many original lines are missing from this listing (definition
// of `tids`, the switch labels selecting `signalname`, the call that produces
// the stack trace, and whatever follows the final signal() call), so the
// comments below cover only what is visible.
436 void sig_dostack_then_abort(
int sig, siginfo_t*,
void*) {
// Identity of the thread running this handler; used below to check whether it
// is one of the tracked threads.
441 const auto self = pthread_self();
// Route PAUSE_SIGNAL to sig_pause_for_stacktrace, with an empty handler mask.
445 struct sigaction act;
446 act.sa_sigaction = sig_pause_for_stacktrace;
448 sigemptyset(&act.sa_mask);
449 sigaction(PAUSE_SIGNAL, &act,
NULL);
// Unblock PAUSE_SIGNAL in the current signal mask.
452 sigset_t pausesigset;
453 sigemptyset(&pausesigset);
454 sigaddset(&pausesigset, PAUSE_SIGNAL);
455 sigprocmask(SIG_UNBLOCK, &pausesigset, 0);
// Deliver PAUSE_SIGNAL to the threads in `tids`.
// NOTE(review): any filtering between the loop header and pthread_kill (for
// example skipping `self`) is not visible here.
458 for (
auto id : tids) {
460 pthread_kill(
id, PAUSE_SIGNAL);
// Reuse `act` to route RESUME_SIGNAL to the empty sig_resume_handler.
466 act.sa_sigaction = sig_resume_handler;
467 sigaction(RESUME_SIGNAL, &act,
NULL);
// Human-readable name for the banner; stays "unknown" unless one of the
// assignments below runs (their selecting conditions are not visible).
472 const char* signalname =
"unknown";
476 signalname =
"bus error";
481 signalname =
"segmentation violation";
486 signalname =
"illegal instruction";
491 signalname =
"external termination request";
// Banner written straight to stderr through full_cerr_write.
497 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
498 full_cerr_write(signalname);
499 full_cerr_write(
"\nThe following is the call stack containing the origin of the signal.\n\n");
// Send RESUME_SIGNAL to the threads and count the successful deliveries.
508 std::size_t notified = 0;
510 for (
auto id : tids) {
512 if (pthread_kill(
id, RESUME_SIGNAL) == 0) ++notified;
518 full_cerr_write(
"\nCurrent Modules:\n");
// If the crashing thread is a tracked thread, print its module line tagged
// " (crashed)"; the other visible message labels it as non-CMSSW.
525 if (tids.count(
self) > 0) {
526 char buff[moduleBufferSize] =
"\nModule: ";
529 strlcat(buff,
":", moduleBufferSize);
532 strlcat(buff,
"none", moduleBufferSize);
534 strlcat(buff,
" (crashed)", moduleBufferSize);
535 full_cerr_write(buff);
537 full_cerr_write(
"\nModule: non-CMSSW (crashed)");
// 0 seconds, 1000 nanoseconds; the call that consumes `t` is not visible.
545 timespec
t = { 0, 1000 };
// Second copy of the banner. NOTE(review): the condition selecting between
// this and the earlier banner is not visible in this listing.
553 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
554 full_cerr_write(signalname);
555 full_cerr_write(
"\n");
// For the four fatal signals handled here, restore the default disposition.
559 if ((sig == SIGILL) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig == SIGTERM))
561 signal(sig, SIG_DFL);
// Signal handler that reports a fatal signal raised during exit: writes a
// fixed message to stderr and resets the disposition of `sig` to SIG_DFL.
// NOTE(review): statements between and after the two visible calls are missing
// from this listing.
570 void sig_abort(
int sig, siginfo_t*,
void*) {
571 full_cerr_write(
"\n\nFatal system signal has occurred during exit\n");
574 signal(sig, SIG_DFL);
// Restores the default disposition (SIG_DFL) for SIGILL, SIGSEGV, SIGBUS and
// SIGTERM. Called elsewhere in this file before several error reports are
// written to stderr.
583 void set_default_signals() {
584 signal(SIGILL, SIG_DFL);
585 signal(SIGSEGV, SIG_DFL);
586 signal(SIGBUS, SIG_DFL);
587 signal(SIGTERM, SIG_DFL);
607 char buf[2]; buf[1] =
'\0';
611 int result = full_read(fromParent, buf, 1);
617 set_default_signals();
619 full_cerr_write(
"\n\nTraceback helper thread failed to read from parent: ");
620 full_cerr_write(strerror(-result));
621 full_cerr_write(
"\n");
626 set_default_signals();
628 full_write(toParent, buf);
630 else if (buf[0] ==
'2')
639 else if (buf[0] ==
'3')
645 set_default_signals();
647 full_cerr_write(
"\n\nTraceback helper thread got unknown command from parent: ");
648 full_cerr_write(buf);
649 full_cerr_write(
"\n");
660 full_cerr_write(
"\n\nAttempt to request stacktrace failed: ");
661 full_cerr_write(strerror(-result));
662 full_cerr_write(
"\n");
665 char buf[2]; buf[1] =
'\0';
668 full_cerr_write(
"\n\nWaiting for stacktrace completion failed: ");
669 if (result == -ETIMEDOUT)
671 full_cerr_write(
"timed out waiting for GDB to complete.");
675 full_cerr_write(strerror(-result));
677 full_cerr_write(
"\n");
684 char child_stack[4*1024];
685 char *child_stack_ptr = child_stack + 4*1024;
695 if (child_stack_ptr) {}
700 full_cerr_write(
"(Attempt to perform stack dump failed.)\n");
705 if (waitpid(pid, &status, 0) == -1)
707 full_cerr_write(
"(Failed to wait on stack dump output.)\n");
711 full_cerr_write(
"(GDB stack trace failed unexpectedly)\n");
723 syscall(SYS_execve,
"/bin/sh", argv, __environ);
725 execv(
"/bin/sh", argv);
755 gSystem->ResetSignal(kSigChild);
756 gSystem->ResetSignal(kSigBus);
757 gSystem->ResetSignal(kSigSegmentationViolation);
758 gSystem->ResetSignal(kSigIllegalInstruction);
759 gSystem->ResetSignal(kSigSystem);
760 gSystem->ResetSignal(kSigPipe);
761 gSystem->ResetSignal(kSigAlarm);
762 gSystem->ResetSignal(kSigUrgent);
763 gSystem->ResetSignal(kSigFloatingException);
764 gSystem->ResetSignal(kSigWindowChanged);
770 gSystem->ResetSignal(kSigBus);
771 gSystem->ResetSignal(kSigSegmentationViolation);
772 gSystem->ResetSignal(kSigIllegalInstruction);
800 SetErrorHandler(RootErrorHandler);
805 gInterpreter->SetClassAutoloading(1);
809 TTree::SetMaxTreeSize(kMaxLong64);
810 TH1::AddDirectory(kFALSE);
817 if (!
hasDictionary(
typeid(std::vector<std::vector<unsigned int> >))) {
828 if (imt) ROOT::EnableImplicitMT();
833 TIter iter(gROOT->GetListOfFiles());
834 TObject *
obj =
nullptr;
835 while(
nullptr != (obj = iter.Next())) {
836 TFile*
f =
dynamic_cast<TFile*
>(
obj);
841 iter = TIter(gROOT->GetListOfFiles());
848 ROOT::EnableThreadSafety();
851 TObject::SetObjectStat(
false);
854 TVirtualStreamerInfo::Optimize(
false);
859 desc.
setComment(
"Centralized interface to ROOT.");
861 ->setComment(
"If True, signals are handled by this service, rather than by ROOT.");
863 ->setComment(
"If True, ROOT messages (e.g. errors, warnings) are handled by this service, rather than by ROOT.");
865 ->setComment(
"If True, enables automatic loading of data dictionaries.");
867 ->setComment(
"If True, loads all ROOT dictionaries.");
869 ->setComment(
"If True, calls ROOT::EnableImplicitMT().");
871 ->setComment(
"If True, do an abort when a signal occurs that causes a crash. If False, ROOT will do an exit which attempts to do a clean shutdown.");
873 ->setComment(
"Sets ROOT's gDebug value.");
875 ->setComment(
"Seconds to pause other threads during stack trace.");
876 descriptions.
add(
"InitRootHandlers", desc);
886 s_ignoreWarnings =
false;
891 s_ignoreWarnings =
true;
906 "set pagination no\n" 907 "thread apply all bt\n" 909 "/bin/sed -n -e 's/^\\((gdb) \\)*//' -e '/^#/p' -e '/^Thread/p'", getpid()) >=
pidStringLength_)
911 std::ostringstream sstr;
912 sstr <<
"Unable to pre-allocate stacktrace handler information";
929 std::ostringstream sstr;
930 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);
939 std::ostringstream sstr;
940 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);
unsigned int maxNumberOfThreads() const
T getUntrackedParameter(std::string const &, T const &) const
bool loadAllDictionaries_
virtual void enableWarnings_() override
static void cmssw_stacktrace_fork()
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
void watchPreallocate(Preallocate::slot_type const &iSlot)
static void stacktraceFromThread()
static char *const pstackArgv_[]
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
Container_type threadIDs_
void on_scheduler_entry(bool)
bool isProcessWideService(TFileService const *)
static ModuleCallingContext const * getCurrentModuleOnThread()
void setRefCoreStreamer(bool resetAll=false)
std::vector< Variable::Flags > flags
static int stackTracePause_
void installCustomHandler(int signum, CFUNC func)
std::shared_ptr< const void > sigSegvHandler_
friend int cmssw_stacktrace(void *)
std::shared_ptr< const void > sigBusHandler_
static TypeWithDict byName(std::string const &name)
static std::atomic< std::size_t > doneModules_
static const ThreadTracker::Container_type & threadIDs()
void setComment(std::string const &value)
std::string moduleName(Provenance const &provenance)
static std::atomic< std::size_t > nextModule_
static char pidString_[pidStringLength_]
static ThreadTracker threadTracker_
edm::serviceregistry::AllArgsMaker< edm::RootHandlers, InitRootHandlers > RootHandlersMaker
static char *const * getPstackArgv()
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
std::shared_ptr< const void > sigIllHandler_
virtual void ignoreWarnings_() override
static int childToParent_[2]
std::shared_ptr< const void > sigTermHandler_
void addAdditionalInfo(std::string const &info)
int cmssw_stacktrace(void *)
static std::unique_ptr< std::thread > helperThread_
static std::vector< std::array< char, moduleBufferSize > > moduleListBuffers_
virtual ~InitRootHandlers()
InitRootHandlers(ParameterSet const &pset, ActivityRegistry &iReg)
tbb::concurrent_unordered_set< pthread_t > Container_type
void add(std::string const &label, ParameterSetDescription const &psetDescription)
TEveGeoShape * clone(const TEveElement *element, TEveElement *parent)
static int parentToChild_[2]
virtual void willBeUsingThreads() override
char data[epos_bytes_allocation]
static void fillDescriptions(ConfigurationDescriptions &descriptions)
static const int pidStringLength_
const Container_type & IDs()
static int stackTracePause()
static void stacktraceHelperThread()
bool hasDictionary(std::type_info const &)