22 #include "tbb/task_scheduler_observer.h" 23 #include "tbb/concurrent_unordered_set.h" 41 #include "TInterpreter.h" 44 #include "TUnixSystem.h" 46 #include "TVirtualStreamerInfo.h" 48 #include "TClassTable.h" 55 constexpr std::size_t moduleBufferSize = 128;
157 static thread_local
bool s_ignoreWarnings =
false;
159 static bool s_ignoreEverything =
false;
161 void RootErrorHandlerImpl(
int level,
char const* location,
char const*
message) {
169 if (level >= kFatal) {
170 el_severity = SeverityLevel::kFatal;
171 }
else if (level >= kSysError) {
172 el_severity = SeverityLevel::kSysError;
173 }
else if (level >=
kError) {
179 if(s_ignoreEverything) {
190 if (message != 0) el_message =
message;
201 size_t index1 = el_message.find(precursor);
202 if (index1 != std::string::npos) {
203 size_t index2 = index1 + precursor.length();
204 size_t index3 = el_message.find_first_of(
" :", index2);
205 if (index3 != std::string::npos) {
206 size_t substrlen = index3-index2;
207 el_identifier +=
"-";
208 el_identifier += el_message.substr(index2,substrlen);
211 index1 = el_location.find(
"::");
212 if (index1 != std::string::npos) {
213 el_identifier +=
"/";
214 el_identifier += el_location.substr(0, index1);
220 if ((el_location.find(
"TBranchElement::Fill") != std::string::npos)
221 && (el_message.find(
"fill branch") != std::string::npos)
222 && (el_message.find(
"address") != std::string::npos)
223 && (el_message.find(
"not set") != std::string::npos)) {
224 el_severity = SeverityLevel::kFatal;
227 if ((el_message.find(
"Tree branches") != std::string::npos)
228 && (el_message.find(
"different numbers of entries") != std::string::npos)) {
229 el_severity = SeverityLevel::kFatal;
235 if ((el_message.find(
"no dictionary for class") != std::string::npos) ||
236 (el_message.find(
"already in TClassTable") != std::string::npos) ||
237 (el_message.find(
"matrix not positive definite") != std::string::npos) ||
238 (el_message.find(
"not a TStreamerInfo object") != std::string::npos) ||
239 (el_message.find(
"Problems declaring payload") != std::string::npos) ||
240 (el_message.find(
"Announced number of args different from the real number of argument passed") != std::string::npos) ||
241 (el_location.find(
"Fit") != std::string::npos) ||
242 (el_location.find(
"TDecompChol::Solve") != std::string::npos) ||
243 (el_location.find(
"THistPainter::PaintInit") != std::string::npos) ||
244 (el_location.find(
"TUnixSystem::SetDisplay") != std::string::npos) ||
245 (el_location.find(
"TGClient::GetFontByName") != std::string::npos) ||
246 (el_location.find(
"Inverter::Dinv") != std::string::npos) ||
247 (el_message.find(
"nbins is <=0 - set to nbins = 1") != std::string::npos) ||
248 (el_message.find(
"nbinsy is <=0 - set to nbinsy = 1") != std::string::npos) ||
250 (el_location.find(
"CINTTypedefBuilder::Setup")!= std::string::npos) and
251 (el_message.find(
"possible entries are in use!") != std::string::npos))) {
267 if (die && (el_location !=
std::string(
"@SUB=TUnixSystem::DispatchSignals"))) {
268 std::ostringstream sstr;
269 sstr <<
"Fatal Root Error: " << el_location <<
"\n" << el_message <<
'\n';
272 except.clearMessage();
280 if (el_severity == SeverityLevel::kFatal) {
282 }
else if (el_severity == SeverityLevel::kSysError) {
289 edm::LogInfo(
"Root_Information") << el_location << el_message ;
293 void RootErrorHandler(
int level,
bool,
char const* location,
char const* message) {
294 RootErrorHandlerImpl(level, location, message);
299 static int full_write(
int fd,
const char *
text)
302 size_t count = strlen(text);
306 written =
write(fd, buffer, count);
309 if (errno == EINTR) {
continue;}
310 else {
return -errno;}
318 static int full_read(
int fd,
char *inbuf,
size_t len,
int timeout_s=-1)
322 ssize_t complete = 0;
329 else if ((-1 == (flags = fcntl(fd, F_GETFL))))
335 if (-1 == fcntl(fd, F_SETFL, flags | O_NONBLOCK))
344 struct pollfd poll_info{fd, POLLIN, 0};
346 if (ms_remaining > 0)
348 if (poll(&poll_info, 1, ms_remaining) == 0)
350 if ((flags & O_NONBLOCK) != O_NONBLOCK)
352 fcntl(fd, F_SETFL, flags);
357 else if (ms_remaining < 0)
359 if ((flags & O_NONBLOCK) != O_NONBLOCK)
361 fcntl(fd, F_SETFL, flags);
366 complete = read(fd, buf, count);
369 if (errno == EINTR) {
continue;}
370 else if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
continue;}
373 int orig_errno = errno;
374 if ((flags & O_NONBLOCK) != O_NONBLOCK)
376 fcntl(fd, F_SETFL, flags);
384 if ((flags & O_NONBLOCK) != O_NONBLOCK) {
385 fcntl(fd, F_SETFL, flags);
390 static int full_cerr_write(
const char *text)
392 return full_write(2, text);
399 #if defined(SIGRTMAX) 400 #define PAUSE_SIGNAL SIGRTMAX 401 #define RESUME_SIGNAL SIGRTMAX-1 402 #elif defined(SIGINFO) // macOS/BSD 403 #define PAUSE_SIGNAL SIGINFO 404 #define RESUME_SIGNAL SIGALRM 408 void sig_resume_handler(
int sig, siginfo_t*,
void*) {}
411 void sig_pause_for_stacktrace(
int sig, siginfo_t*,
void*) {
416 sigemptyset(&sigset);
417 sigaddset(&sigset, RESUME_SIGNAL);
418 pthread_sigmask(SIG_UNBLOCK, &sigset, 0);
428 strlcpy(buff,
"\nModule: ", moduleBufferSize);
432 strlcat(buff,
"none", moduleBufferSize);
439 void sig_dostack_then_abort(
int sig, siginfo_t*,
void*) {
444 const auto self = pthread_self();
448 struct sigaction act;
449 act.sa_sigaction = sig_pause_for_stacktrace;
451 sigemptyset(&act.sa_mask);
452 sigaction(PAUSE_SIGNAL, &act,
NULL);
455 sigset_t pausesigset;
456 sigemptyset(&pausesigset);
457 sigaddset(&pausesigset, PAUSE_SIGNAL);
458 sigprocmask(SIG_UNBLOCK, &pausesigset, 0);
461 for (
auto id : tids) {
463 pthread_kill(
id, PAUSE_SIGNAL);
469 act.sa_sigaction = sig_resume_handler;
470 sigaction(RESUME_SIGNAL, &act,
NULL);
475 const char* signalname =
"unknown";
479 signalname =
"bus error";
484 signalname =
"segmentation violation";
489 signalname =
"illegal instruction";
494 signalname =
"external termination request";
500 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
501 full_cerr_write(signalname);
502 full_cerr_write(
"\nThe following is the call stack containing the origin of the signal.\n\n");
511 std::size_t notified = 0;
513 for (
auto id : tids) {
515 if (pthread_kill(
id, RESUME_SIGNAL) == 0) ++notified;
521 full_cerr_write(
"\nCurrent Modules:\n");
528 if (tids.count(
self) > 0) {
529 char buff[moduleBufferSize] =
"\nModule: ";
533 strlcat(buff,
"none", moduleBufferSize);
535 strlcat(buff,
" (crashed)", moduleBufferSize);
536 full_cerr_write(buff);
538 full_cerr_write(
"\nModule: non-CMSSW (crashed)");
546 timespec
t = { 0, 1000 };
554 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
555 full_cerr_write(signalname);
556 full_cerr_write(
"\n");
560 if ((sig == SIGILL) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig == SIGTERM))
562 signal(sig, SIG_DFL);
571 void sig_abort(
int sig, siginfo_t*,
void*) {
572 full_cerr_write(
"\n\nFatal system signal has occurred during exit\n");
575 signal(sig, SIG_DFL);
584 void set_default_signals() {
585 signal(SIGILL, SIG_DFL);
586 signal(SIGSEGV, SIG_DFL);
587 signal(SIGBUS, SIG_DFL);
588 signal(SIGTERM, SIG_DFL);
608 char buf[2]; buf[1] =
'\0';
612 int result = full_read(fromParent, buf, 1);
618 set_default_signals();
620 full_cerr_write(
"\n\nTraceback helper thread failed to read from parent: ");
621 full_cerr_write(strerror(-result));
622 full_cerr_write(
"\n");
627 set_default_signals();
629 full_write(toParent, buf);
631 else if (buf[0] ==
'2')
640 else if (buf[0] ==
'3')
646 set_default_signals();
648 full_cerr_write(
"\n\nTraceback helper thread got unknown command from parent: ");
649 full_cerr_write(buf);
650 full_cerr_write(
"\n");
661 full_cerr_write(
"\n\nAttempt to request stacktrace failed: ");
662 full_cerr_write(strerror(-result));
663 full_cerr_write(
"\n");
666 char buf[2]; buf[1] =
'\0';
669 full_cerr_write(
"\n\nWaiting for stacktrace completion failed: ");
670 if (result == -ETIMEDOUT)
672 full_cerr_write(
"timed out waiting for GDB to complete.");
676 full_cerr_write(strerror(-result));
678 full_cerr_write(
"\n");
685 char child_stack[4*1024];
686 char *child_stack_ptr = child_stack + 4*1024;
696 if (child_stack_ptr) {}
701 full_cerr_write(
"(Attempt to perform stack dump failed.)\n");
706 if (waitpid(pid, &status, 0) == -1)
708 full_cerr_write(
"(Failed to wait on stack dump output.)\n");
712 full_cerr_write(
"(GDB stack trace failed unexpectedly)\n");
724 syscall(SYS_execve,
"/bin/sh", argv, __environ);
726 execv(
"/bin/sh", argv);
756 gSystem->ResetSignal(kSigChild);
757 gSystem->ResetSignal(kSigBus);
758 gSystem->ResetSignal(kSigSegmentationViolation);
759 gSystem->ResetSignal(kSigIllegalInstruction);
760 gSystem->ResetSignal(kSigSystem);
761 gSystem->ResetSignal(kSigPipe);
762 gSystem->ResetSignal(kSigAlarm);
763 gSystem->ResetSignal(kSigUrgent);
764 gSystem->ResetSignal(kSigFloatingException);
765 gSystem->ResetSignal(kSigWindowChanged);
771 gSystem->ResetSignal(kSigBus);
772 gSystem->ResetSignal(kSigSegmentationViolation);
773 gSystem->ResetSignal(kSigIllegalInstruction);
802 SetErrorHandler(RootErrorHandler);
807 gInterpreter->SetClassAutoloading(1);
811 TTree::SetMaxTreeSize(kMaxLong64);
812 TH1::AddDirectory(kFALSE);
819 if (!
hasDictionary(
typeid(std::vector<std::vector<unsigned int> >))) {
830 if (imt) ROOT::EnableImplicitMT();
835 TIter iter(gROOT->GetListOfFiles());
836 TObject *
obj =
nullptr;
837 while(
nullptr != (obj = iter.Next())) {
838 TFile*
f =
dynamic_cast<TFile*
>(
obj);
843 iter = TIter(gROOT->GetListOfFiles());
850 ROOT::EnableThreadSafety();
853 TObject::SetObjectStat(
false);
856 TVirtualStreamerInfo::Optimize(
false);
861 desc.
setComment(
"Centralized interface to ROOT.");
863 ->setComment(
"If True, signals are handled by this service, rather than by ROOT.");
865 ->setComment(
"If True, ROOT messages (e.g. errors, warnings) are handled by this service, rather than by ROOT.");
867 ->setComment(
"If True, enables automatic loading of data dictionaries.");
869 ->setComment(
"If True, loads all ROOT dictionaries.");
871 ->setComment(
"If True, calls ROOT::EnableImplicitMT().");
873 ->setComment(
"If True, do an abort when a signal occurs that causes a crash. If False, ROOT will do an exit which attempts to do a clean shutdown.");
875 ->setComment(
"Sets ROOT's gDebug value.");
877 ->setComment(
"Seconds to pause other threads during stack trace.");
878 descriptions.
add(
"InitRootHandlers", desc);
888 s_ignoreWarnings =
false;
893 s_ignoreWarnings =
true;
908 "set pagination no\n" 909 "thread apply all bt\n" 911 "/bin/sed -n -e 's/^\\((gdb) \\)*//' -e '/^#/p' -e '/^Thread/p'", getpid()) >=
pidStringLength_)
913 std::ostringstream sstr;
914 sstr <<
"Unable to pre-allocate stacktrace handler information";
931 std::ostringstream sstr;
932 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);
941 std::ostringstream sstr;
942 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);
unsigned int maxNumberOfThreads() const
T getUntrackedParameter(std::string const &, T const &) const
bool loadAllDictionaries_
virtual void enableWarnings_() override
static void cmssw_stacktrace_fork()
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
void watchPreallocate(Preallocate::slot_type const &iSlot)
static void stacktraceFromThread()
static char *const pstackArgv_[]
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
Container_type threadIDs_
void on_scheduler_entry(bool)
bool isProcessWideService(TFileService const *)
static ModuleCallingContext const * getCurrentModuleOnThread()
void setRefCoreStreamer(bool resetAll=false)
std::vector< Variable::Flags > flags
static int stackTracePause_
void installCustomHandler(int signum, CFUNC func)
std::shared_ptr< const void > sigSegvHandler_
friend int cmssw_stacktrace(void *)
std::shared_ptr< const void > sigBusHandler_
static TypeWithDict byName(std::string const &name)
static std::atomic< std::size_t > doneModules_
static const ThreadTracker::Container_type & threadIDs()
void cachePidInfoHandler(unsigned int, unsigned int)
void setComment(std::string const &value)
std::string moduleName(Provenance const &provenance)
static std::atomic< std::size_t > nextModule_
static char pidString_[pidStringLength_]
static ThreadTracker threadTracker_
edm::serviceregistry::AllArgsMaker< edm::RootHandlers, InitRootHandlers > RootHandlersMaker
static char *const * getPstackArgv()
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
std::shared_ptr< const void > sigIllHandler_
virtual void ignoreWarnings_() override
static int childToParent_[2]
std::shared_ptr< const void > sigTermHandler_
void addAdditionalInfo(std::string const &info)
int cmssw_stacktrace(void *)
static std::unique_ptr< std::thread > helperThread_
static std::vector< std::array< char, moduleBufferSize > > moduleListBuffers_
virtual ~InitRootHandlers()
InitRootHandlers(ParameterSet const &pset, ActivityRegistry &iReg)
tbb::concurrent_unordered_set< pthread_t > Container_type
void add(std::string const &label, ParameterSetDescription const &psetDescription)
TEveGeoShape * clone(const TEveElement *element, TEveElement *parent)
static int parentToChild_[2]
void watchPostForkReacquireResources(PostForkReacquireResources::slot_type const &iSlot)
virtual void willBeUsingThreads() override
char data[epos_bytes_allocation]
static void fillDescriptions(ConfigurationDescriptions &descriptions)
static const int pidStringLength_
const Container_type & IDs()
static int stackTracePause()
static void stacktraceHelperThread()
bool hasDictionary(std::type_info const &)