22 #include "tbb/task_scheduler_observer.h" 23 #include "tbb/concurrent_unordered_set.h" 41 #include "TInterpreter.h" 44 #include "TUnixSystem.h" 46 #include "TVirtualStreamerInfo.h" 48 #include "TClassTable.h" 55 constexpr std::size_t moduleBufferSize = 128;
// Per-thread switch (thread_local). In this excerpt it is only ever assigned:
// `false` at one site and `true` at another, both much further down the file.
152 thread_local
bool s_ignoreWarnings =
false;
// Flag read by an `if(s_ignoreEverything)` test further down; no assignment
// other than this initializer appears in this excerpt.
154 bool s_ignoreEverything =
false;
// Core ROOT message handler: receives ROOT's numeric `level`, the reporting
// `location` and the `message` text, classifies the report and forwards it.
// NOTE: this listing is not contiguous -- the number at the start of a line is
// the original file's line number and many lines in between are absent, so the
// comments below describe only what the visible lines establish.
156 void RootErrorHandlerImpl(
int level,
char const* location,
char const*
message) {
// Translate `level` into a SeverityLevel, testing the highest threshold first.
164 if (level >= kFatal) {
165 el_severity = SeverityLevel::kFatal;
166 }
else if (level >= kSysError) {
167 el_severity = SeverityLevel::kSysError;
168 }
else if (level >=
kError) {
// Global flag test; what happens when it is set is on lines not shown here.
174 if(s_ignoreEverything) {
// Copy the message text only when the pointer is non-null.
185 if (message !=
nullptr) el_message =
message;
// If `precursor` occurs in the message, take the token that follows it (up to
// the next space or ':') and append it to the identifier after a '-'.
196 size_t index1 = el_message.find(precursor);
197 if (index1 != std::string::npos) {
198 size_t index2 = index1 + precursor.length();
199 size_t index3 = el_message.find_first_of(
" :", index2);
200 if (index3 != std::string::npos) {
201 size_t substrlen = index3-index2;
202 el_identifier +=
"-";
203 el_identifier += el_message.substr(index2,substrlen);
// If the location contains a scope separator, append '/' plus the text that
// precedes the first one to the identifier.
206 index1 = el_location.find(
"::");
207 if (index1 != std::string::npos) {
208 el_identifier +=
"/";
209 el_identifier += el_location.substr(0, index1);
// Escalate to fatal when TBranchElement::Fill reports a branch fill whose
// address is not set (all four substrings must match).
215 if ((el_location.find(
"TBranchElement::Fill") != std::string::npos)
216 && (el_message.find(
"fill branch") != std::string::npos)
217 && (el_message.find(
"address") != std::string::npos)
218 && (el_message.find(
"not set") != std::string::npos)) {
219 el_severity = SeverityLevel::kFatal;
// Escalate to fatal when the message reports tree branches with different
// numbers of entries.
222 if ((el_message.find(
"Tree branches") != std::string::npos)
223 && (el_message.find(
"different numbers of entries") != std::string::npos)) {
224 el_severity = SeverityLevel::kFatal;
// A list of known message/location substrings OR-ed together; the action taken
// on a match is on lines not shown here. The last two tests are joined with
// `and`; the opening parenthesis of that group is presumably on a line that is
// missing from this view.
230 if ((el_message.find(
"no dictionary for class") != std::string::npos) ||
231 (el_message.find(
"already in TClassTable") != std::string::npos) ||
232 (el_message.find(
"matrix not positive definite") != std::string::npos) ||
233 (el_message.find(
"not a TStreamerInfo object") != std::string::npos) ||
234 (el_message.find(
"Problems declaring payload") != std::string::npos) ||
235 (el_message.find(
"Announced number of args different from the real number of argument passed") != std::string::npos) ||
236 (el_location.find(
"Fit") != std::string::npos) ||
237 (el_location.find(
"TDecompChol::Solve") != std::string::npos) ||
238 (el_location.find(
"THistPainter::PaintInit") != std::string::npos) ||
239 (el_location.find(
"TUnixSystem::SetDisplay") != std::string::npos) ||
240 (el_location.find(
"TGClient::GetFontByName") != std::string::npos) ||
241 (el_location.find(
"Inverter::Dinv") != std::string::npos) ||
242 (el_message.find(
"nbins is <=0 - set to nbins = 1") != std::string::npos) ||
243 (el_message.find(
"nbinsy is <=0 - set to nbinsy = 1") != std::string::npos) ||
245 (el_location.find(
"CINTTypedefBuilder::Setup")!= std::string::npos) and
246 (el_message.find(
"possible entries are in use!") != std::string::npos))) {
// `die` must be set and the location must differ from the DispatchSignals
// marker before the "Fatal Root Error" text is assembled. How `die` is
// computed is not visible in this excerpt.
262 if (die && (el_location !=
std::string(
"@SUB=TUnixSystem::DispatchSignals"))) {
263 std::ostringstream sstr;
264 sstr <<
"Fatal Root Error: " << el_location <<
"\n" << el_message <<
'\n';
267 except.clearMessage();
// Final reporting step keyed on the computed severity. Only one logging call
// (edm::LogInfo) is visible; the bodies of the other branches are not shown.
275 if (el_severity == SeverityLevel::kFatal) {
277 }
else if (el_severity == SeverityLevel::kSysError) {
284 edm::LogInfo(
"Root_Information") << el_location << el_message ;
// Adapter with a four-argument signature (level, bool, location, message).
// The unnamed bool argument is ignored; the remaining three are forwarded
// unchanged to RootErrorHandlerImpl. This is the function passed to
// SetErrorHandler() further down the file.
288 void RootErrorHandler(
int level,
bool,
char const* location,
char const* message) {
289 RootErrorHandlerImpl(level, location, message);
// Write the NUL-terminated string `text` to file descriptor `fd`.
// Visible behaviour: the byte count comes from strlen(text); a write()
// interrupted by a signal (EINTR) is retried; any other errno is returned
// negated. The surrounding loop and the success return are on lines that are
// not part of this view.
294 static int full_write(
int fd,
const char *
text)
297 size_t count = strlen(text);
301 written =
write(fd, buffer, count);
// EINTR means "interrupted, try again"; everything else is reported as -errno.
304 if (errno == EINTR) {
continue;}
305 else {
return -errno;}
// Read from `fd` into `inbuf` (capacity `len`), with an optional timeout given
// in seconds (`timeout_s`, default -1).
// Visible logic: the descriptor's status flags are fetched and O_NONBLOCK is
// added; the descriptor is polled for POLLIN using `ms_remaining`; EINTR/EAGAIN
// results are retried; and on each exit path shown the original flags are put
// back if the descriptor was not already non-blocking.
// Many lines (loop headers, returns, timeout bookkeeping) are missing from this
// view, so return values are not documented here.
313 static int full_read(
int fd,
char *inbuf,
size_t len,
int timeout_s=-1)
317 ssize_t complete = 0;
// Remember the current file status flags so they can be restored later.
324 else if ((-1 == (flags = fcntl(fd, F_GETFL))))
// Add O_NONBLOCK on top of whatever flags were already set.
330 if (-1 == fcntl(fd, F_SETFL, flags | O_NONBLOCK))
339 struct pollfd poll_info{fd, POLLIN, 0};
341 if (ms_remaining > 0)
343 int rc = poll(&poll_info, 1, ms_remaining);
// Transient poll() failures are retried.
347 if (errno == EINTR || errno == EAGAIN) {
continue; }
// Restore the saved flags only when O_NONBLOCK was not set on entry.
352 if ((flags & O_NONBLOCK) != O_NONBLOCK)
354 fcntl(fd, F_SETFL, flags);
359 else if (ms_remaining < 0)
361 if ((flags & O_NONBLOCK) != O_NONBLOCK)
363 fcntl(fd, F_SETFL, flags);
368 complete = read(fd, buf, count);
// Interrupted or would-block reads are retried.
371 if (errno == EINTR) {
continue;}
372 else if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
continue;}
// Save errno before the fcntl() call below has a chance to change it.
375 int orig_errno = errno;
376 if ((flags & O_NONBLOCK) != O_NONBLOCK)
378 fcntl(fd, F_SETFL, flags);
386 if ((flags & O_NONBLOCK) != O_NONBLOCK) {
387 fcntl(fd, F_SETFL, flags);
// Convenience wrapper: writes `text` to file descriptor 2 via full_write() and
// returns full_write()'s result unchanged.
392 static int full_cerr_write(
const char *text)
394 return full_write(2, text);
// Selects the signals used to pause and resume threads: SIGRTMAX and
// SIGRTMAX-1 when SIGRTMAX is defined, otherwise SIGINFO and SIGALRM when
// SIGINFO is defined. The handler that follows has an empty body.
// NOTE(review): RESUME_SIGNAL expands to the unparenthesized expression
// `SIGRTMAX-1`; every use visible in this excerpt is a plain function argument,
// but wrapping the expansion in parentheses would be safer -- confirm and fix
// in the real source.
401 #if defined(SIGRTMAX) 402 #define PAUSE_SIGNAL SIGRTMAX 403 #define RESUME_SIGNAL SIGRTMAX-1 404 #elif defined(SIGINFO) // macOS/BSD 405 #define PAUSE_SIGNAL SIGINFO 406 #define RESUME_SIGNAL SIGALRM 410 void sig_resume_handler(
int sig, siginfo_t*,
void*) {}
// Handler installed for PAUSE_SIGNAL (see the sigaction() call in
// sig_dostack_then_abort). Visible steps: unblock RESUME_SIGNAL for the calling
// thread, then assemble a "\nModule: " text in `buff`, with every copy/append
// bounded by moduleBufferSize. The lines that fill in the module details and
// wait for the resume signal are not part of this view.
413 void sig_pause_for_stacktrace(
int sig, siginfo_t*,
void*) {
// Build a signal set containing only RESUME_SIGNAL and unblock it.
418 sigemptyset(&sigset);
419 sigaddset(&sigset, RESUME_SIGNAL);
420 pthread_sigmask(SIG_UNBLOCK, &sigset,
nullptr);
430 strlcpy(buff,
"\nModule: ", moduleBufferSize);
433 strlcat(buff,
":", moduleBufferSize);
// Appended at a different site than the ':' above -- presumably the fallback
// used when no module information is available; the condition is not visible.
436 strlcat(buff,
"none", moduleBufferSize);
// Signal handler (sa_sigaction-style signature) for fatal signals.
// Visible sequence: install the pause handler and send PAUSE_SIGNAL to every
// thread id in `tids`; install the resume handler; print which signal occurred;
// send RESUME_SIGNAL to the same threads; print module information for the
// crashing thread; finally restore the default disposition for the four fatal
// signals listed at the end.
// Many lines are missing from this view (including where `tids` comes from and
// where the stack trace itself is produced), so only visible steps are noted.
443 void sig_dostack_then_abort(
int sig, siginfo_t*,
void*) {
// Identity of the thread running this handler; used below to check whether it
// is one of the tracked threads.
448 const auto self = pthread_self();
// Register sig_pause_for_stacktrace for PAUSE_SIGNAL with an empty signal mask.
452 struct sigaction act;
453 act.sa_sigaction = sig_pause_for_stacktrace;
455 sigemptyset(&act.sa_mask);
456 sigaction(PAUSE_SIGNAL, &act,
nullptr);
// Make sure PAUSE_SIGNAL is not blocked.
459 sigset_t pausesigset;
460 sigemptyset(&pausesigset);
461 sigaddset(&pausesigset, PAUSE_SIGNAL);
462 sigprocmask(SIG_UNBLOCK, &pausesigset,
nullptr);
// Deliver PAUSE_SIGNAL to each tracked thread.
465 for (
auto id : tids) {
467 pthread_kill(
id, PAUSE_SIGNAL);
// Reuse the same sigaction struct to register the (empty) resume handler.
473 act.sa_sigaction = sig_resume_handler;
474 sigaction(RESUME_SIGNAL, &act,
nullptr);
// Human-readable name of the signal; the selecting switch/case lines are not
// visible, only the four assignments.
479 const char* signalname =
"unknown";
483 signalname =
"bus error";
488 signalname =
"segmentation violation";
493 signalname =
"illegal instruction";
498 signalname =
"external termination request";
504 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
505 full_cerr_write(signalname);
506 full_cerr_write(
"\nThe following is the call stack containing the origin of the signal.\n\n");
// Count how many threads were successfully sent RESUME_SIGNAL.
515 std::size_t notified = 0;
517 for (
auto id : tids) {
519 if (pthread_kill(
id, RESUME_SIGNAL) == 0) ++notified;
525 full_cerr_write(
"\nCurrent Modules:\n");
// If the crashing thread is one of the tracked threads, print its module line
// tagged " (crashed)"; the "non-CMSSW" message below is apparently the
// alternative branch (its `else` is not visible).
532 if (tids.count(
self) > 0) {
533 char buff[moduleBufferSize] =
"\nModule: ";
536 strlcat(buff,
":", moduleBufferSize);
539 strlcat(buff,
"none", moduleBufferSize);
541 strlcat(buff,
" (crashed)", moduleBufferSize);
542 full_cerr_write(buff);
544 full_cerr_write(
"\nModule: non-CMSSW (crashed)");
// {seconds, nanoseconds} = {0, 1000}; where this interval is used is not
// visible in this excerpt.
552 timespec
t = { 0, 1000 };
// Second copy of the banner, without the call-stack sentence -- presumably an
// alternative path; the selecting condition is not visible.
560 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
561 full_cerr_write(signalname);
562 full_cerr_write(
"\n");
// For these four signals the default disposition is restored.
566 if ((sig == SIGILL) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig == SIGTERM))
568 signal(sig, SIG_DFL);
// Signal handler that reports a fatal signal raised during exit: writes a fixed
// message to stderr and restores the default disposition for `sig`. Lines
// between and after the two visible statements are not part of this view.
577 void sig_abort(
int sig, siginfo_t*,
void*) {
578 full_cerr_write(
"\n\nFatal system signal has occurred during exit\n");
581 signal(sig, SIG_DFL);
// Restore the default disposition for SIGILL, SIGSEGV, SIGBUS and SIGTERM --
// the same four signals tested at the end of sig_dostack_then_abort.
590 void set_default_signals() {
591 signal(SIGILL, SIG_DFL);
592 signal(SIGSEGV, SIG_DFL);
593 signal(SIGBUS, SIG_DFL);
594 signal(SIGTERM, SIG_DFL);
// Fragment; the enclosing function header is not in this view (its own
// diagnostics refer to a "Traceback helper thread"). It reads a one-byte
// command from `fromParent` and branches on the byte's value.
// Two-byte buffer with the second byte pre-set to NUL so that `buf` can be
// passed as a C string to full_write()/full_cerr_write() below.
614 char buf[2]; buf[1] =
'\0';
618 int result = full_read(fromParent, buf, 1);
// Read failure path: `result` is treated as a negated errno value.
624 set_default_signals();
626 full_cerr_write(
"\n\nTraceback helper thread failed to read from parent: ");
627 full_cerr_write(strerror(-result));
628 full_cerr_write(
"\n");
// On this path the byte that was read is written back over `toParent`; which
// command value selects it is not visible.
633 set_default_signals();
635 full_write(toParent, buf);
// Commands '2' and '3': their bodies are on lines not shown here.
637 else if (buf[0] ==
'2')
646 else if (buf[0] ==
'3')
// Any other byte is reported as an unknown command.
652 set_default_signals();
654 full_cerr_write(
"\n\nTraceback helper thread got unknown command from parent: ");
655 full_cerr_write(buf);
656 full_cerr_write(
"\n");
// Fragment; the enclosing function header is not in this view. The visible
// lines are the diagnostics for two failures: requesting a stack trace, and
// waiting for it to complete. In both, `result` is treated as a negated errno.
667 full_cerr_write(
"\n\nAttempt to request stacktrace failed: ");
668 full_cerr_write(strerror(-result));
669 full_cerr_write(
"\n");
672 char buf[2]; buf[1] =
'\0';
675 full_cerr_write(
"\n\nWaiting for stacktrace completion failed: ");
// A timeout gets its own message; other errors are described via strerror().
676 if (result == -ETIMEDOUT)
678 full_cerr_write(
"timed out waiting for GDB to complete.");
682 full_cerr_write(strerror(-result));
684 full_cerr_write(
"\n");
// Fragment; the enclosing function header is not in this view.
// A 4 KiB local buffer and a pointer to one past its end; how the pointer is
// used (presumably as the top of a child stack -- TODO confirm) is on lines not
// shown here.
691 char child_stack[4*1024];
692 char *child_stack_ptr = child_stack + 4*1024;
// Empty statement body: presumably only references the variable so that it is
// not reported as unused on a code path that does not need it -- TODO confirm.
702 if (child_stack_ptr) {}
707 full_cerr_write(
"(Attempt to perform stack dump failed.)\n");
// Wait for the child identified by `pid`; a waitpid() failure is reported.
712 if (waitpid(pid, &status, 0) == -1)
714 full_cerr_write(
"(Failed to wait on stack dump output.)\n");
718 full_cerr_write(
"(GDB stack trace failed unexpectedly)\n");
// Fragment; the enclosing function header is not in this view.
// Two ways of replacing the process image with /bin/sh using `argv`: a raw
// SYS_execve syscall that passes `__environ`, and execv(). They are presumably
// alternatives selected by conditional lines not shown here.
730 syscall(SYS_execve,
"/bin/sh", argv, __environ);
732 execv(
"/bin/sh", argv);
// Fragment; the enclosing function header is not in this view.
// First group: ROOT's handling is reset for ten signals.
762 gSystem->ResetSignal(kSigChild);
763 gSystem->ResetSignal(kSigBus);
764 gSystem->ResetSignal(kSigSegmentationViolation);
765 gSystem->ResetSignal(kSigIllegalInstruction);
766 gSystem->ResetSignal(kSigSystem);
767 gSystem->ResetSignal(kSigPipe);
768 gSystem->ResetSignal(kSigAlarm);
769 gSystem->ResetSignal(kSigUrgent);
770 gSystem->ResetSignal(kSigFloatingException);
771 gSystem->ResetSignal(kSigWindowChanged);
// Second group, at a separate site: only bus, segmentation-violation and
// illegal-instruction are reset. The conditions selecting between the two
// groups are not visible.
777 gSystem->ResetSignal(kSigBus);
778 gSystem->ResetSignal(kSigSegmentationViolation);
779 gSystem->ResetSignal(kSigIllegalInstruction);
// Route ROOT diagnostics through RootErrorHandler.
807 SetErrorHandler(RootErrorHandler);
812 gInterpreter->SetClassAutoloading(1);
// Raise the maximum tree size to kMaxLong64 and turn off TH1's AddDirectory
// setting.
816 TTree::SetMaxTreeSize(kMaxLong64);
817 TH1::AddDirectory(kFALSE);
// Check for a dictionary for vector<vector<unsigned int>>; what is done when it
// is missing is on lines not shown here.
824 if (!
hasDictionary(
typeid(std::vector<std::vector<unsigned int> >))) {
// Enable ROOT implicit multi-threading only when requested (`imt`) and not
// already enabled.
835 if (imt && not ROOT::IsImplicitMTEnabled()) {
836 ROOT::EnableImplicitMT();
// Fragment; the enclosing function header is not in this view.
// Walks ROOT's list of files, down-casting each entry to TFile. What is done
// with each TFile is on lines not shown here.
842 TIter iter(gROOT->GetListOfFiles());
843 TObject *
obj =
nullptr;
844 while(
nullptr != (obj = iter.Next())) {
845 TFile*
f =
dynamic_cast<TFile*
>(
obj);
// The iterator is re-created from the list -- presumably because the list may
// have changed during the loop body; TODO confirm against the full source.
850 iter = TIter(gROOT->GetListOfFiles());
// Fragment; the enclosing function header is not in this view.
// Three ROOT-wide settings: enable ROOT thread safety, turn object statistics
// off, and turn TVirtualStreamerInfo optimization off.
857 ROOT::EnableThreadSafety();
860 TObject::SetObjectStat(
false);
863 TVirtualStreamerInfo::Optimize(
false);
// Fragment; the enclosing function header is not in this view.
// Attaches human-readable comments to the service's configuration description.
// Each `->setComment(...)` applies to a parameter added on the preceding line,
// which is missing from this view, so the parameter names cannot be listed.
868 desc.
setComment(
"Centralized interface to ROOT.");
870 ->setComment(
"If True, signals are handled by this service, rather than by ROOT.");
872 ->setComment(
"If True, ROOT messages (e.g. errors, warnings) are handled by this service, rather than by ROOT.");
874 ->setComment(
"If True, enables automatic loading of data dictionaries.");
876 ->setComment(
"If True, loads all ROOT dictionaries.");
878 ->setComment(
"If True, calls ROOT::EnableImplicitMT().");
880 ->setComment(
"If True, do an abort when a signal occurs that causes a crash. If False, ROOT will do an exit which attempts to do a clean shutdown.");
882 ->setComment(
"Sets ROOT's gDebug value.");
884 ->setComment(
"Seconds to pause other threads during stack trace.");
// Register the finished description under the label "InitRootHandlers".
885 descriptions.
add(
"InitRootHandlers", desc);
// Fragment; the enclosing function headers are not in this view. These are the
// two sites that assign the thread_local s_ignoreWarnings flag: one clears it,
// the other sets it (presumably a matching enable/ignore pair -- TODO confirm).
895 s_ignoreWarnings =
false;
900 s_ignoreWarnings =
true;
// Fragment; the enclosing function header and the start of this call are not
// in this view. The visible tail of the call formats debugger commands and a
// sed filter together with getpid(), and treats a result >= pidStringLength_ as
// "did not fit in the pre-allocated buffer".
915 "set pagination no\n" 916 "thread apply all bt\n" 918 "/bin/sed -n -e 's/^\\((gdb) \\)*//' -e '/^#/p' -e '/^Thread/p'", getpid()) >=
pidStringLength_)
920 std::ostringstream sstr;
921 sstr <<
"Unable to pre-allocate stacktrace handler information";
// Pipe-creation failure report (errno number plus strerror text).
938 std::ostringstream sstr;
939 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);
// NOTE(review): this second failure site emits exactly the same text as the one
// above. If it guards the parent-to-child pipe, the message is likely a
// copy-paste and should say so -- confirm against the full source.
948 std::ostringstream sstr;
949 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);
unsigned int maxNumberOfThreads() const
T getUntrackedParameter(std::string const &, T const &) const
bool loadAllDictionaries_
void enableWarnings_() override
static void cmssw_stacktrace_fork()
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
void watchPreallocate(Preallocate::slot_type const &iSlot)
static void stacktraceFromThread()
static char *const pstackArgv_[]
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
Container_type threadIDs_
bool isProcessWideService(TFileService const *)
static ModuleCallingContext const * getCurrentModuleOnThread()
void setRefCoreStreamer(bool resetAll=false)
std::vector< Variable::Flags > flags
static int stackTracePause_
void installCustomHandler(int signum, CFUNC func)
std::shared_ptr< const void > sigSegvHandler_
friend int cmssw_stacktrace(void *)
std::shared_ptr< const void > sigBusHandler_
static TypeWithDict byName(std::string const &name)
static std::atomic< std::size_t > doneModules_
static const ThreadTracker::Container_type & threadIDs()
void setComment(std::string const &value)
std::string moduleName(Provenance const &provenance)
static std::atomic< std::size_t > nextModule_
static char pidString_[pidStringLength_]
static ThreadTracker threadTracker_
edm::serviceregistry::AllArgsMaker< edm::RootHandlers, InitRootHandlers > RootHandlersMaker
static char *const * getPstackArgv()
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
std::shared_ptr< const void > sigIllHandler_
void ignoreWarnings_() override
static int childToParent_[2]
std::shared_ptr< const void > sigTermHandler_
void addAdditionalInfo(std::string const &info)
int cmssw_stacktrace(void *)
static std::unique_ptr< std::thread > helperThread_
static std::vector< std::array< char, moduleBufferSize > > moduleListBuffers_
InitRootHandlers(ParameterSet const &pset, ActivityRegistry &iReg)
tbb::concurrent_unordered_set< pthread_t > Container_type
void add(std::string const &label, ParameterSetDescription const &psetDescription)
TEveGeoShape * clone(const TEveElement *element, TEveElement *parent)
static int parentToChild_[2]
void willBeUsingThreads() override
char data[epos_bytes_allocation]
static void fillDescriptions(ConfigurationDescriptions &descriptions)
~InitRootHandlers() override
static const int pidStringLength_
const Container_type & IDs()
static int stackTracePause()
static void stacktraceHelperThread()
bool hasDictionary(std::type_info const &)
void on_scheduler_entry(bool) override