22 #include "tbb/task_scheduler_observer.h" 23 #include "tbb/concurrent_unordered_set.h" 41 #include "TInterpreter.h" 44 #include "TUnixSystem.h" 46 #include "TVirtualStreamerInfo.h" 49 #include "TClassTable.h" 56 constexpr std::size_t moduleBufferSize = 128;
// Per-thread flag (thread_local), initially false. Other code in this file
// assigns it true and false.
159 static thread_local
bool s_ignoreWarnings =
false;
// Single flag for the whole translation unit, initially false. It is tested
// inside RootErrorHandlerImpl.
161 static bool s_ignoreEverything =
false;
// Classifies a ROOT diagnostic and reports it.
//
//   level    - numeric ROOT level, compared against kFatal, kSysError and kError.
//   location - location string supplied with the diagnostic.
//   message  - diagnostic text; may be null.
//
// NOTE(review): this listing omits many lines of the function (declarations of
// el_severity, el_location, el_message, el_identifier, precursor, die and except
// among them). The comments below describe only the statements that are visible.
163 void RootErrorHandlerImpl(
int level,
char const* location,
char const*
message) {
// Map the numeric level onto a SeverityLevel, testing the highest level first.
171 if (level >= kFatal) {
172 el_severity = SeverityLevel::kFatal;
173 }
else if (level >= kSysError) {
174 el_severity = SeverityLevel::kSysError;
175 }
else if (level >=
kError) {
// The statements guarded by s_ignoreEverything are not visible in this listing.
181 if(s_ignoreEverything) {
// Copy the message text only when a non-null pointer was passed.
192 if (message != 0) el_message =
message;
// If the message contains precursor, append a dash plus the token that follows
// it (up to the next space or colon) to el_identifier.
203 size_t index1 = el_message.find(precursor);
204 if (index1 != std::string::npos) {
205 size_t index2 = index1 + precursor.length();
206 size_t index3 = el_message.find_first_of(
" :", index2);
207 if (index3 != std::string::npos) {
208 size_t substrlen = index3-index2;
209 el_identifier +=
"-";
210 el_identifier += el_message.substr(index2,substrlen);
// Append a slash plus the part of el_location that precedes the first scope
// operator.
213 index1 = el_location.find(
"::");
214 if (index1 != std::string::npos) {
215 el_identifier +=
"/";
216 el_identifier += el_location.substr(0, index1);
// A TBranchElement::Fill report about a branch address that is not set is
// promoted to fatal.
222 if ((el_location.find(
"TBranchElement::Fill") != std::string::npos)
223 && (el_message.find(
"fill branch") != std::string::npos)
224 && (el_message.find(
"address") != std::string::npos)
225 && (el_message.find(
"not set") != std::string::npos)) {
226 el_severity = SeverityLevel::kFatal;
// A report about tree branches with different numbers of entries is promoted
// to fatal as well.
229 if ((el_message.find(
"Tree branches") != std::string::npos)
230 && (el_message.find(
"different numbers of entries") != std::string::npos)) {
231 el_severity = SeverityLevel::kFatal;
// List of message and location substrings that receive special treatment.
// NOTE(review): the statement guarded by this condition is not visible here;
// presumably these diagnostics are downgraded or tolerated - confirm against
// the full source.
237 if ((el_message.find(
"no dictionary for class") != std::string::npos) ||
238 (el_message.find(
"already in TClassTable") != std::string::npos) ||
239 (el_message.find(
"matrix not positive definite") != std::string::npos) ||
240 (el_message.find(
"not a TStreamerInfo object") != std::string::npos) ||
241 (el_message.find(
"Problems declaring payload") != std::string::npos) ||
242 (el_message.find(
"Announced number of args different from the real number of argument passed") != std::string::npos) ||
243 (el_location.find(
"Fit") != std::string::npos) ||
244 (el_location.find(
"TDecompChol::Solve") != std::string::npos) ||
245 (el_location.find(
"THistPainter::PaintInit") != std::string::npos) ||
246 (el_location.find(
"TUnixSystem::SetDisplay") != std::string::npos) ||
247 (el_location.find(
"TGClient::GetFontByName") != std::string::npos) ||
248 (el_location.find(
"Inverter::Dinv") != std::string::npos) ||
249 (el_message.find(
"nbins is <=0 - set to nbins = 1") != std::string::npos) ||
250 (el_message.find(
"nbinsy is <=0 - set to nbinsy = 1") != std::string::npos) ||
252 (el_location.find(
"CINTTypedefBuilder::Setup")!= std::string::npos) and
253 (el_message.find(
"possible entries are in use!") != std::string::npos))) {
// When die is set and the location is not the TUnixSystem signal dispatcher,
// format a Fatal Root Error text from location and message.
269 if (die && (el_location !=
std::string(
"@SUB=TUnixSystem::DispatchSignals"))) {
270 std::ostringstream sstr;
271 sstr <<
"Fatal Root Error: " << el_location <<
"\n" << el_message <<
'\n';
274 except.clearMessage();
// Dispatch on the final severity. Of the branch bodies only the LogInfo call is
// visible; it logs location and message under the Root_Information category.
282 if (el_severity == SeverityLevel::kFatal) {
284 }
else if (el_severity == SeverityLevel::kSysError) {
291 edm::LogInfo(
"Root_Information") << el_location << el_message ;
// Error handler registered through SetErrorHandler elsewhere in this file.
// The unnamed bool argument is ignored; level, location and message are
// forwarded unchanged to RootErrorHandlerImpl.
295 void RootErrorHandler(
int level,
bool,
char const* location,
char const* message) {
296 RootErrorHandlerImpl(level, location, message);
// Writes the NUL-terminated string text to file descriptor fd using write().
// The byte count is taken with strlen, so nothing after the first NUL is sent.
// NOTE(review): the enclosing loop and the success return are not visible in
// this listing.
301 static int full_write(
int fd,
const char *
text)
303 const char *buffer =
text;
304 size_t count = strlen(text);
308 written =
write(fd, buffer, count);
// A write interrupted by a signal is retried; any other failure is returned to
// the caller as the negated errno value.
311 if (errno == EINTR) {
continue;}
312 else {
return -errno;}
// Reads from file descriptor fd with an optional time limit.
//
//   fd        - descriptor to read from.
//   inbuf     - destination buffer supplied by the caller.
//   len       - size argument supplied by the caller.
//   timeout_s - time limit; defaults to -1.
//
// NOTE(review): the declarations of flags, ms_remaining, buf and count, as well
// as every return statement, are missing from this listing. buf and count are
// presumably derived from inbuf and len, and a negative timeout_s presumably
// means no time limit - confirm against the full source.
320 static int full_read(
int fd,
char *inbuf,
size_t len,
int timeout_s=-1)
324 ssize_t complete = 0;
// Fetch the current descriptor flags so that they can be restored later.
331 else if ((-1 == (flags = fcntl(fd, F_GETFL))))
// Switch the descriptor to non-blocking mode.
337 if (-1 == fcntl(fd, F_SETFL, flags | O_NONBLOCK))
346 struct pollfd poll_info{fd, POLLIN, 0};
348 if (ms_remaining > 0)
// poll() returning 0 means that nothing became readable within ms_remaining.
350 if (poll(&poll_info, 1, ms_remaining) == 0)
// The saved flags are written back only when O_NONBLOCK was not set in them,
// i.e. only when this function changed the mode.
352 if ((flags & O_NONBLOCK) != O_NONBLOCK)
354 fcntl(fd, F_SETFL, flags);
359 else if (ms_remaining < 0)
361 if ((flags & O_NONBLOCK) != O_NONBLOCK)
363 fcntl(fd, F_SETFL, flags);
368 complete = read(fd, buf, count);
// Reads that were interrupted or that would block are retried.
371 if (errno == EINTR) {
continue;}
372 else if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
continue;}
// Keep errno before the fcntl() call below can overwrite it.
375 int orig_errno = errno;
376 if ((flags & O_NONBLOCK) != O_NONBLOCK)
378 fcntl(fd, F_SETFL, flags);
386 if ((flags & O_NONBLOCK) != O_NONBLOCK) {
387 fcntl(fd, F_SETFL, flags);
// Convenience wrapper: sends text to file descriptor 2 through full_write and
// returns the result of that call.
392 static int full_cerr_write(
const char *text)
394 return full_write(2, text);
// PAUSE_SIGNAL and RESUME_SIGNAL are SIGRTMAX and SIGRTMAX-1 when SIGRTMAX is
// defined, otherwise SIGINFO and SIGALRM when SIGINFO is defined.
// sig_resume_handler has an empty body: it does nothing when invoked.
401 #if defined(SIGRTMAX) 402 #define PAUSE_SIGNAL SIGRTMAX 403 #define RESUME_SIGNAL SIGRTMAX-1 404 #elif defined(SIGINFO) // macOS/BSD 405 #define PAUSE_SIGNAL SIGINFO 406 #define RESUME_SIGNAL SIGALRM 410 void sig_resume_handler(
int sig, siginfo_t*,
void*) {}
// Signal handler that sig_dostack_then_abort installs for PAUSE_SIGNAL.
// NOTE(review): the declarations of sigset and buff and the waiting logic are
// not visible in this listing.
413 void sig_pause_for_stacktrace(
int sig, siginfo_t*,
void*) {
// Unblock RESUME_SIGNAL for the calling thread.
418 sigemptyset(&sigset);
419 sigaddset(&sigset, RESUME_SIGNAL);
420 pthread_sigmask(SIG_UNBLOCK, &sigset, 0);
// Start the text in buff with the Module prefix, bounded by moduleBufferSize.
430 strlcpy(buff,
"\nModule: ", moduleBufferSize);
// The word none is appended in a branch whose condition is not visible here.
434 strlcat(buff,
"none", moduleBufferSize);
// Signal handler that reports a fatal signal on standard error: it installs the
// pause and resume handlers, signals the threads in tids, prints the signal
// name and module information, and finally resets the signal to its default
// action.
// NOTE(review): the declaration of tids, the switch that picks signalname, the
// stack trace request and the waiting loop are missing from this listing.
441 void sig_dostack_then_abort(
int sig, siginfo_t*,
void*) {
446 const auto self = pthread_self();
// Install sig_pause_for_stacktrace as the handler for PAUSE_SIGNAL.
450 struct sigaction act;
451 act.sa_sigaction = sig_pause_for_stacktrace;
453 sigemptyset(&act.sa_mask);
454 sigaction(PAUSE_SIGNAL, &act,
NULL);
// Make sure PAUSE_SIGNAL is not blocked.
457 sigset_t pausesigset;
458 sigemptyset(&pausesigset);
459 sigaddset(&pausesigset, PAUSE_SIGNAL);
460 sigprocmask(SIG_UNBLOCK, &pausesigset, 0);
// Send PAUSE_SIGNAL to threads listed in tids.
463 for (
auto id : tids) {
465 pthread_kill(
id, PAUSE_SIGNAL);
// Reuse act to install the empty sig_resume_handler for RESUME_SIGNAL.
471 act.sa_sigaction = sig_resume_handler;
472 sigaction(RESUME_SIGNAL, &act,
NULL);
// Human-readable name of the signal; the cases that select each string are not
// visible here.
477 const char* signalname =
"unknown";
481 signalname =
"bus error";
486 signalname =
"segmentation violation";
491 signalname =
"illegal instruction";
496 signalname =
"external termination request";
502 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
503 full_cerr_write(signalname);
504 full_cerr_write(
"\nThe following is the call stack containing the origin of the signal.\n\n");
// Count the threads for which sending RESUME_SIGNAL succeeded.
513 std::size_t notified = 0;
515 for (
auto id : tids) {
517 if (pthread_kill(
id, RESUME_SIGNAL) == 0) ++notified;
523 full_cerr_write(
"\nCurrent Modules:\n");
// When the signalled thread is one of the tracked threads, print its module
// line with a crashed marker. The non-CMSSW line below is presumably the else
// branch - confirm against the full source.
530 if (tids.count(
self) > 0) {
531 char buff[moduleBufferSize] =
"\nModule: ";
535 strlcat(buff,
"none", moduleBufferSize);
537 strlcat(buff,
" (crashed)", moduleBufferSize);
538 full_cerr_write(buff);
540 full_cerr_write(
"\nModule: non-CMSSW (crashed)");
548 timespec
t = { 0, 1000 };
// The signal name is printed a second time after the module list.
556 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
557 full_cerr_write(signalname);
558 full_cerr_write(
"\n");
// For these four signals the default action is restored.
562 if ((sig == SIGILL) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig == SIGTERM))
564 signal(sig, SIG_DFL);
// Signal handler that prints a notice about a fatal signal during exit to
// standard error and then resets the signal to its default action.
// NOTE(review): the remaining statements of the handler are not visible here.
573 void sig_abort(
int sig, siginfo_t*,
void*) {
574 full_cerr_write(
"\n\nFatal system signal has occurred during exit\n");
577 signal(sig, SIG_DFL);
// Restores the default action for SIGILL, SIGSEGV, SIGBUS and SIGTERM.
586 void set_default_signals() {
587 signal(SIGILL, SIG_DFL);
588 signal(SIGSEGV, SIG_DFL);
589 signal(SIGBUS, SIG_DFL);
590 signal(SIGTERM, SIG_DFL);
610 char buf[2]; buf[1] =
'\0';
614 int result = full_read(fromParent, buf, 1);
620 set_default_signals();
622 full_cerr_write(
"\n\nTraceback helper thread failed to read from parent: ");
623 full_cerr_write(strerror(-result));
624 full_cerr_write(
"\n");
629 set_default_signals();
631 full_write(toParent, buf);
633 else if (buf[0] ==
'2')
642 else if (buf[0] ==
'3')
648 set_default_signals();
650 full_cerr_write(
"\n\nTraceback helper thread got unknown command from parent: ");
651 full_cerr_write(buf);
652 full_cerr_write(
"\n");
663 full_cerr_write(
"\n\nAttempt to request stacktrace failed: ");
664 full_cerr_write(strerror(-result));
665 full_cerr_write(
"\n");
668 char buf[2]; buf[1] =
'\0';
671 full_cerr_write(
"\n\nWaiting for stacktrace completion failed: ");
672 if (result == -ETIMEDOUT)
674 full_cerr_write(
"timed out waiting for GDB to complete.");
678 full_cerr_write(strerror(-result));
680 full_cerr_write(
"\n");
687 char child_stack[4*1024];
688 char *child_stack_ptr = child_stack + 4*1024;
698 if (child_stack_ptr) {}
703 full_cerr_write(
"(Attempt to perform stack dump failed.)\n");
708 if (waitpid(pid, &status, 0) == -1)
710 full_cerr_write(
"(Failed to wait on stack dump output.)\n");
714 full_cerr_write(
"(GDB stack trace failed unexpectedly)\n");
726 syscall(SYS_execve,
"/bin/sh", argv, __environ);
728 execv(
"/bin/sh", argv);
// Holds one TThread object per calling thread: guard is a function-local
// thread_local static, so it is constructed the first time a given thread
// executes this declaration and is reused on later calls from that thread.
736 void localInitializeThisThreadForUse() {
737 static thread_local TThread guard;
// TBB task constructed from a pointer to an atomic counter and a pointer to a
// waiting task. Its execute() override calls localInitializeThisThreadForUse().
// NOTE(review): the member declarations and the rest of execute() are not
// visible in this listing, so how counter and waitingTask are used cannot be
// stated from here.
740 class InitializeThreadTask :
public tbb::task {
742 InitializeThreadTask(std::atomic<unsigned int>*
counter,
743 tbb::task* waitingTask):
747 tbb::task*
execute()
override {
752 localInitializeThisThreadForUse();
788 gSystem->ResetSignal(kSigChild);
789 gSystem->ResetSignal(kSigBus);
790 gSystem->ResetSignal(kSigSegmentationViolation);
791 gSystem->ResetSignal(kSigIllegalInstruction);
792 gSystem->ResetSignal(kSigSystem);
793 gSystem->ResetSignal(kSigPipe);
794 gSystem->ResetSignal(kSigAlarm);
795 gSystem->ResetSignal(kSigUrgent);
796 gSystem->ResetSignal(kSigFloatingException);
797 gSystem->ResetSignal(kSigWindowChanged);
803 gSystem->ResetSignal(kSigBus);
804 gSystem->ResetSignal(kSigSegmentationViolation);
805 gSystem->ResetSignal(kSigIllegalInstruction);
829 std::atomic<unsigned int> threadsLeft{nThreads};
831 std::shared_ptr<tbb::empty_task> waitTask{
new (tbb::task::allocate_root()) tbb::empty_task{},
834 waitTask->set_ref_count(1+nThreads);
835 for(
unsigned int i=0;
i<nThreads;++
i) {
836 tbb::task::spawn( *(
new(tbb::task::allocate_root()) InitializeThreadTask(&threadsLeft, waitTask.get())));
839 waitTask->wait_for_all();
854 SetErrorHandler(RootErrorHandler);
859 gInterpreter->SetClassAutoloading(1);
863 TTree::SetMaxTreeSize(kMaxLong64);
864 TH1::AddDirectory(kFALSE);
871 if (!
hasDictionary(
typeid(std::vector<std::vector<unsigned int> >))) {
882 if (imt) ROOT::EnableImplicitMT();
887 TIter iter(gROOT->GetListOfFiles());
888 TObject *
obj =
nullptr;
889 while(
nullptr != (obj = iter.Next())) {
890 TFile*
f =
dynamic_cast<TFile*
>(
obj);
895 iter = TIter(gROOT->GetListOfFiles());
902 TThread::Initialize();
904 TObject::SetObjectStat(
false);
907 TVirtualStreamerInfo::Optimize(
false);
911 localInitializeThisThreadForUse();
916 desc.
setComment(
"Centralized interface to ROOT.");
918 ->setComment(
"If True, signals are handled by this service, rather than by ROOT.");
920 ->setComment(
"If True, ROOT messages (e.g. errors, warnings) are handled by this service, rather than by ROOT.");
922 ->setComment(
"If True, enables automatic loading of data dictionaries.");
924 ->setComment(
"If True, loads all ROOT dictionaries.");
926 ->setComment(
"If True, calls ROOT::EnableImplicitMT().");
928 ->setComment(
"If True, do an abort when a signal occurs that causes a crash. If False, ROOT will do an exit which attempts to do a clean shutdown.");
930 ->setComment(
"Sets ROOT's gDebug value.");
932 ->setComment(
"Seconds to pause other threads during stack trace.");
933 descriptions.
add(
"InitRootHandlers", desc);
943 s_ignoreWarnings =
false;
948 s_ignoreWarnings =
true;
963 "set pagination no\n" 964 "thread apply all bt\n" 966 "/bin/sed -n -e 's/^\\((gdb) \\)*//' -e '/^#/p' -e '/^Thread/p'", getpid()) >=
pidStringLength_)
968 std::ostringstream sstr;
969 sstr <<
"Unable to pre-allocate stacktrace handler information";
986 std::ostringstream sstr;
987 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);
996 std::ostringstream sstr;
997 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);
unsigned int maxNumberOfThreads() const
T getUntrackedParameter(std::string const &, T const &) const
bool loadAllDictionaries_
virtual void enableWarnings_() override
static void cmssw_stacktrace_fork()
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
void watchPreallocate(Preallocate::slot_type const &iSlot)
static void stacktraceFromThread()
static char *const pstackArgv_[]
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
Container_type threadIDs_
void on_scheduler_entry(bool)
bool isProcessWideService(TFileService const *)
static ModuleCallingContext const * getCurrentModuleOnThread()
void setRefCoreStreamer(bool resetAll=false)
std::vector< Variable::Flags > flags
static int stackTracePause_
void installCustomHandler(int signum, CFUNC func)
std::shared_ptr< const void > sigSegvHandler_
friend int cmssw_stacktrace(void *)
std::shared_ptr< const void > sigBusHandler_
static TypeWithDict byName(std::string const &name)
static std::atomic< std::size_t > doneModules_
static const ThreadTracker::Container_type & threadIDs()
void cachePidInfoHandler(unsigned int, unsigned int)
std::atomic< unsigned int > * threadsLeft_
void setComment(std::string const &value)
std::string moduleName(Provenance const &provenance)
static std::atomic< std::size_t > nextModule_
static char pidString_[pidStringLength_]
static ThreadTracker threadTracker_
edm::serviceregistry::AllArgsMaker< edm::RootHandlers, InitRootHandlers > RootHandlersMaker
static char *const * getPstackArgv()
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
std::shared_ptr< const void > sigIllHandler_
virtual void initializeThisThreadForUse() override
virtual void ignoreWarnings_() override
static int childToParent_[2]
std::shared_ptr< const void > sigTermHandler_
void addAdditionalInfo(std::string const &info)
int cmssw_stacktrace(void *)
static std::unique_ptr< std::thread > helperThread_
static std::vector< std::array< char, moduleBufferSize > > moduleListBuffers_
virtual ~InitRootHandlers()
InitRootHandlers(ParameterSet const &pset, ActivityRegistry &iReg)
tbb::concurrent_unordered_set< pthread_t > Container_type
void add(std::string const &label, ParameterSetDescription const &psetDescription)
TEveGeoShape * clone(const TEveElement *element, TEveElement *parent)
static int parentToChild_[2]
void watchPostForkReacquireResources(PostForkReacquireResources::slot_type const &iSlot)
virtual void willBeUsingThreads() override
char data[epos_bytes_allocation]
static void fillDescriptions(ConfigurationDescriptions &descriptions)
static const int pidStringLength_
const Container_type & IDs()
static int stackTracePause()
static void stacktraceHelperThread()
bool hasDictionary(std::type_info const &)