21 #include "tbb/task_scheduler_observer.h"
22 #include "tbb/concurrent_unordered_set.h"
40 #include "TInterpreter.h"
43 #include "TUnixSystem.h"
45 #include "TVirtualStreamerInfo.h"
48 #include "TClassTable.h"
/// Capacity, in bytes, of each fixed-size character buffer that holds a
/// "Module: ..." line (used as the bound for strlcpy/strlcat in the signal
/// handlers of this file).
constexpr std::size_t moduleBufferSize = 128;

/// Per-thread flag; assigned true/false elsewhere in this file.
static thread_local bool s_ignoreWarnings = false;

/// Process-wide flag tested by RootErrorHandlerImpl.
static bool s_ignoreEverything = false;
// RootErrorHandlerImpl(level, location, message)
//
// Classifies one diagnostic by its numeric `level`, then inspects the
// location and message text to extend an identifier string, to force
// selected reports to fatal severity, and to test a list of known
// message/location substrings. Later it may assemble a "Fatal Root Error"
// text and dispatches on the final severity.
//
// NOTE(review): this listing shows only some of the function's lines (each
// carries its original line number as a leading token). Declarations of
// el_severity, el_message, el_location, el_identifier, precursor, die and
// except, and the statements between the visible lines, are not shown here;
// confirm exact control flow against the complete file.
158 void RootErrorHandlerImpl(
int level,
char const* location,
char const*
message) {
// Map the numeric level onto a severity, testing the highest threshold first.
166 if (level >= kFatal) {
167 el_severity = SeverityLevel::kFatal;
168 }
else if (level >= kSysError) {
169 el_severity = SeverityLevel::kSysError;
170 }
else if (level >=
kError) {
176 if(s_ignoreEverything) {
// A null message pointer leaves el_message unchanged.
187 if (message != 0) el_message =
message;
// If `precursor` occurs in the message, the text following it (up to the
// next space or colon) is appended to the identifier after a "-".
198 size_t index1 = el_message.find(precursor);
199 if (index1 != std::string::npos) {
200 size_t index2 = index1 + precursor.length();
201 size_t index3 = el_message.find_first_of(
" :", index2);
202 if (index3 != std::string::npos) {
203 size_t substrlen = index3-index2;
204 el_identifier +=
"-";
205 el_identifier += el_message.substr(index2,substrlen);
// Otherwise-visible path: the part of the location before the first "::"
// is appended to the identifier after a "/".
208 index1 = el_location.find(
"::");
209 if (index1 != std::string::npos) {
210 el_identifier +=
"/";
211 el_identifier += el_location.substr(0, index1);
// A TBranchElement::Fill report about a branch address that is not set is
// forced to fatal severity.
217 if ((el_location.find(
"TBranchElement::Fill") != std::string::npos)
218 && (el_message.find(
"fill branch") != std::string::npos)
219 && (el_message.find(
"address") != std::string::npos)
220 && (el_message.find(
"not set") != std::string::npos)) {
221 el_severity = SeverityLevel::kFatal;
// So is a report that tree branches have different numbers of entries.
224 if ((el_message.find(
"Tree branches") != std::string::npos)
225 && (el_message.find(
"different numbers of entries") != std::string::npos)) {
226 el_severity = SeverityLevel::kFatal;
// Disjunction of known message/location substrings.
// NOTE(review): the start of this condition and the action taken on a match
// are not visible in this listing — presumably these are reports treated as
// harmless; confirm.
232 if ((el_message.find(
"no dictionary for class") != std::string::npos) ||
233 (el_message.find(
"already in TClassTable") != std::string::npos) ||
234 (el_message.find(
"matrix not positive definite") != std::string::npos) ||
235 (el_message.find(
"not a TStreamerInfo object") != std::string::npos) ||
236 (el_message.find(
"Problems declaring payload") != std::string::npos) ||
237 (el_message.find(
"Announced number of args different from the real number of argument passed") != std::string::npos) ||
238 (el_location.find(
"Fit") != std::string::npos) ||
239 (el_location.find(
"TDecompChol::Solve") != std::string::npos) ||
240 (el_location.find(
"THistPainter::PaintInit") != std::string::npos) ||
241 (el_location.find(
"TUnixSystem::SetDisplay") != std::string::npos) ||
242 (el_location.find(
"TGClient::GetFontByName") != std::string::npos) ||
243 (el_location.find(
"Inverter::Dinv") != std::string::npos) ||
244 (el_message.find(
"nbins is <=0 - set to nbins = 1") != std::string::npos) ||
245 (el_message.find(
"nbinsy is <=0 - set to nbinsy = 1") != std::string::npos) ||
247 (el_location.find(
"CINTTypedefBuilder::Setup")!= std::string::npos) and
248 (el_message.find(
"possible entries are in use!") != std::string::npos))) {
// When `die` is set and the location is not the signal-dispatch marker, a
// "Fatal Root Error" text is assembled from the location and message.
264 if (die && (el_location !=
std::string(
"@SUB=TUnixSystem::DispatchSignals"))) {
265 std::ostringstream sstr;
266 sstr <<
"Fatal Root Error: " << el_location <<
"\n" << el_message <<
'\n';
269 except.clearMessage();
// Dispatch on the final severity; most branch bodies are not visible here.
277 if (el_severity == SeverityLevel::kFatal) {
279 }
else if (el_severity == SeverityLevel::kSysError) {
286 edm::LogInfo(
"Root_Information") << el_location << el_message ;
290 void RootErrorHandler(
int level,
bool,
char const* location,
char const* message) {
291 RootErrorHandlerImpl(level, location, message);
// full_write(fd, text)
//
// Writes the NUL-terminated string `text` (length taken with strlen) to the
// file descriptor `fd` using write(2). When write fails with errno == EINTR
// the attempt is repeated; on any other failure the negated errno value is
// returned.
//
// NOTE(review): the declaration of `written`, the enclosing loop header, the
// bookkeeping after a successful write and the success return value are not
// visible in this listing — presumably the loop continues until all `count`
// bytes are written; confirm against the complete file.
296 static int full_write(
int fd,
const char *
text)
298 const char *buffer =
text;
299 size_t count = strlen(text);
303 written =
write(fd, buffer, count);
306 if (errno == EINTR) {
continue;}
307 else {
return -errno;}
// full_read(fd, inbuf, len, timeout_s = -1)
//
// Reads from `fd` with read(2), optionally bounded by a timeout. The visible
// lines show the following steps:
//   * the descriptor's status flags are fetched with fcntl(F_GETFL) and
//     O_NONBLOCK is added with fcntl(F_SETFL);
//   * while time remains (ms_remaining > 0) the descriptor is polled for
//     POLLIN; a poll() result of 0 (nothing ready before the deadline) and a
//     negative ms_remaining each lead to an exit path;
//   * read() failures with EINTR, EAGAIN or EWOULDBLOCK are retried; for
//     other failures errno is saved in orig_errno before the flags are
//     touched;
//   * on every visible exit path the original flags are written back, but
//     only if O_NONBLOCK was not already set in them.
//
// NOTE(review): declarations of flags, buf, count and ms_remaining, the loop
// structure, and every return statement are missing from this listing, so the
// return-value contract (and how timeout_s == -1 is interpreted) must be
// confirmed against the complete file.
315 static int full_read(
int fd,
char *inbuf,
size_t len,
int timeout_s=-1)
319 ssize_t complete = 0;
326 else if ((-1 == (flags = fcntl(fd, F_GETFL))))
332 if (-1 == fcntl(fd, F_SETFL, flags | O_NONBLOCK))
341 struct pollfd poll_info{
fd, POLLIN, 0};
343 if (ms_remaining > 0)
345 if (poll(&poll_info, 1, ms_remaining) == 0)
// Restore the caller's flags only when this function was the one that added O_NONBLOCK.
347 if ((flags & O_NONBLOCK) != O_NONBLOCK)
349 fcntl(fd, F_SETFL, flags);
354 else if (ms_remaining < 0)
356 if ((flags & O_NONBLOCK) != O_NONBLOCK)
358 fcntl(fd, F_SETFL, flags);
363 complete =
read(fd, buf, count);
366 if (errno == EINTR) {
continue;}
367 else if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
continue;}
// Save errno first: the fcntl() call below may overwrite it.
370 int orig_errno = errno;
371 if ((flags & O_NONBLOCK) != O_NONBLOCK)
373 fcntl(fd, F_SETFL, flags);
381 if ((flags & O_NONBLOCK) != O_NONBLOCK) {
382 fcntl(fd, F_SETFL, flags);
387 static int full_cerr_write(
const char *text)
389 return full_write(2, text);
// Signal numbers used by the stack-trace handlers in this file:
// PAUSE_SIGNAL is sent with pthread_kill() to the tracked threads before the
// trace is produced, RESUME_SIGNAL afterwards.
// The replacement list of RESUME_SIGNAL is parenthesized so that the macro
// expands as a single operand inside any larger expression.
#if defined(SIGRTMAX)
#define PAUSE_SIGNAL SIGRTMAX
#define RESUME_SIGNAL (SIGRTMAX - 1)
#elif defined(SIGINFO) // macOS/BSD
#define PAUSE_SIGNAL SIGINFO
#define RESUME_SIGNAL SIGALRM
#endif
/// Deliberately empty three-argument (sigaction-style) handler. It is
/// installed for RESUME_SIGNAL elsewhere in this file; the handler itself
/// performs no work, so delivery of the signal has no effect beyond
/// interrupting whatever the receiving thread was blocked in.
void sig_resume_handler(int /*sig*/, siginfo_t*, void*) {}
// sig_pause_for_stacktrace(sig, siginfo_t*, void*)
//
// Three-argument signal handler. The visible lines unblock RESUME_SIGNAL for
// the calling thread (pthread_sigmask with SIG_UNBLOCK) and build a
// "\nModule: ..." line in `buff`, bounded by moduleBufferSize, appending
// "none" on one path.
//
// NOTE(review): the declarations of sigset and buff, the source of the module
// name, and whatever makes the thread wait are not visible in this listing —
// presumably the thread stays here until RESUME_SIGNAL arrives; confirm.
408 void sig_pause_for_stacktrace(
int sig, siginfo_t*,
void*) {
409 using namespace edm::service;
413 sigemptyset(&sigset);
414 sigaddset(&sigset, RESUME_SIGNAL);
415 pthread_sigmask(SIG_UNBLOCK, &sigset, 0);
425 strlcpy(buff,
"\nModule: ", moduleBufferSize);
429 strlcat(buff,
"none", moduleBufferSize);
// sig_dostack_then_abort(sig, siginfo_t*, void*)
//
// Three-argument signal handler for fatal signals. Steps visible in this
// listing, in order:
//   1. install sig_pause_for_stacktrace for PAUSE_SIGNAL, unblock that signal,
//      and send it with pthread_kill() to ids taken from `tids`;
//   2. install sig_resume_handler for RESUME_SIGNAL;
//   3. choose a human-readable name for the signal and write a banner to
//      standard error through full_cerr_write();
//   4. send RESUME_SIGNAL to ids in `tids`, counting successful deliveries in
//      `notified`;
//   5. write the "Current Modules" section, marking the crashing thread's
//      line with " (crashed)" or printing "non-CMSSW (crashed)" when the
//      current thread is not in `tids`;
//   6. repeat the signal banner and, for SIGILL/SIGSEGV/SIGBUS/SIGTERM,
//      restore the default disposition with signal(sig, SIG_DFL).
//
// NOTE(review): many lines are missing between the visible ones (where `tids`
// comes from, any filtering inside the two loops, how the stack trace itself
// is produced, the use of the timespec, and what follows the final signal()
// call), so this summary is limited to what is shown.
436 void sig_dostack_then_abort(
int sig, siginfo_t*,
void*) {
437 using namespace edm::service;
// Remember the current thread so it can be looked up in `tids` later.
441 const auto self = pthread_self();
445 struct sigaction act;
446 act.sa_sigaction = sig_pause_for_stacktrace;
448 sigemptyset(&act.sa_mask);
449 sigaction(PAUSE_SIGNAL, &act,
NULL);
452 sigset_t pausesigset;
453 sigemptyset(&pausesigset);
454 sigaddset(&pausesigset, PAUSE_SIGNAL);
455 sigprocmask(SIG_UNBLOCK, &pausesigset, 0);
458 for (
auto id : tids) {
460 pthread_kill(
id, PAUSE_SIGNAL);
// The same sigaction struct is reused for the resume handler.
466 act.sa_sigaction = sig_resume_handler;
467 sigaction(RESUME_SIGNAL, &act,
NULL);
// Default name; replaced below for the recognised signals.
472 const char* signalname =
"unknown";
476 signalname =
"bus error";
481 signalname =
"segmentation violation";
486 signalname =
"illegal instruction";
491 signalname =
"external termination request";
497 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
498 full_cerr_write(signalname);
499 full_cerr_write(
"\nThe following is the call stack containing the origin of the signal.\n\n");
// Count only the threads for which pthread_kill() reported success.
508 std::size_t notified = 0;
510 for (
auto id : tids) {
512 if (pthread_kill(
id, RESUME_SIGNAL) == 0) ++notified;
518 full_cerr_write(
"\nCurrent Modules:\n");
525 if (tids.count(
self) > 0) {
526 char buff[moduleBufferSize] =
"\nModule: ";
530 strlcat(buff,
"none", moduleBufferSize);
532 strlcat(buff,
" (crashed)", moduleBufferSize);
533 full_cerr_write(buff);
535 full_cerr_write(
"\nModule: non-CMSSW (crashed)");
// 0 s + 1000 ns; NOTE(review): the call that consumes `t` is not visible here.
543 timespec
t = { 0, 1000 };
551 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
552 full_cerr_write(signalname);
553 full_cerr_write(
"\n");
557 if ((sig == SIGILL) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig == SIGTERM))
559 signal(sig, SIG_DFL);
// sig_abort(sig, siginfo_t*, void*)
//
// Three-argument signal handler.
// NOTE(review): only the signature is visible in this listing; the body must
// be read from the complete file before relying on its behaviour.
568 void sig_abort(
int sig, siginfo_t*,
void*) {
/// Restores the default disposition (SIG_DFL) for SIGILL, SIGSEGV, SIGBUS
/// and SIGTERM, in that order, for the calling process.
void set_default_signals() {
  constexpr int resetSignals[] = {SIGILL, SIGSEGV, SIGBUS, SIGTERM};
  for (const int signum : resetSignals) {
    signal(signum, SIG_DFL);
  }
}
597 char buf[2]; buf[1] =
'\0';
601 int result = full_read(fromParent, buf, 1);
607 set_default_signals();
609 full_cerr_write(
"\n\nTraceback helper thread failed to read from parent: ");
610 full_cerr_write(strerror(-result));
611 full_cerr_write(
"\n");
616 set_default_signals();
618 full_write(toParent, buf);
620 else if (buf[0] ==
'2')
629 else if (buf[0] ==
'3')
635 set_default_signals();
637 full_cerr_write(
"\n\nTraceback helper thread got unknown command from parent: ");
638 full_cerr_write(buf);
639 full_cerr_write(
"\n");
650 full_cerr_write(
"\n\nAttempt to request stacktrace failed: ");
651 full_cerr_write(strerror(-result));
652 full_cerr_write(
"\n");
655 char buf[2]; buf[1] =
'\0';
658 full_cerr_write(
"\n\nWaiting for stacktrace completion failed: ");
659 if (result == -ETIMEDOUT)
661 full_cerr_write(
"timed out waiting for GDB to complete.");
665 full_cerr_write(strerror(-result));
667 full_cerr_write(
"\n");
674 char child_stack[4*1024];
675 char *child_stack_ptr = child_stack + 4*1024;
685 if (child_stack_ptr) {}
690 full_cerr_write(
"(Attempt to perform stack dump failed.)\n");
695 if (waitpid(pid, &status, 0) == -1)
697 full_cerr_write(
"(Failed to wait on stack dump output.)\n");
701 full_cerr_write(
"(GDB stack trace failed unexpectedly)\n");
713 syscall(SYS_execve,
"/bin/sh", argv, __environ);
715 execv(
"/bin/sh", argv);
723 void localInitializeThisThreadForUse() {
724 static thread_local TThread guard;
// InitializeThreadTask
//
// tbb::task subclass constructed from a pointer to an atomic counter and a
// pointer to another task (`waitingTask`). Its execute() override calls
// localInitializeThisThreadForUse() on whichever thread runs the task.
//
// NOTE(review): the member initializers, the rest of execute() (including how
// `counter` and `waitingTask` are used and what is returned) and the end of
// the class are not visible in this listing.
727 class InitializeThreadTask :
public tbb::task {
729 InitializeThreadTask(std::atomic<unsigned int>*
counter,
730 tbb::task* waitingTask):
734 tbb::task*
execute()
override {
739 localInitializeThisThreadForUse();
766 unloadSigHandler_(pset.getUntrackedParameter<bool> (
"UnloadRootSigHandler")),
767 resetErrHandler_(pset.getUntrackedParameter<bool> (
"ResetRootErrHandler")),
768 loadAllDictionaries_(pset.getUntrackedParameter<bool>(
"LoadAllDictionaries")),
769 autoLibraryLoader_(loadAllDictionaries_
or pset.getUntrackedParameter<bool> (
"AutoLibraryLoader"))
775 gSystem->ResetSignal(kSigChild);
776 gSystem->ResetSignal(kSigBus);
777 gSystem->ResetSignal(kSigSegmentationViolation);
778 gSystem->ResetSignal(kSigIllegalInstruction);
779 gSystem->ResetSignal(kSigSystem);
780 gSystem->ResetSignal(kSigPipe);
781 gSystem->ResetSignal(kSigAlarm);
782 gSystem->ResetSignal(kSigUrgent);
783 gSystem->ResetSignal(kSigFloatingException);
784 gSystem->ResetSignal(kSigWindowChanged);
790 gSystem->ResetSignal(kSigBus);
791 gSystem->ResetSignal(kSigSegmentationViolation);
792 gSystem->ResetSignal(kSigIllegalInstruction);
816 std::atomic<unsigned int> threadsLeft{nThreads};
818 std::shared_ptr<tbb::empty_task> waitTask{
new (tbb::task::allocate_root()) tbb::empty_task{},
821 waitTask->set_ref_count(1+nThreads);
822 for(
unsigned int i=0;
i<nThreads;++
i) {
823 tbb::task::spawn( *(
new(tbb::task::allocate_root()) InitializeThreadTask(&threadsLeft, waitTask.get())));
826 waitTask->wait_for_all();
841 SetErrorHandler(RootErrorHandler);
846 gInterpreter->SetClassAutoloading(1);
850 TTree::SetMaxTreeSize(kMaxLong64);
851 TH1::AddDirectory(kFALSE);
858 if (!
hasDictionary(
typeid(std::vector<std::vector<unsigned int> >))) {
870 TIter iter(gROOT->GetListOfFiles());
871 TObject *
obj =
nullptr;
872 while(
nullptr != (obj = iter.Next())) {
873 TFile*
f =
dynamic_cast<TFile*
>(
obj);
878 iter = TIter(gROOT->GetListOfFiles());
885 TThread::Initialize();
887 TObject::SetObjectStat(
false);
890 TVirtualStreamerInfo::Optimize(
false);
894 localInitializeThisThreadForUse();
899 desc.
setComment(
"Centralized interface to ROOT.");
901 ->setComment(
"If True, signals are handled by this service, rather than by ROOT.");
903 ->setComment(
"If True, ROOT messages (e.g. errors, warnings) are handled by this service, rather than by ROOT.");
905 ->setComment(
"If True, enables automatic loading of data dictionaries.");
907 ->setComment(
"If True, loads all ROOT dictionaries.");
909 ->setComment(
"If True, do an abort when a signal occurs that causes a crash. If False, ROOT will do an exit which attempts to do a clean shutdown.");
911 ->setComment(
"Sets ROOT's gDebug value.");
913 ->setComment(
"Seconds to pause other threads during stack trace.");
914 descriptions.
add(
"InitRootHandlers", desc);
924 s_ignoreWarnings =
false;
929 s_ignoreWarnings =
true;
938 "set pagination no\n"
939 "thread apply all bt\n"
941 "/bin/sed -n -e 's/^\\((gdb) \\)*//' -e '/^#/p' -e '/^Thread/p'", getpid()) >=
pidStringLength_)
943 std::ostringstream sstr;
944 sstr <<
"Unable to pre-allocate stacktrace handler information";
961 std::ostringstream sstr;
962 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);
971 std::ostringstream sstr;
972 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);
unsigned int maxNumberOfThreads() const
T getUntrackedParameter(std::string const &, T const &) const
bool loadAllDictionaries_
virtual void enableWarnings_() override
static void cmssw_stacktrace_fork()
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
void watchPreallocate(Preallocate::slot_type const &iSlot)
static void stacktraceFromThread()
static char *const pstackArgv_[]
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventIDconst &, edm::Timestampconst & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Container_type threadIDs_
void on_scheduler_entry(bool)
bool isProcessWideService(TFileService const *)
static ModuleCallingContext const * getCurrentModuleOnThread()
void setRefCoreStreamer(bool resetAll=false)
std::vector< Variable::Flags > flags
static int stackTracePause_
void installCustomHandler(int signum, CFUNC func)
std::shared_ptr< const void > sigSegvHandler_
friend int cmssw_stacktrace(void *)
std::shared_ptr< const void > sigBusHandler_
static TypeWithDict byName(std::string const &name)
static std::atomic< std::size_t > doneModules_
static const ThreadTracker::Container_type & threadIDs()
void cachePidInfoHandler(unsigned int, unsigned int)
std::atomic< unsigned int > * threadsLeft_
void setComment(std::string const &value)
std::string moduleName(Provenance const &provenance)
static std::atomic< std::size_t > nextModule_
static char pidString_[pidStringLength_]
static ThreadTracker threadTracker_
edm::serviceregistry::AllArgsMaker< edm::RootHandlers, InitRootHandlers > RootHandlersMaker
static char *const * getPstackArgv()
std::shared_ptr< const void > sigIllHandler_
virtual void initializeThisThreadForUse() override
virtual void ignoreWarnings_() override
static int childToParent_[2]
std::shared_ptr< const void > sigTermHandler_
void addAdditionalInfo(std::string const &info)
int cmssw_stacktrace(void *)
static std::unique_ptr< std::thread > helperThread_
static std::vector< std::array< char, moduleBufferSize > > moduleListBuffers_
virtual ~InitRootHandlers()
InitRootHandlers(ParameterSet const &pset, ActivityRegistry &iReg)
tbb::concurrent_unordered_set< pthread_t > Container_type
void add(std::string const &label, ParameterSetDescription const &psetDescription)
TEveGeoShape * clone(const TEveElement *element, TEveElement *parent)
static int parentToChild_[2]
void watchPostForkReacquireResources(PostForkReacquireResources::slot_type const &iSlot)
virtual void willBeUsingThreads() override
char data[epos_bytes_allocation]
static void fillDescriptions(ConfigurationDescriptions &descriptions)
static std::atomic< unsigned int > counter
static const int pidStringLength_
const Container_type & IDs()
static int stackTracePause()
static void stacktraceHelperThread()
bool hasDictionary(std::type_info const &)
tuple size
Write out results.