21 #include "tbb/task_scheduler_observer.h"
22 #include "tbb/concurrent_unordered_set.h"
40 #include "TInterpreter.h"
43 #include "TUnixSystem.h"
45 #include "TVirtualStreamerInfo.h"
48 #include "TClassTable.h"
// Capacity, in bytes, of the fixed-size character buffers that hold the
// "\nModule: ..." text; it is used both as the array size and as the size
// bound passed to strlcpy/strlcat further down in this file.
55 constexpr std::size_t moduleBufferSize = 128;
// Per-thread (thread_local) flag, initially false. It is assigned false and
// true further down in this file (presumably from enableWarnings_() and
// ignoreWarnings_() -- confirm against the complete source).
154 static thread_local
bool s_ignoreWarnings =
false;
// File-scope flag shared by all threads, initially false; it is tested
// inside RootErrorHandlerImpl.
156 static bool s_ignoreEverything =
false;
// Implementation of the ROOT message handler.
//
// Classifies a ROOT message by its numeric `level`, builds an identifier
// string from pieces of the message and location text, escalates a few known
// messages to fatal severity and reports the result.
//
// NOTE(review): this listing is partial -- lines of the original body are
// missing between several of the statements below, so the comments describe
// only what the visible statements do.
//
// level    - ROOT severity level, compared against kFatal/kSysError/kError.
// location - text describing where the message originated.
// message  - message text; may be null.
158 void RootErrorHandlerImpl(
int level,
char const* location,
char const*
message) {
// Translate the numeric level into a SeverityLevel, testing the highest
// threshold first.
166 if (level >= kFatal) {
167 el_severity = SeverityLevel::kFatal;
168 }
else if (level >= kSysError) {
169 el_severity = SeverityLevel::kSysError;
170 }
else if (level >=
kError) {
176 if(s_ignoreEverything) {
// A null `message` pointer is never copied; el_message keeps its previous
// value in that case.
187 if (message != 0) el_message =
message;
// If `precursor` occurs in the message, append "-" followed by the text
// between the end of `precursor` and the next space or colon to the
// identifier.
198 size_t index1 = el_message.find(precursor);
199 if (index1 != std::string::npos) {
200 size_t index2 = index1 + precursor.length();
201 size_t index3 = el_message.find_first_of(
" :", index2);
202 if (index3 != std::string::npos) {
203 size_t substrlen = index3-index2;
204 el_identifier +=
"-";
205 el_identifier += el_message.substr(index2,substrlen);
// Append "/" followed by the part of the location that precedes the first
// "::" to the identifier.
208 index1 = el_location.find(
"::");
209 if (index1 != std::string::npos) {
210 el_identifier +=
"/";
211 el_identifier += el_location.substr(0, index1);
// Escalate to fatal: a TBranchElement::Fill message that mentions
// "fill branch", "address" and "not set".
217 if ((el_location.find(
"TBranchElement::Fill") != std::string::npos)
218 && (el_message.find(
"fill branch") != std::string::npos)
219 && (el_message.find(
"address") != std::string::npos)
220 && (el_message.find(
"not set") != std::string::npos)) {
221 el_severity = SeverityLevel::kFatal;
// Escalate to fatal: a message about tree branches with different numbers
// of entries.
224 if ((el_message.find(
"Tree branches") != std::string::npos)
225 && (el_message.find(
"different numbers of entries") != std::string::npos)) {
226 el_severity = SeverityLevel::kFatal;
// Known message texts and locations that receive special treatment. The
// action taken when the condition holds is on lines not visible in this
// listing (presumably a severity downgrade -- TODO confirm).
232 if ((el_message.find(
"no dictionary for class") != std::string::npos) ||
233 (el_message.find(
"already in TClassTable") != std::string::npos) ||
234 (el_message.find(
"matrix not positive definite") != std::string::npos) ||
235 (el_message.find(
"not a TStreamerInfo object") != std::string::npos) ||
236 (el_message.find(
"Problems declaring payload") != std::string::npos) ||
237 (el_message.find(
"Announced number of args different from the real number of argument passed") != std::string::npos) ||
238 (el_location.find(
"Fit") != std::string::npos) ||
239 (el_location.find(
"TDecompChol::Solve") != std::string::npos) ||
240 (el_location.find(
"THistPainter::PaintInit") != std::string::npos) ||
241 (el_location.find(
"TUnixSystem::SetDisplay") != std::string::npos) ||
242 (el_location.find(
"TGClient::GetFontByName") != std::string::npos) ||
243 (el_location.find(
"Inverter::Dinv") != std::string::npos) ||
244 (el_message.find(
"nbins is <=0 - set to nbins = 1") != std::string::npos) ||
245 (el_message.find(
"nbinsy is <=0 - set to nbinsy = 1") != std::string::npos) ||
247 (el_location.find(
"CINTTypedefBuilder::Setup")!= std::string::npos) and
248 (el_message.find(
"possible entries are in use!") != std::string::npos))) {
// When `die` is set and the location is not TUnixSystem::DispatchSignals,
// compose a "Fatal Root Error" text from the location and the message.
264 if (die && (el_location !=
std::string(
"@SUB=TUnixSystem::DispatchSignals"))) {
265 std::ostringstream sstr;
266 sstr <<
"Fatal Root Error: " << el_location <<
"\n" << el_message <<
'\n';
269 except.clearMessage();
// Dispatch on the final severity; only the "Root_Information" LogInfo call
// is visible in this listing.
277 if (el_severity == SeverityLevel::kFatal) {
279 }
else if (el_severity == SeverityLevel::kSysError) {
286 edm::LogInfo(
"Root_Information") << el_location << el_message ;
// Error-handler entry point registered with SetErrorHandler() later in this
// file. Forwards level, location and message to RootErrorHandlerImpl; the
// unnamed bool parameter is not used.
290 void RootErrorHandler(
int level,
bool,
char const* location,
char const* message) {
291 RootErrorHandlerImpl(level, location, message);
// Writes the NUL-terminated string `text` to file descriptor `fd` with
// write(2). The number of bytes to send is taken from strlen(text).
// On a write error the call is retried when errno is EINTR; any other errno
// is returned negated. (The surrounding loop and the success return are on
// lines not visible in this listing.)
296 static int full_write(
int fd,
const char *
text)
298 const char *buffer =
text;
299 size_t count = strlen(text);
303 written =
write(fd, buffer, count);
306 if (errno == EINTR) {
continue;}
307 else {
return -errno;}
// Reads from file descriptor `fd` into `inbuf`, optionally bounded by a
// timeout.
//
// fd        - descriptor to read from.
// inbuf     - destination buffer.
// len       - size parameter for the read (presumably the number of bytes
//             wanted -- the loop that consumes it is not visible here).
// timeout_s - timeout in seconds; defaults to -1.
//
// NOTE(review): this listing is partial; braces, the loop and the return
// statements are on lines that are not shown.
315 static int full_read(
int fd,
char *inbuf,
size_t len,
int timeout_s=-1)
319 ssize_t complete = 0;
// Fetch the descriptor's current status flags so they can be restored later.
326 else if ((-1 == (flags = fcntl(fd, F_GETFL))))
// Put the descriptor into non-blocking mode.
332 if (-1 == fcntl(fd, F_SETFL, flags | O_NONBLOCK))
// Wait for the descriptor to become readable.
341 struct pollfd poll_info{
fd, POLLIN, 0};
343 if (ms_remaining > 0)
// poll() returning 0 means no event arrived within ms_remaining.
345 if (poll(&poll_info, 1, ms_remaining) == 0)
// Restore the original flags, but only if O_NONBLOCK was not already set
// before this function changed it. The same pattern recurs on every exit
// path below.
347 if ((flags & O_NONBLOCK) != O_NONBLOCK)
349 fcntl(fd, F_SETFL, flags);
354 else if (ms_remaining < 0)
356 if ((flags & O_NONBLOCK) != O_NONBLOCK)
358 fcntl(fd, F_SETFL, flags);
363 complete =
read(fd, buf, count);
// Interrupted or would-block reads are retried.
366 if (errno == EINTR) {
continue;}
367 else if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
continue;}
// Save errno before fcntl() below has a chance to overwrite it.
370 int orig_errno = errno;
371 if ((flags & O_NONBLOCK) != O_NONBLOCK)
373 fcntl(fd, F_SETFL, flags);
381 if ((flags & O_NONBLOCK) != O_NONBLOCK) {
382 fcntl(fd, F_SETFL, flags);
// Convenience wrapper: writes `text` to file descriptor 2 via full_write()
// and returns full_write()'s result.
387 static int full_cerr_write(
const char *text)
389 return full_write(2, text);
// Signal numbers sent with pthread_kill() to other threads around stack-trace
// generation: PAUSE_SIGNAL is handled by sig_pause_for_stacktrace and
// RESUME_SIGNAL by sig_resume_handler. When SIGRTMAX is defined the two
// highest real-time signals are used; otherwise, where SIGINFO is defined
// (macOS/BSD), SIGINFO and SIGALRM are used.
// NOTE(review): RESUME_SIGNAL expands to the unparenthesised expression
// `SIGRTMAX-1`. Every use visible in this file passes it as a complete
// function argument, where that is harmless, but defining it as
// `(SIGRTMAX-1)` would protect future uses inside larger expressions.
396 #if defined(SIGRTMAX)
397 #define PAUSE_SIGNAL SIGRTMAX
398 #define RESUME_SIGNAL SIGRTMAX-1
399 #elif defined(SIGINFO) // macOS/BSD
400 #define PAUSE_SIGNAL SIGINFO
401 #define RESUME_SIGNAL SIGALRM
// Deliberately empty signal handler. sig_dostack_then_abort installs it for
// RESUME_SIGNAL, so receiving that signal runs no code of its own
// (presumably it only serves to wake a thread that is waiting for the
// signal -- confirm against the complete source).
405 void sig_resume_handler(
int sig, siginfo_t*,
void*) {}
// Handler installed for PAUSE_SIGNAL by sig_dostack_then_abort.
// NOTE(review): partial listing -- the declarations of `sigset` and `buff`
// and the code that waits for the resume signal are not visible here.
408 void sig_pause_for_stacktrace(
int sig, siginfo_t*,
void*) {
409 using namespace edm::service;
// Unblock RESUME_SIGNAL for the calling thread.
413 sigemptyset(&sigset);
414 sigaddset(&sigset, RESUME_SIGNAL);
415 pthread_sigmask(SIG_UNBLOCK, &sigset, 0);
// Start the "\nModule: " text in `buff`, bounded by moduleBufferSize.
425 strlcpy(buff,
"\nModule: ", moduleBufferSize);
// "none" is appended on one path (the selecting condition is not visible).
429 strlcat(buff,
"none", moduleBufferSize);
// Signal handler that reports a fatal signal on stderr, signals the threads
// listed in `tids` to pause and later to resume, and prints "Module:" lines.
// NOTE(review): partial listing -- the origin of `tids`, the code selecting
// `signalname`, the stack-trace request itself and the final abort/exit
// logic are on lines that are not visible here.
436 void sig_dostack_then_abort(
int sig, siginfo_t*,
void*) {
437 using namespace edm::service;
// Remember the thread running this handler.
441 const auto self = pthread_self();
// Install sig_pause_for_stacktrace as the handler for PAUSE_SIGNAL ...
445 struct sigaction act;
446 act.sa_sigaction = sig_pause_for_stacktrace;
448 sigemptyset(&act.sa_mask);
449 sigaction(PAUSE_SIGNAL, &act,
NULL);
// ... and make sure PAUSE_SIGNAL is not blocked.
452 sigset_t pausesigset;
453 sigemptyset(&pausesigset);
454 sigaddset(&pausesigset, PAUSE_SIGNAL);
455 sigprocmask(SIG_UNBLOCK, &pausesigset, 0);
// Send PAUSE_SIGNAL to thread ids from `tids` (any filtering between the loop
// header and the pthread_kill call is not visible in this listing).
458 for (
auto id : tids) {
460 pthread_kill(
id, PAUSE_SIGNAL);
// Reuse `act` to install the empty sig_resume_handler for RESUME_SIGNAL.
466 act.sa_sigaction = sig_resume_handler;
467 sigaction(RESUME_SIGNAL, &act,
NULL);
// Human-readable name of the signal; stays "unknown" unless one of the
// assignments below is reached.
472 const char* signalname =
"unknown";
476 signalname =
"bus error";
481 signalname =
"segmentation violation";
486 signalname =
"illegal instruction";
491 signalname =
"external termination request";
497 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
498 full_cerr_write(signalname);
499 full_cerr_write(
"\nThe following is the call stack containing the origin of the signal.\n\n");
// Count the threads for which sending RESUME_SIGNAL succeeded
// (pthread_kill returned 0).
508 std::size_t notified = 0;
510 for (
auto id : tids) {
512 if (pthread_kill(
id, RESUME_SIGNAL) == 0) ++notified;
518 full_cerr_write(
"\nCurrent Modules:\n");
// If the handling thread itself is one of the ids in `tids`, print its
// module line with a " (crashed)" suffix.
525 if (tids.count(
self) > 0) {
526 char buff[moduleBufferSize] =
"\nModule: ";
530 strlcat(buff,
"none", moduleBufferSize);
532 strlcat(buff,
" (crashed)", moduleBufferSize);
533 full_cerr_write(buff);
// Presumably the else branch (handling thread not in `tids`) -- the
// branch keyword is not visible in this listing.
535 full_cerr_write(
"\nModule: non-CMSSW (crashed)");
// Presumably {tv_sec, tv_nsec} = 0 s, 1000 ns; its use is not visible here.
543 timespec
t = { 0, 1000 };
551 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
552 full_cerr_write(signalname);
553 full_cerr_write(
"\n");
// For SIGILL, SIGSEGV, SIGBUS and SIGTERM put the default disposition back.
557 if ((sig == SIGILL) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig == SIGTERM))
559 signal(sig, SIG_DFL);
// Signal handler that writes a fixed notice to stderr and resets the
// disposition of `sig` to the default. (Whatever follows the reset is on
// lines not visible in this listing.)
568 void sig_abort(
int sig, siginfo_t*,
void*) {
569 full_cerr_write(
"\n\nFatal system signal has occurred during exit\n");
572 signal(sig, SIG_DFL);
// Restores the default disposition (SIG_DFL) for SIGILL, SIGSEGV, SIGBUS and
// SIGTERM.
581 void set_default_signals() {
582 signal(SIGILL, SIG_DFL);
583 signal(SIGSEGV, SIG_DFL);
584 signal(SIGBUS, SIG_DFL);
585 signal(SIGTERM, SIG_DFL);
605 char buf[2]; buf[1] =
'\0';
609 int result = full_read(fromParent, buf, 1);
615 set_default_signals();
617 full_cerr_write(
"\n\nTraceback helper thread failed to read from parent: ");
618 full_cerr_write(strerror(-result));
619 full_cerr_write(
"\n");
624 set_default_signals();
626 full_write(toParent, buf);
628 else if (buf[0] ==
'2')
637 else if (buf[0] ==
'3')
643 set_default_signals();
645 full_cerr_write(
"\n\nTraceback helper thread got unknown command from parent: ");
646 full_cerr_write(buf);
647 full_cerr_write(
"\n");
658 full_cerr_write(
"\n\nAttempt to request stacktrace failed: ");
659 full_cerr_write(strerror(-result));
660 full_cerr_write(
"\n");
663 char buf[2]; buf[1] =
'\0';
666 full_cerr_write(
"\n\nWaiting for stacktrace completion failed: ");
667 if (result == -ETIMEDOUT)
669 full_cerr_write(
"timed out waiting for GDB to complete.");
673 full_cerr_write(strerror(-result));
675 full_cerr_write(
"\n");
682 char child_stack[4*1024];
683 char *child_stack_ptr = child_stack + 4*1024;
693 if (child_stack_ptr) {}
698 full_cerr_write(
"(Attempt to perform stack dump failed.)\n");
703 if (waitpid(pid, &status, 0) == -1)
705 full_cerr_write(
"(Failed to wait on stack dump output.)\n");
709 full_cerr_write(
"(GDB stack trace failed unexpectedly)\n");
721 syscall(SYS_execve,
"/bin/sh", argv, __environ);
723 execv(
"/bin/sh", argv);
// Per-thread initialisation hook: declares a function-local thread_local
// TThread object, so each thread that calls this function constructs its own
// `guard` instance the first time it gets here.
731 void localInitializeThisThreadForUse() {
732 static thread_local TThread guard;
// TBB task whose execute() calls localInitializeThisThreadForUse() on the
// thread that runs it. It is constructed with a pointer to an atomic counter
// and a pointer to a waiting task (how execute() uses them is on lines not
// visible in this listing).
735 class InitializeThreadTask :
public tbb::task {
737 InitializeThreadTask(std::atomic<unsigned int>*
counter,
738 tbb::task* waitingTask):
742 tbb::task*
execute()
override {
747 localInitializeThisThreadForUse();
774 unloadSigHandler_(pset.getUntrackedParameter<bool> (
"UnloadRootSigHandler")),
775 resetErrHandler_(pset.getUntrackedParameter<bool> (
"ResetRootErrHandler")),
776 loadAllDictionaries_(pset.getUntrackedParameter<bool>(
"LoadAllDictionaries")),
777 autoLibraryLoader_(loadAllDictionaries_
or pset.getUntrackedParameter<bool> (
"AutoLibraryLoader"))
783 gSystem->ResetSignal(kSigChild);
784 gSystem->ResetSignal(kSigBus);
785 gSystem->ResetSignal(kSigSegmentationViolation);
786 gSystem->ResetSignal(kSigIllegalInstruction);
787 gSystem->ResetSignal(kSigSystem);
788 gSystem->ResetSignal(kSigPipe);
789 gSystem->ResetSignal(kSigAlarm);
790 gSystem->ResetSignal(kSigUrgent);
791 gSystem->ResetSignal(kSigFloatingException);
792 gSystem->ResetSignal(kSigWindowChanged);
798 gSystem->ResetSignal(kSigBus);
799 gSystem->ResetSignal(kSigSegmentationViolation);
800 gSystem->ResetSignal(kSigIllegalInstruction);
824 std::atomic<unsigned int> threadsLeft{nThreads};
826 std::shared_ptr<tbb::empty_task> waitTask{
new (tbb::task::allocate_root()) tbb::empty_task{},
829 waitTask->set_ref_count(1+nThreads);
830 for(
unsigned int i=0;
i<nThreads;++
i) {
831 tbb::task::spawn( *(
new(tbb::task::allocate_root()) InitializeThreadTask(&threadsLeft, waitTask.get())));
834 waitTask->wait_for_all();
849 SetErrorHandler(RootErrorHandler);
854 gInterpreter->SetClassAutoloading(1);
858 TTree::SetMaxTreeSize(kMaxLong64);
859 TH1::AddDirectory(kFALSE);
866 if (!
hasDictionary(
typeid(std::vector<std::vector<unsigned int> >))) {
878 TIter iter(gROOT->GetListOfFiles());
879 TObject *
obj =
nullptr;
880 while(
nullptr != (obj = iter.Next())) {
881 TFile*
f =
dynamic_cast<TFile*
>(
obj);
886 iter = TIter(gROOT->GetListOfFiles());
893 TThread::Initialize();
895 TObject::SetObjectStat(
false);
898 TVirtualStreamerInfo::Optimize(
false);
902 localInitializeThisThreadForUse();
907 desc.
setComment(
"Centralized interface to ROOT.");
909 ->setComment(
"If True, signals are handled by this service, rather than by ROOT.");
911 ->setComment(
"If True, ROOT messages (e.g. errors, warnings) are handled by this service, rather than by ROOT.");
913 ->setComment(
"If True, enables automatic loading of data dictionaries.");
915 ->setComment(
"If True, loads all ROOT dictionaries.");
917 ->setComment(
"If True, do an abort when a signal occurs that causes a crash. If False, ROOT will do an exit which attempts to do a clean shutdown.");
919 ->setComment(
"Sets ROOT's gDebug value.");
921 ->setComment(
"Seconds to pause other threads during stack trace.");
922 descriptions.
add(
"InitRootHandlers", desc);
932 s_ignoreWarnings =
false;
937 s_ignoreWarnings =
true;
946 "set pagination no\n"
947 "thread apply all bt\n"
949 "/bin/sed -n -e 's/^\\((gdb) \\)*//' -e '/^#/p' -e '/^Thread/p'", getpid()) >=
pidStringLength_)
951 std::ostringstream sstr;
952 sstr <<
"Unable to pre-allocate stacktrace handler information";
// Error text for a failed pipe creation; includes errno and its strerror()
// description.
969 std::ostringstream sstr;
970 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);
// NOTE(review): this second message is word-for-word identical to the one
// above. The class declares both childToParent_ and parentToChild_; if this
// branch handles the parent-to-child pair, the text looks like a copy-paste
// slip and should name that pair -- confirm against the complete source.
// Also verify that nothing between the failing call and this point can
// overwrite errno before it is formatted.
979 std::ostringstream sstr;
980 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);
unsigned int maxNumberOfThreads() const
T getUntrackedParameter(std::string const &, T const &) const
bool loadAllDictionaries_
virtual void enableWarnings_() override
static void cmssw_stacktrace_fork()
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
void watchPreallocate(Preallocate::slot_type const &iSlot)
static void stacktraceFromThread()
static char *const pstackArgv_[]
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
The Signals That Services Can Subscribe To. This is based on ActivityRegistry and is current per […]. Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application. Each possible callback has some defined arguments, which we here list in angle brackets, e.g. <void, edm::EventID const &, edm::Timestamp const &>. We also list in braces which AR_WATCH_USING_METHOD_ is used for those or […]
Container_type threadIDs_
void on_scheduler_entry(bool)
bool isProcessWideService(TFileService const *)
static ModuleCallingContext const * getCurrentModuleOnThread()
void setRefCoreStreamer(bool resetAll=false)
std::vector< Variable::Flags > flags
static int stackTracePause_
void installCustomHandler(int signum, CFUNC func)
std::shared_ptr< const void > sigSegvHandler_
friend int cmssw_stacktrace(void *)
std::shared_ptr< const void > sigBusHandler_
static TypeWithDict byName(std::string const &name)
static std::atomic< std::size_t > doneModules_
static const ThreadTracker::Container_type & threadIDs()
void cachePidInfoHandler(unsigned int, unsigned int)
std::atomic< unsigned int > * threadsLeft_
void setComment(std::string const &value)
std::string moduleName(Provenance const &provenance)
static std::atomic< std::size_t > nextModule_
static char pidString_[pidStringLength_]
static ThreadTracker threadTracker_
edm::serviceregistry::AllArgsMaker< edm::RootHandlers, InitRootHandlers > RootHandlersMaker
static char *const * getPstackArgv()
std::shared_ptr< const void > sigIllHandler_
virtual void initializeThisThreadForUse() override
virtual void ignoreWarnings_() override
static int childToParent_[2]
std::shared_ptr< const void > sigTermHandler_
void addAdditionalInfo(std::string const &info)
int cmssw_stacktrace(void *)
static std::unique_ptr< std::thread > helperThread_
static std::vector< std::array< char, moduleBufferSize > > moduleListBuffers_
virtual ~InitRootHandlers()
InitRootHandlers(ParameterSet const &pset, ActivityRegistry &iReg)
tbb::concurrent_unordered_set< pthread_t > Container_type
void add(std::string const &label, ParameterSetDescription const &psetDescription)
TEveGeoShape * clone(const TEveElement *element, TEveElement *parent)
static int parentToChild_[2]
void watchPostForkReacquireResources(PostForkReacquireResources::slot_type const &iSlot)
virtual void willBeUsingThreads() override
char data[epos_bytes_allocation]
static void fillDescriptions(ConfigurationDescriptions &descriptions)
static std::atomic< unsigned int > counter
static const int pidStringLength_
const Container_type & IDs()
static int stackTracePause()
static void stacktraceHelperThread()
bool hasDictionary(std::type_info const &)
tuple size
Write out results.