#include <Vulture.h>
Public Member Functions | |
int | hasStarted () |
int | hasStopped () |
pid_t | kill () |
pid_t | makeProcess () |
void | retrieve_corefile (char *, char *, uint64_t) |
int | start (std::string, int=0) |
int | stop () |
Vulture (bool) | |
virtual | ~Vulture () |
Private Member Functions | |
void | analyze () |
bool | control (toolbox::task::WorkLoop *) |
bool | prowling (toolbox::task::WorkLoop *) |
void | startProwling () |
Private Attributes | |
toolbox::task::ActionSignature * | asCtrl_ |
toolbox::task::ActionSignature * | asProwl_ |
std::vector< std::string > | currentCoreList_ |
bool | handicapped_ |
std::string | iDieUrl_ |
time_t | lastUpdate_ |
MasterQueue * | mq_ |
unsigned int | newCores_ |
CurlPoster * | poster_ |
bool | prowling_ |
bool | running_ |
SlaveQueue * | sq_ |
int | started_ |
int | stopped_ |
DIR * | tmp_ |
bool | updateMode_ |
pid_t | vulturePid_ |
toolbox::task::WorkLoop * | wlCtrl_ |
toolbox::task::WorkLoop * | wlProwl_ |
Static Private Attributes | |
static const std::string | FS = "/tmp" |
static const int | vulture_queue_offset = 400 |
evf::Vulture::Vulture | ( | bool | push | ) |
Definition at line 117 of file Vulture.cc.
: wlCtrl_(0)
, asCtrl_(0)
, running_(false)
, wlProwl_(0)
, asProwl_(0)
, prowling_(false)
, iDieUrl_("")
, updateMode_(push)
, vulturePid_(0)
, tmp_(0)
, lastUpdate_(0) // previously left uninitialized until startProwling() ran
, newCores_(0)
, poster_(0)
, mq_(new MasterQueue(vulture_queue_offset))
, sq_(0) // this is only defined in the forked process
, started_(-1)
, stopped_(-1)
, handicapped_(false)
{
  // Construct the master-side object: every pointer starts null, the
  // started_/stopped_ states start at -1, and the master queue is created
  // with vulture_queue_offset.
  //
  // create command file for gdb, if not already there
  std::ifstream vulture("/tmp/vulture.cmd");
  if (!vulture.good()) {
    FILE *outf = fopen("/tmp/vulture.cmd","w");
    if (outf != 0) {
      fprintf(outf,"where\n");
      fclose(outf);
    }
    else {
      // fopen can fail (permissions, full disk); do not hand a null
      // stream to fprintf/fclose.
      std::cout << "Vulture::could not create /tmp/vulture.cmd - "
                << strerror(errno) << std::endl;
    }
  }
}
evf::Vulture::~Vulture | ( | ) | [virtual] |
void evf::Vulture::analyze | ( | void | ) | [private] |
Definition at line 398 of file Vulture.cc.
References edmPickEvents::command, gather_cfg::cout, currentCoreList_, FS, i, newCores_, AlCaHLTBitMon_ParallelJobs::p, evf::utils::pid, poster_, evf::CurlPoster::postString(), alignCSCRings::s, and svgfig::stack.
Referenced by prowling().
{
  // Do a first analysis of the coredumps appended to currentCoreList_ by the
  // latest scan: run gdb in batch mode on each of the last newCores_ entries,
  // keep the output from the first line starting with "Core" onwards, make
  // the core file world-accessible and post the collected text through
  // poster_ together with the pid parsed from the file name.
  if(newCores_==0) return;
  for(unsigned int i = currentCoreList_.size()-newCores_; i < currentCoreList_.size(); i++){
    const std::string &coreName = currentCoreList_[i];

    std::string filePathAndName = FS + "/";
    filePathAndName += coreName;

    // The file name comes from a directory scan of a shared directory, so
    // single-quote it (escaping embedded quotes) before handing the command
    // line to the shell that popen() starts.
    std::string quotedPath = "'";
    for(std::string::size_type c = 0; c < filePathAndName.length(); c++){
      if(filePathAndName[c] == '\'') quotedPath += "'\\''";
      else quotedPath += filePathAndName[c];
    }
    quotedPath += "'";

    std::string command = "gdb /opt/xdaq/bin/xdaq.exe -batch -x /tmp/vulture.cmd -c ";
    command += quotedPath;

    // Everything after the first '.' in the file name is taken as the pid.
    std::string pid = coreName.substr(coreName.find_first_of(".")+1,
                                      coreName.length());

    std::string cmdout;
    FILE *ps = popen(command.c_str(),"r");
    if(ps == 0){
      std::cout << "ERROR: couldn't run gdb on corefile -"
                << strerror(errno) << std::endl;
    }
    else{
      // getline() may realloc() its buffer, so it must own a malloc-family
      // allocation: start from a null pointer and release with free().
      char *p = 0;
      size_t s = 0;
      bool filter = false;
      while(getline(&p,&s,ps) != -1){
        if(strncmp("Core",p,4)==0) filter = true;
        if(filter) cmdout += p;
      }
      free(p);
      pclose(ps);
    }

    int errsv = 0;
    int rch = chmod(filePathAndName.c_str(),0777);
    if(rch != 0){
      errsv = errno;
      std::cout << "ERROR: couldn't change corefile access privileges -"
                << strerror(errsv)<< std::endl;
    }

    // cmdout may be empty here (gdb printed no "Core" line, or could not be
    // started); it is posted regardless, as before.
    unsigned int ipid = (unsigned int)atoi(pid.c_str());
    poster_->postString(cmdout.c_str(),cmdout.length(),ipid, CurlPoster::stack);
  }
}
bool evf::Vulture::control | ( | toolbox::task::WorkLoop * | wl | ) | [private] |
Definition at line 295 of file Vulture.cc.
References evf::CurlPoster::check(), gather_cfg::cout, alignCSCRings::e, lumiQueryAPI::msg, MSQM_MESSAGE_TYPE_NOP, MSQM_VULTURE_TYPE_STA, MSQM_VULTURE_TYPE_STP, MSQS_VULTURE_TYPE_ACK, evf::SlaveQueue::post(), poster_, prowling_, evf::SlaveQueue::rcv(), evf::vulture_start_message::run_, sq_, startProwling(), and evf::vulture_start_message::url_.
Referenced by makeProcess().
{
  // Work-loop action for the control queue: receive one message from sq_ and
  // act on its type. Returns false only when the receive itself throws;
  // every handled or ignored message returns true.
  MsgBuf msg;
  unsigned long mtype = MSQM_MESSAGE_TYPE_NOP;
  try{
    mtype = sq_->rcv(msg);
  }
  catch(evf::Exception &e){
    std::cout << "Vulture::exception on msgrcv for control, bailing out of control workloop - good bye" << std::endl;
    return false;
  }
  // the type stored in the received buffer is the one acted upon
  mtype = msg->mtype;

  switch(mtype){
  case MSQM_VULTURE_TYPE_STA:
    {
      vulture_start_message *startRequest = (vulture_start_message*)msg->mtext;
      // the poster is created once, from the url of the first start message
      if(poster_ == 0) poster_ = new CurlPoster(startRequest->url_);

      if(!poster_->check(startRequest->run_)){
        std::cout << "Vulture::start - could not contact iDie - chech Url - will not start prowling loop" << std::endl;
        prowling_ = false;
        break;
      }

      try{
        startProwling();
        // acknowledge the start only after the prowling work loop was submitted
        MsgBuf ack(0,MSQS_VULTURE_TYPE_ACK) ;
        sq_->post(ack);
      }
      catch(evf::Exception &e)
        {
          std::cout << "Vulture::start - exception in starting prowling workloop " << e.what() << std::endl;
          //@EM ToDo generate some message here
        }
      break;
    }
  case MSQM_VULTURE_TYPE_STP:
    {
      // prowling() reads this flag on its next pass
      prowling_ = false;
      break;
    }
  default:
    {
      // do nothing @EM ToDo generate an appropriate error message
    }
  }
  return true;
}
int evf::Vulture::hasStarted | ( | ) |
Definition at line 215 of file Vulture.cc.
References alignCSCRings::e, MAX_MSG_SIZE, mq_, MSQS_VULTURE_TYPE_ACK, evf::MasterQueue::rcvNonBlocking(), and started_.
Referenced by evf::FUEventProcessor::updater().
{
  // Returns the start state: -1 while no ACK has been received, 0 on the
  // call that receives the ACK, 1 on every call after that.
  if(started_ >= 0){
    started_ = 1;
    return started_;
  }

  MsgBuf ack(MAX_MSG_SIZE,MSQS_VULTURE_TYPE_ACK);
  try{
    mq_->rcvNonBlocking(ack);
    started_ = 0;
  }
  catch(evf::Exception &e){
    // swallowed on purpose: started_ stays negative
    // (presumably no ACK was waiting - confirm against MasterQueue)
  }
  return started_;
}
int evf::Vulture::hasStopped | ( | ) |
Definition at line 228 of file Vulture.cc.
References alignCSCRings::e, MAX_MSG_SIZE, mq_, MSQS_VULTURE_TYPE_ACK, evf::MasterQueue::rcvNonBlocking(), and stopped_.
{
  // Returns the stop state: -1 while no ACK has been received, 0 on the
  // call that receives the ACK, 1 on every call after that.
  if(stopped_ >= 0){
    stopped_ = 1;
    return stopped_;
  }

  MsgBuf ack(MAX_MSG_SIZE,MSQS_VULTURE_TYPE_ACK);
  try{
    mq_->rcvNonBlocking(ack);
    stopped_ = 0;
  }
  catch(evf::Exception &e){
    // swallowed on purpose: stopped_ stays negative
    // (presumably no ACK was waiting - confirm against MasterQueue)
  }
  return stopped_;
}
pid_t evf::Vulture::kill | ( | ) |
Definition at line 263 of file Vulture.cc.
References vulturePid_.
{
  // Send SIGKILL to the recorded vulture process, try to reap it without
  // blocking and forget its pid.
  // Returns the waitpid() result (the pid if it was reaped, 0 if it has not
  // changed state yet, -1 on error), or -1 when there is no recorded child.
  //
  // kill(2) gives pid 0 and negative pids special meanings (own process
  // group / every process we may signal), so only a real child pid may be
  // signalled. vulturePid_ is 0 before makeProcess() and after a previous
  // kill().
  if(vulturePid_ <= 0){
    vulturePid_ = 0;
    return -1;
  }
  ::kill (vulturePid_, SIGKILL);
  int sl;
  // WNOHANG: do not block the caller if the child has not exited yet
  pid_t killedOrNot = waitpid(vulturePid_,&sl,WNOHANG);
  vulturePid_ = 0;
  return killedOrNot;
}
pid_t evf::Vulture::makeProcess | ( | ) |
Definition at line 154 of file Vulture.cc.
References asCtrl_, control(), gather_cfg::cout, alignCSCRings::e, Exception, FS, handicapped_, sq_, summarizeEdmComparisonLogfiles::success, tmp_, vulture_queue_offset, vulturePid_, and wlCtrl_.
Referenced by evf::FUEventProcessor::configuring().
{
  // Fork the vulture process.
  // In the child: set the parent-death signal and process name (linux only),
  // open the FS directory for later scans, create the slave queue and submit
  // the control work loop.
  // In the parent: record the child's pid in vulturePid_.
  // Returns the fork() result: 0 in the child, the child's pid in the
  // parent, -1 if the fork failed.
  pid_t retval = fork();
  if(retval==0){ // we are in the forked process
    int success = -1;
    // #ifdef linux
    //     success = prctl( PR_SET_DUMPABLE, 0 );
    // #endif
    // With the call above disabled, success is still -1 here, so this
    // branch is always taken.
    if(success != 0){
      std::cout << "Vulture::could not set process undumpable" << std::endl;
      handicapped_ = true;
    }
#ifdef linux
    success = prctl( PR_SET_PDEATHSIG, SIGKILL );
#endif
    if(success != 0){
      std::cout << "Vulture::could not set process death signal" << std::endl;
      handicapped_ = true;
    }
    tmp_ = opendir(FS.c_str());
    if(tmp_ == 0){
      // prowling() hands tmp_ to readdir(); report the failure here
      std::cout << "Vulture::could not open directory " << FS << " - "
                << strerror(errno) << std::endl;
      handicapped_ = true;
    }

#ifdef linux
    success = prctl ( PR_SET_NAME , "vulture");
#endif
    if(success != 0){
      std::cout << "Vulture::could not set process name" << std::endl;
      handicapped_ = true;
    }

    try{
      pt::PeerTransport * ptr =
        pt::getPeerTransportAgent()->getPeerTransport("https","soap",pt::Receiver);
      delete ptr;
    }
    catch (pt::exception::PeerTransportNotFound & e ){
      //do nothing here since we don't know what to do... ?
    }
    //      freopen("/dev/null","w",stderr);
    sq_ = new SlaveQueue(vulture_queue_offset);
    // start the ctrl workloop
    try {
      wlCtrl_=
        toolbox::task::getWorkLoopFactory()->getWorkLoop("Ctrll",
                                                         "waiting");
      if (!wlCtrl_->isActive()) wlCtrl_->activate();

      asCtrl_ = toolbox::task::bind(this,&Vulture::control,
                                    "Ctrl");
      wlCtrl_->submit(asCtrl_);
    }
    catch (xcept::Exception& e) {
      std::cout << "Vulture:constructor - could not start workloop 'Ctrl' for process "
                << retval << std::endl;
    }
  }
  else if(retval < 0){
    // fork() failed: there is no child, so do not record -1 as its pid
    // (a later ::kill(-1, ...) would signal every process we may signal).
    std::cout << "Vulture::could not fork vulture process - "
              << strerror(errno) << std::endl;
  }
  else{
    vulturePid_ = retval;
  }
  return retval;
}
bool evf::Vulture::prowling | ( | toolbox::task::WorkLoop * | wl | ) | [private] |
Definition at line 343 of file Vulture.cc.
References analyze(), gather_cfg::cout, currentCoreList_, alignCSCRings::e, lastUpdate_, newCores_, cmsPerfSuiteHarvest::now, poster_, evf::CurlPoster::postString(), prowling_, stor::utils::sleep(), evf::CurlPoster::stack, testRegression::stat, and tmp_.
Referenced by startProwling().
{
  // Work-loop action: one pass over the FS directory looking for core files
  // modified since the previous pass, followed by analyze() and a 60 s sleep.
  // Returns true to be run again; returns false when prowling_ has been
  // cleared, when the directory was never opened, or when analyze() throws.
  if(!prowling_){
    char messageDie[5];
    sprintf(messageDie,"Dead");
    if(poster_==0){
      std::cout << "Vulture: asked to stop prowling but no poster "
                << std::endl;
      return false;
    }
    try{
      poster_->postString(messageDie,5,0,CurlPoster::stack);
    }
    catch(evf::Exception &e){
      //do nothing just swallow the exception
    }
    std::cout << "Received STOP message, going to delete poster "
              << std::endl;
    //       delete poster_;
    //       poster_=0;
    return false;
  }

  if(tmp_ == 0){
    // opendir() failed in makeProcess(); readdir() on a null stream is invalid
    std::cout << "Vulture: directory " << FS
              << " is not open, cannot look for coredumps " << std::endl;
    return false;
  }

  newCores_ = 0;

  struct stat filestat;

  timeval now;
  gettimeofday(&now,0);

  // examine /tmp looking for new coredumps
  dirent *dirp;
  while((dirp = readdir(tmp_))!=0){
    if(strncmp(dirp->d_name,"core",4)==0){
      // d_name is relative to the scanned directory, not to the current
      // working directory, so stat() needs the full path.
      std::string corePath = FS + "/";
      corePath += dirp->d_name;
      if(stat(corePath.c_str(),&filestat) != 0){
        // entry vanished or is unreadable: filestat holds no valid data
        continue;
      }
      if(filestat.st_mtime > lastUpdate_){
        currentCoreList_.push_back(dirp->d_name);
        newCores_++;
      }
    }
  }
  rewinddir(tmp_);
  // the timestamp was taken before the scan started
  lastUpdate_ = now.tv_sec;
  try{
    analyze();
  }
  catch(evf::Exception &e){
    std::cout << "Vulture cannot send to iDie server, bail out " << std::endl;
    return false;
  }
  ::sleep(60);
  return true;
}
void evf::Vulture::retrieve_corefile | ( | char * | , |
char * | , | ||
uint64_t | |||
) |
pid_t evf::Vulture::start | ( | std::string | url, |
int | run = 0 |
||
) |
Definition at line 241 of file Vulture.cc.
References mq_, MSQM_VULTURE_TYPE_STA, evf::MasterQueue::post(), DTTTrigCorrFirst::run, evf::vulture_start_message::run_, stopped_, evf::vulture_start_message::url_, and vulturePid_.
Referenced by evf::FUEventProcessor::enabling().
{
  //communicate start-of-run to Vulture
  // Packs url and run into a vulture_start_message, posts it on the master
  // queue, resets stopped_ to -1 and returns the recorded vulture pid.
  vulture_start_message stamsg;
  // Zero the whole struct: no uninitialized bytes go out on the queue and
  // the url stays NUL-terminated after the bounded copy below.
  memset(&stamsg,0,sizeof(stamsg));
  // NOTE(review): assumes url_ is a fixed-size char array (the struct is
  // copied byte-wise into the message) - confirm in the message header.
  if(url.length() >= sizeof(stamsg.url_)){
    std::cout << "Vulture::start - url too long for start message, truncating"
              << std::endl;
  }
  strncpy(stamsg.url_,url.c_str(),sizeof(stamsg.url_)-1);
  stamsg.run_ = run;
  MsgBuf msg1(sizeof(vulture_start_message),MSQM_VULTURE_TYPE_STA);
  memcpy(msg1->mtext,&stamsg,sizeof(vulture_start_message));
  mq_->post(msg1);
  stopped_ = -1;
  return vulturePid_;
}
void evf::Vulture::startProwling | ( | ) | [private] |
Definition at line 272 of file Vulture.cc.
References asProwl_, alignCSCRings::e, Exception, lastUpdate_, lumiQueryAPI::msg, cmsPerfSuiteHarvest::now, prowling(), prowling_, and wlProwl_.
Referenced by control().
{
  // Record the current time as the reference for "new" core files, raise the
  // prowling_ flag and submit prowling() to the 'Prowl' work loop.
  // An xcept::Exception from the work-loop calls is rethrown as evf::Exception.
  timeval startTime;
  gettimeofday(&startTime,0);
  lastUpdate_ = startTime.tv_sec;

  prowling_ = true;

  try {
    wlProwl_ = toolbox::task::getWorkLoopFactory()->getWorkLoop("Prowl", "waiting");
    if (!wlProwl_->isActive()) {
      wlProwl_->activate();
    }

    asProwl_ = toolbox::task::bind(this, &Vulture::prowling, "Prowl");
    wlProwl_->submit(asProwl_);
  }
  catch (xcept::Exception& e) {
    std::string msg = "Failed to start workloop 'Prowl'.";
    XCEPT_RETHROW(evf::Exception,msg,e);
  }
}
pid_t evf::Vulture::stop | ( | ) |
Definition at line 254 of file Vulture.cc.
References mq_, MSQM_VULTURE_TYPE_STP, NUMERIC_MESSAGE_SIZE, evf::MasterQueue::post(), started_, and vulturePid_.
Referenced by evf::FUEventProcessor::stopping().
{
  // Post a stop message on the master queue, reset started_ to -1 and
  // return the recorded vulture pid.
  MsgBuf stopRequest(NUMERIC_MESSAGE_SIZE,MSQM_VULTURE_TYPE_STP);
  mq_->post(stopRequest);

  started_ = -1;
  return vulturePid_;
}
toolbox::task::ActionSignature* evf::Vulture::asCtrl_ [private] |
Definition at line 61 of file Vulture.h.
Referenced by makeProcess().
toolbox::task::ActionSignature* evf::Vulture::asProwl_ [private] |
Definition at line 64 of file Vulture.h.
Referenced by startProwling().
std::vector<std::string> evf::Vulture::currentCoreList_ [private] |
Definition at line 70 of file Vulture.h.
Referenced by analyze(), and prowling().
const std::string evf::Vulture::FS = "/tmp" [static, private] |
Definition at line 59 of file Vulture.h.
Referenced by analyze(), and makeProcess().
bool evf::Vulture::handicapped_ [private] |
Definition at line 78 of file Vulture.h.
Referenced by makeProcess().
std::string evf::Vulture::iDieUrl_ [private] |
time_t evf::Vulture::lastUpdate_ [private] |
Definition at line 71 of file Vulture.h.
Referenced by prowling(), and startProwling().
MasterQueue* evf::Vulture::mq_ [private] |
Definition at line 74 of file Vulture.h.
Referenced by hasStarted(), hasStopped(), start(), stop(), and ~Vulture().
unsigned int evf::Vulture::newCores_ [private] |
Definition at line 72 of file Vulture.h.
Referenced by analyze(), and prowling().
CurlPoster* evf::Vulture::poster_ [private] |
Definition at line 73 of file Vulture.h.
Referenced by analyze(), control(), prowling(), and ~Vulture().
bool evf::Vulture::prowling_ [private] |
Definition at line 65 of file Vulture.h.
Referenced by control(), prowling(), and startProwling().
bool evf::Vulture::running_ [private] |
SlaveQueue* evf::Vulture::sq_ [private] |
Definition at line 75 of file Vulture.h.
Referenced by control(), makeProcess(), and ~Vulture().
int evf::Vulture::started_ [private] |
Definition at line 76 of file Vulture.h.
Referenced by hasStarted(), and stop().
int evf::Vulture::stopped_ [private] |
Definition at line 77 of file Vulture.h.
Referenced by hasStopped(), and start().
DIR* evf::Vulture::tmp_ [private] |
Definition at line 69 of file Vulture.h.
Referenced by makeProcess(), and prowling().
bool evf::Vulture::updateMode_ [private] |
const int evf::Vulture::vulture_queue_offset = 400 [static, private] |
Definition at line 53 of file Vulture.h.
Referenced by makeProcess().
pid_t evf::Vulture::vulturePid_ [private] |
toolbox::task::WorkLoop* evf::Vulture::wlCtrl_ [private] |
Definition at line 60 of file Vulture.h.
Referenced by makeProcess().
toolbox::task::WorkLoop* evf::Vulture::wlProwl_ [private] |
Definition at line 63 of file Vulture.h.
Referenced by startProwling().