CMS 3D CMS Logo

Public Member Functions | Private Member Functions | Private Attributes | Static Private Attributes

evf::Vulture Class Reference

#include <Vulture.h>

List of all members.

Public Member Functions

int hasStarted ()
int hasStopped ()
pid_t kill ()
pid_t makeProcess ()
void retrieve_corefile (char *, char *, uint64_t)
int start (std::string, int=0)
int stop ()
 Vulture (bool)
virtual ~Vulture ()

Private Member Functions

void analyze ()
bool control (toolbox::task::WorkLoop *)
bool prowling (toolbox::task::WorkLoop *)
void startProwling ()

Private Attributes

toolbox::task::ActionSignature * asCtrl_
toolbox::task::ActionSignature * asProwl_
std::vector< std::string > currentCoreList_
bool handicapped_
std::string iDieUrl_
time_t lastUpdate_
MasterQueue * mq_
unsigned int newCores_
CurlPoster * poster_
bool prowling_
bool running_
SlaveQueue * sq_
int started_
int stopped_
DIR * tmp_
bool updateMode_
pid_t vulturePid_
toolbox::task::WorkLoop * wlCtrl_
toolbox::task::WorkLoop * wlProwl_

Static Private Attributes

static const std::string FS = "/tmp"
static const int vulture_queue_offset = 400

Detailed Description

Definition at line 36 of file Vulture.h.


Constructor & Destructor Documentation

evf::Vulture::Vulture ( bool  push)

Definition at line 117 of file Vulture.cc.

    : wlCtrl_(0)
    , asCtrl_(0)
    , running_(false)
    , wlProwl_(0)
    , asProwl_(0)
    , prowling_(false)
    , iDieUrl_("")
    , updateMode_(push)
    , vulturePid_(0)
    , tmp_(0)
    , lastUpdate_(0) // overwritten by startProwling() before prowling() reads it
    , newCores_(0)
    , poster_(0)
    , mq_(new MasterQueue(vulture_queue_offset))
    , sq_(0) // this is only defined in the forked process
    , started_(-1)
    , stopped_(-1)
    , handicapped_(false)
  {
    // Create the gdb command file if it is not already there; analyze()
    // hands it to gdb through "-x /tmp/vulture.cmd".
    std::ifstream vulture("/tmp/vulture.cmd");
    if (!vulture.good())
    {
      FILE *outf = fopen("/tmp/vulture.cmd","w");
      if (outf != 0)
      {
        fprintf(outf,"where\n");
        fclose(outf);
      }
      else
      {
        // fopen() can fail (permissions, full disk, ...); writing through a
        // null FILE* would crash, so report the problem and carry on.
        const int errsv = errno;
        std::cout << "Vulture::could not create /tmp/vulture.cmd - "
                  << strerror(errsv) << std::endl;
      }
    }

  }
evf::Vulture::~Vulture ( ) [virtual]

Definition at line 147 of file Vulture.cc.

References mq_, poster_, and sq_.

  {
    // Release the queues and the poster. delete on a null pointer is a
    // no-op, so sq_ and poster_ need no explicit guard.
    delete mq_;
    delete sq_;
    delete poster_;
    // tmp_ is only non-null where makeProcess() opened the directory stream;
    // close it so the descriptor is not leaked.
    if(tmp_ != 0){
      closedir(tmp_);
      tmp_ = 0;
    }
  }

Member Function Documentation

void evf::Vulture::analyze ( void  ) [private]

Definition at line 398 of file Vulture.cc.

References edmPickEvents::command, gather_cfg::cout, currentCoreList_, FS, i, newCores_, AlCaHLTBitMon_ParallelJobs::p, evf::utils::pid, poster_, evf::CurlPoster::postString(), alignCSCRings::s, and svgfig::stack.

Referenced by prowling().

  {
    // Do a first analysis of the core dumps recorded by the latest prowling()
    // pass: run gdb in batch mode on each of the last newCores_ entries of
    // currentCoreList_, keep its output from the first line starting with
    // "Core" onwards, open up the core file's permissions and post the text
    // through poster_.
    if(newCores_==0) return;

    // Never start before the beginning of the list, even if newCores_ were
    // ever larger than the list itself (the subtraction is unsigned).
    const std::vector<std::string>::size_type total = currentCoreList_.size();
    const std::vector<std::string>::size_type first =
      (newCores_ < total) ? total - newCores_ : 0;

    for(std::vector<std::string>::size_type i = first; i < total; i++){
      const std::string coreName = currentCoreList_[i];
      const std::string filePathAndName = FS + "/" + coreName;

      // The file name comes from a world-writable directory and ends up in a
      // shell command line: wrap it in single quotes (escaping embedded ones)
      // so that the shell passes it to gdb as exactly one argument.
      std::string quotedPath = "'";
      for(std::string::size_type c = 0; c < filePathAndName.size(); ++c){
        if(filePathAndName[c] == '\'') quotedPath += "'\\''";
        else quotedPath += filePathAndName[c];
      }
      quotedPath += "'";
      const std::string command =
        "gdb /opt/xdaq/bin/xdaq.exe -batch -x /tmp/vulture.cmd -c " + quotedPath;

      // Everything after the first '.' of the file name is parsed as the pid
      // (atoi yields 0 when that text is not numeric).
      const std::string pid =
        coreName.substr(coreName.find_first_of(".")+1, coreName.length());

      std::string cmdout;
      FILE *ps = popen(command.c_str(),"r");
      if(ps == 0){
        const int errpo = errno;
        std::cout << "ERROR: couldn't run gdb on corefile -"
                  << strerror(errpo) << std::endl;
      }
      else{
        // getline() may realloc() the buffer, so it has to start out null (or
        // malloc'ed) and be released with free(), never new[]/delete[].
        char *p = 0;
        size_t s = 0;
        bool filter = false;
        while(getline(&p,&s,ps) != -1){
          if(strncmp("Core",p,4)==0) filter = true;
          if(filter)cmdout += p;
        }
        free(p);
        pclose(ps);
      }

      int errsv = 0;
      int rch = chmod(filePathAndName.c_str(),0777);
      if(rch != 0){
        errsv = errno;
        std::cout << "ERROR: couldn't change corefile access privileges -" 
                  << strerror(errsv)<< std::endl;
      }
      unsigned int ipid = static_cast<unsigned int>(atoi(pid.c_str()));
      poster_->postString(cmdout.c_str(),cmdout.length(),ipid, CurlPoster::stack); 
      
    }
  }
bool evf::Vulture::control ( toolbox::task::WorkLoop *  wl) [private]

Definition at line 295 of file Vulture.cc.

References evf::CurlPoster::check(), gather_cfg::cout, alignCSCRings::e, lumiQueryAPI::msg, MSQM_MESSAGE_TYPE_NOP, MSQM_VULTURE_TYPE_STA, MSQM_VULTURE_TYPE_STP, MSQS_VULTURE_TYPE_ACK, evf::SlaveQueue::post(), poster_, prowling_, evf::SlaveQueue::rcv(), evf::vulture_start_message::run_, sq_, startProwling(), and evf::vulture_start_message::url_.

Referenced by makeProcess().

  {
    // One iteration of the control workloop: receive a single message from
    // the slave queue and act on its type. Returning false ends the workloop.
    MsgBuf request;
    unsigned long type = MSQM_MESSAGE_TYPE_NOP;
    try{
      type = sq_->rcv(request);
    }
    catch(evf::Exception &e){
      std::cout << "Vulture::exception on msgrcv for control, bailing out of control workloop - good bye" << std::endl;
      return false;
    }
    // the type stored in the message itself is the one that is dispatched on
    type = request->mtype;

    if(type == MSQM_VULTURE_TYPE_STA){
      // Start of run: the payload carries the iDie url and the run number.
      vulture_start_message *sta = (vulture_start_message*)request->mtext;
      // the poster is created on the first start message and then kept
      if(poster_ == 0) poster_ = new CurlPoster(sta->url_);
      if(!poster_->check(sta->run_)){
        std::cout << "Vulture::start - could not contact iDie - chech Url - will not start prowling loop" << std::endl;
        prowling_ = false;
      }
      else{
        try{
          startProwling();
          // acknowledge only once the prowling workloop has been started
          MsgBuf ack(0,MSQS_VULTURE_TYPE_ACK) ;
          sq_->post(ack);
        }
        catch(evf::Exception &e){
          std::cout << "Vulture::start - exception in starting prowling workloop " << e.what() << std::endl;
          //@EM ToDo generate some message here
        }
      }
    }
    else if(type == MSQM_VULTURE_TYPE_STP){
      // Stop: prowling() sees the cleared flag on its next iteration.
      prowling_ = false;
    }
    // any other message type is ignored
    // @EM ToDo generate an appropriate error message

    return true;
    
  }
int evf::Vulture::hasStarted ( )

Definition at line 215 of file Vulture.cc.

References alignCSCRings::e, MAX_MSG_SIZE, mq_, MSQS_VULTURE_TYPE_ACK, evf::MasterQueue::rcvNonBlocking(), and started_.

Referenced by evf::FUEventProcessor::updater().

                         {
    // started_ is negative while no acknowledgement has been seen. The call
    // that receives the ACK returns 0; calls made after that return 1.
    if(started_ >= 0){
      started_ = 1;
      return started_;
    }

    // Still waiting: poll the master queue once for the acknowledgement.
    MsgBuf ack(MAX_MSG_SIZE,MSQS_VULTURE_TYPE_ACK);
    try{
      mq_->rcvNonBlocking(ack);
      started_ = 0;
    }
    catch(evf::Exception &e){
      // deliberately ignored: started_ keeps its negative value
    }
    return started_;    
  }
int evf::Vulture::hasStopped ( )

Definition at line 228 of file Vulture.cc.

References alignCSCRings::e, MAX_MSG_SIZE, mq_, MSQS_VULTURE_TYPE_ACK, evf::MasterQueue::rcvNonBlocking(), and stopped_.

                         {
    // stopped_ is negative while no acknowledgement has been seen. The call
    // that receives the ACK returns 0; calls made after that return 1.
    if(stopped_ >= 0){
      stopped_ = 1;
      return stopped_;
    }

    // Still waiting: poll the master queue once for the acknowledgement.
    MsgBuf ack(MAX_MSG_SIZE,MSQS_VULTURE_TYPE_ACK);
    try{
      mq_->rcvNonBlocking(ack);
      stopped_ = 0;
    }
    catch(evf::Exception &e){
      // deliberately ignored: stopped_ keeps its negative value
    }
    return stopped_;    
  }
pid_t evf::Vulture::kill ( )

Definition at line 263 of file Vulture.cc.

References vulturePid_.

  {
    // Send SIGKILL to the forked vulture process and try to reap it.
    // Returns the waitpid() result (the pid if it was reaped, 0 if it has not
    // changed state yet, -1 on error), or -1 when there is no child to kill.
    //
    // ::kill() gives special meaning to non-positive pids: 0 addresses the
    // caller's whole process group and -1 every process the caller may
    // signal. vulturePid_ is 0 until makeProcess() stores a child pid and
    // again after a previous kill(), so never pass such a value on.
    if(vulturePid_ <= 0){
      vulturePid_ = 0;
      return -1;
    }
    ::kill (vulturePid_, SIGKILL);
    int sl;
    // WNOHANG: do not block if the child has not terminated yet
    pid_t killedOrNot = waitpid(vulturePid_,&sl,WNOHANG);
    vulturePid_ = 0;
    return killedOrNot;
  }
pid_t evf::Vulture::makeProcess ( )

Definition at line 154 of file Vulture.cc.

References asCtrl_, control(), gather_cfg::cout, alignCSCRings::e, Exception, FS, handicapped_, sq_, summarizeEdmComparisonLogfiles::success, tmp_, vulture_queue_offset, vulturePid_, and wlCtrl_.

Referenced by evf::FUEventProcessor::configuring().

                            {

    // Fork the vulture process. In the child: set process attributes, open
    // the directory to be watched, create the slave queue and start the
    // control workloop. In the parent: remember the child's pid.
    // Returns the fork() result (0 in the child, the child's pid in the
    // parent, -1 if the fork failed).
    pid_t retval = fork();
    if(retval==0){ // we are in the forked process
      int success = -1;
      // NOTE: with the prctl call below commented out, success is still -1
      // here, so the "undumpable" message is always printed.
// #ifdef linux
//       success = prctl( PR_SET_DUMPABLE, 0 );
// #endif
      if(success != 0){
        std::cout << "Vulture::could not set process undumpable" << std::endl;
        handicapped_ = true;
      }
#ifdef linux
      success = prctl( PR_SET_PDEATHSIG, SIGKILL );
#endif
      if(success != 0){
        std::cout << "Vulture::could not set process death signal" << std::endl;
        handicapped_ = true;    
      }
      tmp_ = opendir(FS.c_str());
      if(tmp_ == 0){
        // prowling() hands tmp_ to readdir(); make the failure visible here
        const int errsv = errno;
        std::cout << "Vulture::could not open directory " << FS << " - "
                  << strerror(errsv) << std::endl;
        handicapped_ = true;
      }
#ifdef linux
      success = prctl ( PR_SET_NAME , "vulture");
#endif
      if(success != 0){
        std::cout << "Vulture::could not set process name" << std::endl;
        handicapped_ = true;    
      }

      // look up the http/soap receiver peer transport and delete it
      try{
        pt::PeerTransport * ptr =
          pt::getPeerTransportAgent()->getPeerTransport("http","soap",pt::Receiver);
        delete ptr;
      }
      catch (pt::exception::PeerTransportNotFound & e ){
        //do nothing here since we don't know what to do... ?
      }
      //      freopen("/dev/null","w",stderr);
      sq_ = new SlaveQueue(vulture_queue_offset);
      // start the ctrl workloop
      try {
        wlCtrl_=
          toolbox::task::getWorkLoopFactory()->getWorkLoop("Ctrll",
                                                           "waiting");
        if (!wlCtrl_->isActive()) wlCtrl_->activate();
        
        asCtrl_ = toolbox::task::bind(this,&Vulture::control,
                                       "Ctrl");
        wlCtrl_->submit(asCtrl_);
      }
      catch (xcept::Exception& e) {
        std::cout << "Vulture:constructor - could not start workloop 'Ctrl' for process " << retval << std::endl;
      }
    }
    else if(retval > 0){
      // parent: keep the child's pid
      vulturePid_ = retval;
    }
    else{
      // fork() failed: there is no child, so do not record -1 as its pid.
      // The caller still learns about the failure from the return value.
      const int errsv = errno;
      std::cout << "Vulture::could not fork vulture process - "
                << strerror(errsv) << std::endl;
      vulturePid_ = 0;
    }
    return retval;


  }
bool evf::Vulture::prowling ( toolbox::task::WorkLoop *  wl) [private]

Definition at line 343 of file Vulture.cc.

References analyze(), gather_cfg::cout, currentCoreList_, alignCSCRings::e, lastUpdate_, newCores_, cmsPerfSuiteHarvest::now, poster_, evf::CurlPoster::postString(), prowling_, stor::utils::sleep(), evf::CurlPoster::stack, and tmp_.

Referenced by startProwling().

  {

    // One iteration of the prowling workloop: scan the watched directory for
    // files whose name starts with "core" and that were modified after the
    // previous pass, hand them to analyze(), then sleep for a minute.
    // Returning false ends the workloop.
    if(!prowling_){
      // Stop requested: send a final "Dead" message (terminator included).
      const char messageDie[] = "Dead";
      if(poster_==0){
        std::cout << "Vulture: asked to stop prowling but no poster " 
                  << std::endl;
        return false;
      }
      try{
        poster_->postString(messageDie,sizeof(messageDie),0,CurlPoster::stack);
      }
      catch(evf::Exception &e){
          //do nothing just swallow the exception
      }
      std::cout << "Received STOP message, going to delete poster " << std::endl;
//       delete poster_;
//       poster_=0;
      
      return false;
    }

    if(tmp_ == 0){
      // the directory stream was never opened; readdir() on it would crash
      std::cout << "Vulture: no open directory to prowl, bail out " << std::endl;
      return false;
    }
    
    newCores_ = 0;
    
    struct stat filestat;    
    
    // take the timestamp before scanning, so that files written during the
    // scan are picked up by the next pass
    timeval now;
    gettimeofday(&now,0);
    
    // examine the directory looking for new coredumps
    dirent *dirp;
    while((dirp = readdir(tmp_))!=0){
      if(strncmp(dirp->d_name,"core",4)!=0) continue;
      // d_name is relative to the directory being read: stat the full path,
      // not a path relative to the current working directory
      const std::string fullPath = FS + "/" + dirp->d_name;
      if(stat(fullPath.c_str(),&filestat) != 0) continue; // vanished or unreadable
      if(filestat.st_mtime > lastUpdate_){
        currentCoreList_.push_back(dirp->d_name);
        newCores_++;
      }
    }
    rewinddir(tmp_);
    lastUpdate_ = now.tv_sec;
    try{
      analyze();
    }
    catch(evf::Exception &e){
      std::cout << "Vulture cannot send to iDie server, bail out " << std::endl;
      return false;
    }
    ::sleep(60);
    return true;
  }
void evf::Vulture::retrieve_corefile ( char *  ,
char *  ,
uint64_t   
)
pid_t evf::Vulture::start ( std::string  url,
int  run = 0 
)

Definition at line 241 of file Vulture.cc.

References mq_, MSQM_VULTURE_TYPE_STA, evf::MasterQueue::post(), DTTTrigCorrFirst::run, evf::vulture_start_message::run_, stopped_, evf::vulture_start_message::url_, and vulturePid_.

Referenced by evf::FUEventProcessor::enabling().

                                            {

    // Communicate start-of-run to the Vulture process: pack the iDie url and
    // the run number into a start message and post it on the master queue.
    // Returns the pid recorded for the vulture process.
    vulture_start_message stamsg;
    // zero the whole message so that no indeterminate bytes are sent
    memset(&stamsg,0,sizeof(vulture_start_message));
    // url_ is a fixed-size buffer inside the message: bound the copy and
    // keep the terminator instead of writing past its end
    if(url.length() >= sizeof(stamsg.url_)){
      std::cout << "Vulture::start - url too long for start message, truncating"
                << std::endl;
    }
    strncpy(stamsg.url_,url.c_str(),sizeof(stamsg.url_)-1);
    stamsg.url_[sizeof(stamsg.url_)-1] = '\0';
    stamsg.run_ = run;
    MsgBuf msg1(sizeof(vulture_start_message),MSQM_VULTURE_TYPE_STA);
    memcpy(msg1->mtext,&stamsg,sizeof(vulture_start_message));
    mq_->post(msg1);
    // a new run begins: hasStopped() has to wait for a fresh acknowledgement
    stopped_ = -1;
    return vulturePid_;
  }
void evf::Vulture::startProwling ( ) [private]

Definition at line 272 of file Vulture.cc.

References asProwl_, alignCSCRings::e, Exception, lastUpdate_, lumiQueryAPI::msg, cmsPerfSuiteHarvest::now, prowling(), prowling_, and wlProwl_.

Referenced by control().

  {
    // Record the current time as the baseline for "new" core files, raise
    // the prowling flag and submit prowling() to the 'Prowl' workloop.
    // A failure from the workloop machinery is rethrown as evf::Exception.
    timeval startTime;
    gettimeofday(&startTime,0);
    lastUpdate_ = startTime.tv_sec;
    prowling_ = true;

    try {
      wlProwl_ = toolbox::task::getWorkLoopFactory()->getWorkLoop("Prowl","waiting");
      if (!wlProwl_->isActive()) {
        wlProwl_->activate();
      }

      asProwl_ = toolbox::task::bind(this,&Vulture::prowling,"Prowl");
      wlProwl_->submit(asProwl_);
    }
    catch (xcept::Exception& e) {
      std::string reason = "Failed to start workloop 'Prowl'.";
      XCEPT_RETHROW(evf::Exception,reason,e);
    }

  }
pid_t evf::Vulture::stop ( )

Member Data Documentation

toolbox::task::ActionSignature* evf::Vulture::asCtrl_ [private]

Definition at line 61 of file Vulture.h.

Referenced by makeProcess().

toolbox::task::ActionSignature* evf::Vulture::asProwl_ [private]

Definition at line 64 of file Vulture.h.

Referenced by startProwling().

std::vector<std::string> evf::Vulture::currentCoreList_ [private]

Definition at line 70 of file Vulture.h.

Referenced by analyze(), and prowling().

const std::string evf::Vulture::FS = "/tmp" [static, private]

Definition at line 59 of file Vulture.h.

Referenced by analyze(), and makeProcess().

bool evf::Vulture::handicapped_ [private]

Definition at line 78 of file Vulture.h.

Referenced by makeProcess().

std::string evf::Vulture::iDieUrl_ [private]

Definition at line 66 of file Vulture.h.

time_t evf::Vulture::lastUpdate_ [private]

Definition at line 71 of file Vulture.h.

Referenced by prowling(), and startProwling().

MasterQueue* evf::Vulture::mq_ [private]

Definition at line 74 of file Vulture.h.

Referenced by hasStarted(), hasStopped(), start(), stop(), and ~Vulture().

unsigned int evf::Vulture::newCores_ [private]

Definition at line 72 of file Vulture.h.

Referenced by analyze(), and prowling().

CurlPoster* evf::Vulture::poster_ [private]

Definition at line 73 of file Vulture.h.

Referenced by analyze(), control(), prowling(), and ~Vulture().

bool evf::Vulture::prowling_ [private]

Definition at line 65 of file Vulture.h.

Referenced by control(), prowling(), and startProwling().

bool evf::Vulture::running_ [private]

Definition at line 62 of file Vulture.h.

SlaveQueue* evf::Vulture::sq_ [private]

Definition at line 75 of file Vulture.h.

Referenced by control(), makeProcess(), and ~Vulture().

int evf::Vulture::started_ [private]

Definition at line 76 of file Vulture.h.

Referenced by hasStarted(), and stop().

int evf::Vulture::stopped_ [private]

Definition at line 77 of file Vulture.h.

Referenced by hasStopped(), and start().

DIR* evf::Vulture::tmp_ [private]

Definition at line 69 of file Vulture.h.

Referenced by makeProcess(), and prowling().

bool evf::Vulture::updateMode_ [private]

Definition at line 67 of file Vulture.h.

const int evf::Vulture::vulture_queue_offset = 400 [static, private]

Definition at line 53 of file Vulture.h.

Referenced by makeProcess().

pid_t evf::Vulture::vulturePid_ [private]

Definition at line 68 of file Vulture.h.

Referenced by kill(), makeProcess(), start(), and stop().

toolbox::task::WorkLoop* evf::Vulture::wlCtrl_ [private]

Definition at line 60 of file Vulture.h.

Referenced by makeProcess().

toolbox::task::WorkLoop* evf::Vulture::wlProwl_ [private]

Definition at line 63 of file Vulture.h.

Referenced by startProwling().