CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
List of all members | Public Member Functions | Private Member Functions | Private Attributes | Static Private Attributes
evf::Vulture Class Reference

#include <Vulture.h>

Inheritance diagram for evf::Vulture:

Public Member Functions

int hasStarted ()
 
int hasStopped ()
 
pid_t kill ()
 
pid_t makeProcess ()
 
void retrieve_corefile (char *, char *, uint64_t)
 
int start (std::string, int=0)
 
int stop ()
 
 Vulture (bool)
 
virtual ~Vulture ()
 

Private Member Functions

void analyze ()
 
bool control (toolbox::task::WorkLoop *)
 
bool prowling (toolbox::task::WorkLoop *)
 
void startProwling ()
 

Private Attributes

toolbox::task::ActionSignature * asCtrl_
 
toolbox::task::ActionSignature * asProwl_
 
std::vector< std::string > currentCoreList_
 
bool handicapped_
 
std::string iDieUrl_
 
time_t lastUpdate_
 
MasterQueue * mq_
 
unsigned int newCores_
 
CurlPoster * poster_
 
bool prowling_
 
bool running_
 
SlaveQueue * sq_
 
int started_
 
int stopped_
 
DIR * tmp_
 
bool updateMode_
 
pid_t vulturePid_
 
toolbox::task::WorkLoop * wlCtrl_
 
toolbox::task::WorkLoop * wlProwl_
 

Static Private Attributes

static const std::string FS ="/tmp"
 
static const int vulture_queue_offset = 400
 

Detailed Description

Definition at line 36 of file Vulture.h.

Constructor & Destructor Documentation

evf::Vulture::Vulture ( bool  push)

Definition at line 117 of file Vulture.cc.

118  : wlCtrl_(0)
119  , asCtrl_(0)
120  , running_(false)
121  , wlProwl_(0)
122  , asProwl_(0)
123  , prowling_(false)
124  , iDieUrl_("")
125  , updateMode_(push)
126  , vulturePid_(0)
127  , tmp_(0)
128  , newCores_(0)
129  , poster_(0)
130  , mq_(new MasterQueue(vulture_queue_offset))
131  , sq_(0) // this is only defined in the forked process
132  , started_(-1)
133  , stopped_(-1)
134  , handicapped_(false)
135  {
136  // create command file for gdb, if not already there
137  std::ifstream vulture("/tmp/vulture.cmd");
138  if (!vulture.good())
139  {
140  FILE *outf = fopen("/tmp/vulture.cmd","w");
141  fprintf(outf,"where\n");
142  fclose(outf);
143  }
144 
145  }
bool running_
Definition: Vulture.h:62
toolbox::task::WorkLoop * wlProwl_
Definition: Vulture.h:63
SlaveQueue * sq_
Definition: Vulture.h:75
bool updateMode_
Definition: Vulture.h:67
DIR * tmp_
Definition: Vulture.h:69
toolbox::task::ActionSignature * asCtrl_
Definition: Vulture.h:61
toolbox::task::ActionSignature * asProwl_
Definition: Vulture.h:64
toolbox::task::WorkLoop * wlCtrl_
Definition: Vulture.h:60
bool prowling_
Definition: Vulture.h:65
CurlPoster * poster_
Definition: Vulture.h:73
static const int vulture_queue_offset
Definition: Vulture.h:53
MasterQueue * mq_
Definition: Vulture.h:74
unsigned int newCores_
Definition: Vulture.h:72
bool handicapped_
Definition: Vulture.h:78
int stopped_
Definition: Vulture.h:77
int started_
Definition: Vulture.h:76
std::string iDieUrl_
Definition: Vulture.h:66
pid_t vulturePid_
Definition: Vulture.h:68
evf::Vulture::~Vulture ( )
virtual

Definition at line 147 of file Vulture.cc.

References mq_, poster_, and sq_.

148  {
149  delete mq_;
150  if(sq_ != 0) delete sq_;
151  if(poster_ != 0) delete poster_;
152  }
SlaveQueue * sq_
Definition: Vulture.h:75
CurlPoster * poster_
Definition: Vulture.h:73
MasterQueue * mq_
Definition: Vulture.h:74

Member Function Documentation

void evf::Vulture::analyze ( void  )
private

Definition at line 398 of file Vulture.cc.

References edmPickEvents::command, gather_cfg::cout, currentCoreList_, FS, i, newCores_, AlCaHLTBitMon_ParallelJobs::p, evf::utils::pid, poster_, evf::CurlPoster::postString(), alignCSCRings::s, evf::CurlPoster::stack, and AlCaHLTBitMon_QueryRunRegistry::string.

Referenced by prowling().

399  {
400  // do a first analysis of the coredump
401  if(newCores_==0) return;
402  for(unsigned int i = currentCoreList_.size()-newCores_;
403  i < currentCoreList_.size();
404  i++){
405  std::string command = "gdb /opt/xdaq/bin/xdaq.exe -batch -x /tmp/vulture.cmd -c /tmp/";
406  std::string cmdout;
407  command += currentCoreList_[i];
408  std::string filePathAndName = FS + "/";
409  filePathAndName += currentCoreList_[i];
410  std::string pid =
411  currentCoreList_[i].substr(currentCoreList_[i].find_first_of(".")+1,
412  currentCoreList_[i].length());
413 
414  FILE *ps = popen(command.c_str(),"r");
415  size_t s = 256;
416  char *p=new char[s];
417  bool filter = false;
418  while(getline(&p,&s,ps) != -1){
419  if(strncmp("Core",p,4)==0) filter = true;
420  if(filter)cmdout += p;
421  }
422  delete[] p;
423  pclose(ps);
424  int errsv = 0;
425  int rch = chmod(filePathAndName.c_str(),0777);
426  if(rch != 0){
427  errsv = errno;
428  std::cout << "ERROR: couldn't change corefile access privileges -"
429  << strerror(errsv)<< std::endl;
430  }
431  unsigned int ipid = (unsigned int)atoi(pid.c_str());
432  poster_->postString(cmdout.c_str(),cmdout.length(),ipid, CurlPoster::stack);
433 
434  }
435  }
int i
Definition: DBlmapReader.cc:9
static const std::string FS
Definition: Vulture.h:59
void postString(const char *, size_t, unsigned int, mode, const std::string &=standard_post_method_)
Definition: CurlPoster.cc:112
std::vector< std::string > currentCoreList_
Definition: Vulture.h:70
CurlPoster * poster_
Definition: Vulture.h:73
tuple cout
Definition: gather_cfg.py:121
unsigned int newCores_
Definition: Vulture.h:72
bool evf::Vulture::control ( toolbox::task::WorkLoop *  wl)
private

Definition at line 295 of file Vulture.cc.

References evf::CurlPoster::check(), gather_cfg::cout, alignCSCRings::e, lumiQueryAPI::msg, MSQM_MESSAGE_TYPE_NOP, MSQM_VULTURE_TYPE_STA, MSQM_VULTURE_TYPE_STP, MSQS_VULTURE_TYPE_ACK, evf::SlaveQueue::post(), poster_, prowling_, evf::SlaveQueue::rcv(), evf::vulture_start_message::run_, sq_, startProwling(), and evf::vulture_start_message::url_.

Referenced by makeProcess().

296  {
297 
298  MsgBuf msg;
299  unsigned long mtype = MSQM_MESSAGE_TYPE_NOP;
300  try{mtype = sq_->rcv(msg);}catch(evf::Exception &e){
301  std::cout << "Vulture::exception on msgrcv for control, bailing out of control workloop - good bye" << std::endl;
302  return false;
303  }
304  mtype = msg->mtype;
305  switch(mtype){
306  case MSQM_VULTURE_TYPE_STA:
307  {
308 
309  vulture_start_message *sta = (vulture_start_message*)msg->mtext;
310  if(poster_ == 0) poster_ = new CurlPoster(sta->url_);
311  if(poster_->check(sta->run_)){
312  try{
313  startProwling();
314  MsgBuf msg1(0,MSQS_VULTURE_TYPE_ACK) ;
315  sq_->post(msg1);
316  }
317  catch(evf::Exception &e)
318  {
319  std::cout << "Vulture::start - exception in starting prowling workloop " << e.what() << std::endl;
320  //@EM ToDo generate some message here
321  }
322  }else{
323  std::cout << "Vulture::start - could not contact iDie - chech Url - will not start prowling loop" << std::endl;
324  prowling_ = false;
325  }
326 
327  break;
328  }
329  case MSQM_VULTURE_TYPE_STP:
330  {
331  prowling_ = false;
332  break;
333  }
334  default:
335  {
336  // do nothing @EM ToDo generate an appropriate error message
337  }
338  }
339  return true;
340 
341  }
bool check(int)
Definition: CurlPoster.cc:125
#define MSQS_VULTURE_TYPE_ACK
Definition: queue_defs.h:35
SlaveQueue * sq_
Definition: Vulture.h:75
int post(MsgBuf &ptr)
Definition: SlaveQueue.cc:18
#define MSQM_MESSAGE_TYPE_NOP
Definition: queue_defs.h:15
bool prowling_
Definition: Vulture.h:65
void startProwling()
Definition: Vulture.cc:272
CurlPoster * poster_
Definition: Vulture.h:73
#define MSQM_VULTURE_TYPE_STP
Definition: queue_defs.h:23
unsigned long rcv(MsgBuf &ptr)
Definition: SlaveQueue.cc:28
tuple cout
Definition: gather_cfg.py:121
#define MSQM_VULTURE_TYPE_STA
Definition: queue_defs.h:22
int evf::Vulture::hasStarted ( )

Definition at line 215 of file Vulture.cc.

References alignCSCRings::e, MAX_MSG_SIZE, mq_, MSQS_VULTURE_TYPE_ACK, evf::MasterQueue::rcvNonBlocking(), and started_.

Referenced by evf::FUEventProcessor::updater().

215  {
216  if(started_<0){
217  MsgBuf msg2(MAX_MSG_SIZE,MSQS_VULTURE_TYPE_ACK);
218  try{
219  mq_->rcvNonBlocking(msg2);
220  started_ = 0;
221  }
222  catch(evf::Exception &e){
223  }
224  } else {started_ = 1;}
225  return started_;
226  }
#define MSQS_VULTURE_TYPE_ACK
Definition: queue_defs.h:35
#define MAX_MSG_SIZE
Definition: queue_defs.h:10
unsigned long rcvNonBlocking(MsgBuf &ptr)
Definition: MasterQueue.cc:82
MasterQueue * mq_
Definition: Vulture.h:74
int started_
Definition: Vulture.h:76
int evf::Vulture::hasStopped ( )

Definition at line 228 of file Vulture.cc.

References alignCSCRings::e, MAX_MSG_SIZE, mq_, MSQS_VULTURE_TYPE_ACK, evf::MasterQueue::rcvNonBlocking(), and stopped_.

228  {
229  if(stopped_<0){
230  MsgBuf msg2(MAX_MSG_SIZE,MSQS_VULTURE_TYPE_ACK);
231  try{
232  mq_->rcvNonBlocking(msg2);
233  stopped_ = 0;
234  }
235  catch(evf::Exception &e){
236  }
237  } else {stopped_ = 1;}
238  return stopped_;
239  }
#define MSQS_VULTURE_TYPE_ACK
Definition: queue_defs.h:35
#define MAX_MSG_SIZE
Definition: queue_defs.h:10
unsigned long rcvNonBlocking(MsgBuf &ptr)
Definition: MasterQueue.cc:82
MasterQueue * mq_
Definition: Vulture.h:74
int stopped_
Definition: Vulture.h:77
pid_t evf::Vulture::kill ( )

Definition at line 263 of file Vulture.cc.

References vulturePid_.

264  {
265  ::kill (vulturePid_, SIGKILL);
266  int sl;
267  pid_t killedOrNot = waitpid(vulturePid_,&sl,WNOHANG);
268  vulturePid_ = 0;
269  return killedOrNot;
270  }
pid_t kill()
Definition: Vulture.cc:263
pid_t vulturePid_
Definition: Vulture.h:68
pid_t evf::Vulture::makeProcess ( )

Definition at line 154 of file Vulture.cc.

References asCtrl_, control(), gather_cfg::cout, alignCSCRings::e, edm::hlt::Exception, FS, handicapped_, sq_, summarizeEdmComparisonLogfiles::success, tmp_, vulture_queue_offset, vulturePid_, and wlCtrl_.

Referenced by evf::FUEventProcessor::configuring().

154  {
155 
156  pid_t retval = fork();
157  if(retval==0){ // we are in the forked process
158  int success = -1;
159 // #ifdef linux
160 // success = prctl( PR_SET_DUMPABLE, 0 );
161 // #endif
162  if(success != 0){
163  std::cout << "Vulture::could not set process undumpable" << std::endl;
164  handicapped_ = true;
165  }
166 #ifdef linux
167  success = prctl( PR_SET_PDEATHSIG, SIGKILL );
168 #endif
169  if(success != 0){
170  std::cout << "Vulture::could not set process death signal" << std::endl;
171  handicapped_ = true;
172  }
173  tmp_ = opendir(FS.c_str());
174 #ifdef linux
175  success = prctl ( PR_SET_NAME , "vulture");
176 #endif
177  if(success != 0){
178  std::cout << "Vulture::could not set process name" << std::endl;
179  handicapped_ = true;
180  }
181 
182  try{
183  pt::PeerTransport * ptr =
184  pt::getPeerTransportAgent()->getPeerTransport("http","soap",pt::Receiver);
185  delete ptr;
186  }
187  catch (pt::exception::PeerTransportNotFound & e ){
188  //do nothing here since we don't know what to do... ?
189  }
190  // freopen("/dev/null","w",stderr);
191  sq_ = new SlaveQueue(vulture_queue_offset);
192  // start the ctrl workloop
193  try {
194  wlCtrl_=
195  toolbox::task::getWorkLoopFactory()->getWorkLoop("Ctrll",
196  "waiting");
197  if (!wlCtrl_->isActive()) wlCtrl_->activate();
198 
199  asCtrl_ = toolbox::task::bind(this,&Vulture::control,
200  "Ctrl");
201  wlCtrl_->submit(asCtrl_);
202  }
203  catch (xcept::Exception& e) {
204  std::cout << "Vulture:constructor - could not start workloop 'Ctrl' for process " << retval << std::endl;
205  }
206  }
207  else{
208  vulturePid_ = retval;
209  }
210  return retval;
211 
212 
213  }
SlaveQueue * sq_
Definition: Vulture.h:75
static const std::string FS
Definition: Vulture.h:59
DIR * tmp_
Definition: Vulture.h:69
toolbox::task::ActionSignature * asCtrl_
Definition: Vulture.h:61
toolbox::task::WorkLoop * wlCtrl_
Definition: Vulture.h:60
bool control(toolbox::task::WorkLoop *)
Definition: Vulture.cc:295
static const int vulture_queue_offset
Definition: Vulture.h:53
tuple cout
Definition: gather_cfg.py:121
bool handicapped_
Definition: Vulture.h:78
pid_t vulturePid_
Definition: Vulture.h:68
bool evf::Vulture::prowling ( toolbox::task::WorkLoop *  wl)
private

Definition at line 343 of file Vulture.cc.

References analyze(), gather_cfg::cout, currentCoreList_, alignCSCRings::e, lastUpdate_, newCores_, cmsPerfSuiteHarvest::now, poster_, evf::CurlPoster::postString(), prowling_, stor::utils::sleep(), evf::CurlPoster::stack, and tmp_.

Referenced by startProwling().

344  {
345 
346  if(!prowling_){
347  char messageDie[5];
348  sprintf(messageDie,"Dead");
349  if(poster_==0){
350  std::cout << "Vulture: asked to stop prowling but no poster "
351  << std::endl;
352  return false;
353  }
354  try{
355  poster_->postString(messageDie,5,0,CurlPoster::stack);
356  }
357  catch(evf::Exception &e){
358  //do nothing just swallow the exception
359  }
360  std::cout << "Received STOP message, going to delete poster " << std::endl;
361 // delete poster_;
362 // poster_=0;
363 
364  return false;
365  }
366 
367  newCores_ = 0;
368 
369  struct stat filestat;
370 
371  timeval now;
372  gettimeofday(&now,0);
373 
374  // examine /tmp looking for new coredumps
375  dirent *dirp;
376  while((dirp = readdir(tmp_))!=0){
377  if(strncmp(dirp->d_name,"core",4)==0){
378  stat(dirp->d_name,&filestat);
379  if(filestat.st_mtime > lastUpdate_){
380  currentCoreList_.push_back(dirp->d_name);
381  newCores_++;
382  }
383  }
384  }
385  rewinddir(tmp_);
386  lastUpdate_ = now.tv_sec;
387  try{
388  analyze();
389  }
390  catch(evf::Exception &e){
391  std::cout << "Vulture cannot send to iDie server, bail out " << std::endl;
392  return false;
393  }
394  ::sleep(60);
395  return true;
396  }
time_t lastUpdate_
Definition: Vulture.h:71
void postString(const char *, size_t, unsigned int, mode, const std::string &=standard_post_method_)
Definition: CurlPoster.cc:112
void sleep(Duration_t)
Definition: Utils.h:163
std::vector< std::string > currentCoreList_
Definition: Vulture.h:70
DIR * tmp_
Definition: Vulture.h:69
bool prowling_
Definition: Vulture.h:65
void analyze()
Definition: Vulture.cc:398
CurlPoster * poster_
Definition: Vulture.h:73
tuple cout
Definition: gather_cfg.py:121
unsigned int newCores_
Definition: Vulture.h:72
void evf::Vulture::retrieve_corefile ( char *  ,
char *  ,
uint64_t   
)
pid_t evf::Vulture::start ( std::string  url,
int  run = 0 
)

Definition at line 241 of file Vulture.cc.

References mq_, MSQM_VULTURE_TYPE_STA, evf::MasterQueue::post(), DTTTrigCorrFirst::run, evf::vulture_start_message::run_, stopped_, evf::vulture_start_message::url_, and vulturePid_.

Referenced by progressbar.ProgressBar::__next__(), Types.LuminosityBlockRange::cppID(), Types.EventRange::cppID(), and evf::FUEventProcessor::enabling().

241  {
242 
243  //communicate start-of-run to Vulture
244  vulture_start_message stamsg;
245  strcpy(stamsg.url_,url.c_str());
246  stamsg.run_ = run;
247  MsgBuf msg1(sizeof(vulture_start_message),MSQM_VULTURE_TYPE_STA);
248  memcpy(msg1->mtext,&stamsg,sizeof(vulture_start_message));
249  mq_->post(msg1);
250  stopped_ = -1;
251  return vulturePid_;
252  }
MasterQueue * mq_
Definition: Vulture.h:74
#define MSQM_VULTURE_TYPE_STA
Definition: queue_defs.h:22
int post(MsgBuf &ptr)
Definition: MasterQueue.cc:26
int stopped_
Definition: Vulture.h:77
pid_t vulturePid_
Definition: Vulture.h:68
void evf::Vulture::startProwling ( )
private

Definition at line 272 of file Vulture.cc.

References asProwl_, alignCSCRings::e, edm::hlt::Exception, lastUpdate_, lumiQueryAPI::msg, cmsPerfSuiteHarvest::now, prowling(), prowling_, AlCaHLTBitMon_QueryRunRegistry::string, and wlProwl_.

Referenced by control().

273  {
274  timeval now;
275  gettimeofday(&now,0);
276  lastUpdate_ = now.tv_sec;
277  prowling_ = true;
278  try {
279  wlProwl_=
280  toolbox::task::getWorkLoopFactory()->getWorkLoop("Prowl",
281  "waiting");
282  if (!wlProwl_->isActive()) wlProwl_->activate();
283 
284  asProwl_ = toolbox::task::bind(this,&Vulture::prowling,
285  "Prowl");
286  wlProwl_->submit(asProwl_);
287  }
288  catch (xcept::Exception& e) {
289  std::string msg = "Failed to start workloop 'Prowl'.";
290  XCEPT_RETHROW(evf::Exception,msg,e);
291  }
292 
293  }
toolbox::task::WorkLoop * wlProwl_
Definition: Vulture.h:63
time_t lastUpdate_
Definition: Vulture.h:71
bool prowling(toolbox::task::WorkLoop *)
Definition: Vulture.cc:343
toolbox::task::ActionSignature * asProwl_
Definition: Vulture.h:64
bool prowling_
Definition: Vulture.h:65
pid_t evf::Vulture::stop ( )

Definition at line 254 of file Vulture.cc.

References mq_, MSQM_VULTURE_TYPE_STP, NUMERIC_MESSAGE_SIZE, evf::MasterQueue::post(), started_, and vulturePid_.

Referenced by evf::FUEventProcessor::stopping().

255  {
256 
257  MsgBuf msg1(NUMERIC_MESSAGE_SIZE,MSQM_VULTURE_TYPE_STP);
258  mq_->post(msg1);
259  started_ = -1;
260  return vulturePid_;
261  }
#define NUMERIC_MESSAGE_SIZE
Definition: queue_defs.h:38
#define MSQM_VULTURE_TYPE_STP
Definition: queue_defs.h:23
MasterQueue * mq_
Definition: Vulture.h:74
int post(MsgBuf &ptr)
Definition: MasterQueue.cc:26
int started_
Definition: Vulture.h:76
pid_t vulturePid_
Definition: Vulture.h:68

Member Data Documentation

toolbox::task::ActionSignature* evf::Vulture::asCtrl_
private

Definition at line 61 of file Vulture.h.

Referenced by makeProcess().

toolbox::task::ActionSignature* evf::Vulture::asProwl_
private

Definition at line 64 of file Vulture.h.

Referenced by startProwling().

std::vector<std::string> evf::Vulture::currentCoreList_
private

Definition at line 70 of file Vulture.h.

Referenced by analyze(), and prowling().

const std::string evf::Vulture::FS ="/tmp"
static private

Definition at line 59 of file Vulture.h.

Referenced by analyze(), and makeProcess().

bool evf::Vulture::handicapped_
private

Definition at line 78 of file Vulture.h.

Referenced by makeProcess().

std::string evf::Vulture::iDieUrl_
private

Definition at line 66 of file Vulture.h.

time_t evf::Vulture::lastUpdate_
private

Definition at line 71 of file Vulture.h.

Referenced by prowling(), and startProwling().

MasterQueue* evf::Vulture::mq_
private

Definition at line 74 of file Vulture.h.

Referenced by hasStarted(), hasStopped(), start(), stop(), and ~Vulture().

unsigned int evf::Vulture::newCores_
private

Definition at line 72 of file Vulture.h.

Referenced by analyze(), and prowling().

CurlPoster* evf::Vulture::poster_
private

Definition at line 73 of file Vulture.h.

Referenced by analyze(), control(), prowling(), and ~Vulture().

bool evf::Vulture::prowling_
private

Definition at line 65 of file Vulture.h.

Referenced by control(), prowling(), and startProwling().

bool evf::Vulture::running_
private

Definition at line 62 of file Vulture.h.

SlaveQueue* evf::Vulture::sq_
private

Definition at line 75 of file Vulture.h.

Referenced by control(), makeProcess(), and ~Vulture().

int evf::Vulture::started_
private

Definition at line 76 of file Vulture.h.

Referenced by hasStarted(), and stop().

int evf::Vulture::stopped_
private

Definition at line 77 of file Vulture.h.

Referenced by hasStopped(), and start().

DIR* evf::Vulture::tmp_
private

Definition at line 69 of file Vulture.h.

Referenced by makeProcess(), and prowling().

bool evf::Vulture::updateMode_
private

Definition at line 67 of file Vulture.h.

const int evf::Vulture::vulture_queue_offset = 400
static private

Definition at line 53 of file Vulture.h.

Referenced by makeProcess().

pid_t evf::Vulture::vulturePid_
private

Definition at line 68 of file Vulture.h.

Referenced by kill(), makeProcess(), start(), and stop().

toolbox::task::WorkLoop* evf::Vulture::wlCtrl_
private

Definition at line 60 of file Vulture.h.

Referenced by makeProcess().

toolbox::task::WorkLoop* evf::Vulture::wlProwl_
private

Definition at line 63 of file Vulture.h.

Referenced by startProwling().