11 #include <sys/param.h>
12 #include <sys/mount.h>
14 #include <sys/statfs.h>
23 #include <boost/bind.hpp>
24 #include <boost/regex.hpp>
40 updateInterval_(updateInterval),
42 numberOfCopyWorkers_(-1),
43 numberOfInjectWorkers_(-1),
45 latchedSataBeastStatus_(-1)
64 pathName <<
"/" << std::setfill(
'0') << std::setw(2) <<
i;
79 if ( pathname.empty() )
return;
82 diskUsage->pathName = pathname;
90 for ( DiskWritingParams::OtherDiskPaths::const_iterator
106 rmParams_ = rmParams;
115 alarmParams_ = alarmParams;
136 for ( DiskUsagePtrList::const_iterator it =
diskUsageList_.begin(),
142 diskUsageStats->diskSize = (*it)->diskSize;
143 diskUsageStats->absDiskUsage = (*it)->absDiskUsage;
144 diskUsageStats->relDiskUsage = (*it)->relDiskUsage;
145 diskUsageStats->pathName = (*it)->pathName;
146 diskUsageStats->alarmState = (*it)->alarmState;
168 for ( DiskUsagePtrList::const_iterator it =
diskUsageList_.begin(),
173 (*it)->absDiskUsage = -1;
174 (*it)->relDiskUsage = -1;
182 infoSpaceItems.push_back(std::make_pair(
"copyWorkers", &
copyWorkers_));
183 infoSpaceItems.push_back(std::make_pair(
"injectWorkers", &
injectWorkers_));
184 infoSpaceItems.push_back(std::make_pair(
"sataBeastStatus", &
sataBeastStatus_));
185 infoSpaceItems.push_back(std::make_pair(
"numberOfDisks", &
numberOfDisks_));
186 infoSpaceItems.push_back(std::make_pair(
"diskPaths", &
diskPaths_));
187 infoSpaceItems.push_back(std::make_pair(
"totalDiskSpace", &
totalDiskSpace_));
188 infoSpaceItems.push_back(std::make_pair(
"usedDiskSpace", &
usedDiskSpace_));
218 for (DiskUsageStatsPtrList::const_iterator
225 static_cast<xdata::String>( (*it)->pathName )
228 static_cast<xdata::UnsignedInteger32>(
229 static_cast<unsigned int>( (*it)->diskSize * 1024 )
233 static_cast<xdata::UnsignedInteger32>(
234 static_cast<unsigned int>( (*it)->absDiskUsage * 1024 )
260 int retVal = statfs(diskUsage->pathName.c_str(), &buf);
263 int retVal = statfs64(diskUsage->pathName.c_str(), &buf);
266 unsigned int blksize = buf.f_bsize;
267 diskUsage->diskSize =
268 static_cast<double>(buf.f_blocks * blksize) / 1024 / 1024 / 1024;
269 diskUsage->absDiskUsage =
270 diskUsage->diskSize -
271 static_cast<double>(buf.f_bavail * blksize) / 1024 / 1024 / 1024;
272 diskUsage->relDiskUsage = (100 * (diskUsage->absDiskUsage / diskUsage->diskSize));
286 diskUsage->diskSize = -1;
287 diskUsage->absDiskUsage = -1;
288 diskUsage->relDiskUsage = -1;
302 msg =
"Cannot access " + diskUsage->pathName +
". Is it mounted?";
307 msg =
"Failed to retrieve disk space information for " + diskUsage->pathName +
":"
311 XCEPT_DECLARE(stor::exception::DiskSpaceAlarm, ex, msg);
325 XCEPT_DECLARE(stor::exception::DiskSpaceAlarm, ex, diskUsage->toString());
326 alarmHandler_->raiseAlarm(diskUsage->pathName, diskUsage->alarmState, ex);
333 DiskWritingParams::OtherDiskPaths::const_iterator
begin =
335 DiskWritingParams::OtherDiskPaths::const_iterator
end =
337 if (
std::find(begin, end, diskUsage->pathName) != end )
return;
340 XCEPT_RAISE(stor::exception::DiskSpaceAlarm, diskUsage->toString());
374 const std::string alarmName =
"CopyWorkers";
378 std::ostringstream
msg;
380 " running CopyWorkers, but found " <<
382 XCEPT_DECLARE(stor::exception::CopyWorkers, ex, msg.str());
416 const std::string alarmName =
"InjectWorkers";
420 std::ostringstream
msg;
422 " running InjectWorkers, but found " <<
424 XCEPT_DECLARE(stor::exception::InjectWorkers, ex, msg.str());
440 SATABeasts::const_iterator it = sataBeasts.begin(),
441 itEnd= sataBeasts.end();
461 in.open(
"/proc/mounts" );
463 if ( ! in.is_open() )
return false;
466 while( getline(in,line) )
468 size_t pos = line.find(
"sata");
469 if ( pos != std::string::npos )
471 std::ostringstream
host;
473 << std::setw(2) << std::setfill(
'0')
474 << line.substr(pos+4,1)
476 << std::setw(2) << std::setfill(
'0')
477 << line.substr(pos+5,1);
478 sataBeasts.insert(host.str());
481 return !sataBeasts.empty();
489 XCEPT_DECLARE(stor::exception::SataBeast, ex,
490 "Failed to connect to SATA beast " + sataBeast);
500 const std::string& sataBeast,
501 const std::string& hostSuffix
508 if ( rmParams_.sataUser_.empty() )
return true;
512 "http://" + sataBeast + hostSuffix +
"/status.asp",rmParams_.sataUser_,
516 if (returnCode == CURLE_OK)
518 updateSataBeastStatus(sataBeast, std::string(&content[0]));
523 std::ostringstream
msg;
524 msg <<
"Failed to connect to SATA controller "
525 << sataBeast << hostSuffix
526 <<
": " << std::string(&content[0]);
527 XCEPT_DECLARE(stor::exception::SataBeast, ex, msg.str());
537 const std::string& sataBeast,
538 const std::string& content
541 boost::regex failedEntry(
">([^<]* has failed[^<]*)");
542 boost::regex failedDisk(
"Hard disk([[:digit:]]+)");
543 boost::regex failedController(
"RAID controller ([[:digit:]]+)");
544 boost::match_results<std::string::const_iterator> matchedEntry, matchedCause;
545 boost::match_flag_type
flags = boost::match_default;
547 std::string::const_iterator start = content.begin();
548 std::string::const_iterator
end = content.end();
550 unsigned int newSataBeastStatus = 0;
552 while( regex_search(start, end, matchedEntry, failedEntry, flags) )
554 std::string errorMsg = matchedEntry[1];
555 XCEPT_DECLARE(stor::exception::SataBeast, ex, sataBeast+
": "+errorMsg);
559 if ( regex_search(errorMsg, matchedCause, failedDisk) )
562 ++newSataBeastStatus;
564 else if ( regex_search(errorMsg, matchedCause, failedController) )
567 newSataBeastStatus += 100;
572 newSataBeastStatus += 1000;
576 start = matchedEntry[0].second;
578 flags |= boost::match_prev_avail;
579 flags |= boost::match_not_bob;
582 latchedSataBeastStatus_ = newSataBeastStatus;
584 if (latchedSataBeastStatus_ == 0)
585 alarmHandler_->revokeAlarm(sataBeast);
593 return !fnmatch(
"[1-9]*", dir->d_name, 0);
596 bool matchUid(
const std::string&
filename,
const uid_t& uid)
598 struct stat filestat;
599 int result = stat(filename.c_str(), &filestat);
600 return (result == 0 && filestat.st_uid == uid);
603 bool isMaster(
const char*
pid)
609 std::ostringstream statfile;
610 statfile <<
"/proc/" << pid <<
"/stat";
611 snprintf(buf, 32, statfile.str().c_str(),
pid);
612 if ( (fd = open(buf, O_RDONLY, 0) ) == -1 )
return false;
613 int num =
read(fd, buf,
sizeof buf - 1);
614 if(num<80)
return false;
616 char*
tmp = strrchr(buf,
')');
617 num = sscanf(tmp + 4,
620 return ( num == 1 && ppid == 1 );
623 bool grep(
const std::string& cmdline,
const std::string&
name)
627 in.open( cmdline.c_str() );
633 while( getline(in,tmp,
'\0') )
641 return ( line.find(name) != std::string::npos );
648 const std::string& processName,
653 struct dirent **namelist;
659 n = scandir(
"/proc", &namelist,
filter, 0);
661 if (n < 0)
return -1;
665 std::ostringstream cmdline;
666 cmdline <<
"/proc/" << namelist[
n]->d_name <<
"/cmdline";
668 if ( grep(cmdline.str(), processName) &&
669 (uid < 0 || matchUid(cmdline.str(), uid)) &&
670 isMaster(namelist[n]->d_name) )
684 std::ostringstream
msg;
685 msg << std::fixed << std::setprecision(1) <<
686 "Disk space usage for " <<
pathName <<
virtual void do_updateInfoSpaceItems()
ResourceMonitorParams rmParams_
int numberOfInjectWorkers_
void calcNumberOfInjectWorkers()
void checkNumberOfInjectWorkers()
boost::mutex diskUsageListMutex_
void calcNumberOfCopyWorkers()
void emitDiskSpaceAlarm(DiskUsagePtr)
ResourceMonitorCollection(const utils::Duration_t &updateInterval, AlarmHandlerPtr)
xdata::UnsignedInteger32 numberOfDisks_
std::vector< Variable::Flags > flags
OtherDiskPaths otherDiskPaths_
void getDiskStats(Stats &) const
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
int latchedSataBeastStatus_
boost::shared_ptr< DiskUsage > DiskUsagePtr
int numberOfInjectWorkers
const std::string * pathName() const
xdata::UnsignedInteger32 copyWorkers_
boost::posix_time::time_duration Duration_t
xdata::Integer32 sataBeastStatus_
const T & max(const T &a, const T &b)
DiskWritingParams dwParams_
xdata::Vector< xdata::UnsignedInteger32 > usedDiskSpace_
DiskUsageStatsPtrList diskUsageStatsList
void configureDisks(DiskWritingParams const &)
xdata::UnsignedInteger32 injectWorkers_
void getStats(Stats &) const
void retrieveDiskSize(DiskUsagePtr)
std::set< std::string > SATABeasts
bool checkSataDisks(const std::string &sataBeast, const std::string &hostSuffix)
WorkerParams copyWorkers_
xdata::Vector< xdata::UnsignedInteger32 > totalDiskSpace_
CURLcode getContent(const std::string &url, const std::string &user, Content &content)
double failHighWaterMark_
boost::shared_ptr< AlarmHandler > AlarmHandlerPtr
void configureAlarms(AlarmParams const &)
void failIfImportantDisk(DiskUsagePtr)
virtual void do_appendInfoSpaceItems(InfoSpaceItems &)
void checkSataBeast(const std::string &sataBeast)
xdata::Vector< xdata::String > diskPaths_
tuple filter
USE THIS FOR SKIMMED TRACKS process.p = cms.Path(process.hltLevel1GTSeed*process.skimming*process.offlineBeamSpot*process.TrackRefitter2) OTHERWISE USE THIS.
bool getSataBeasts(SATABeasts &sataBeasts)
boost::shared_ptr< DiskUsageStats > DiskUsageStatsPtr
std::vector< std::pair< std::string, xdata::Serializable * > > InfoSpaceItems
unsigned int nLogicalDisks_
WorkerParams injectWorkers_
void revokeDiskAlarm(DiskUsagePtr)
std::vector< std::vector< double > > tmp
void updateSataBeastStatus(const std::string &sataBeast, const std::string &content)
void emitDiskAlarm(DiskUsagePtr, error_t)
void configureResources(ResourceMonitorParams const &)
AlarmHandlerPtr alarmHandler_
void checkNumberOfCopyWorkers()
int getProcessCount(const std::string &processName, const int &uid=-1)
void addDisk(const std::string &)
virtual void do_calculateStatistics()
std::vector< char > Content
DiskUsagePtrList diskUsageList_