00001 #ifndef EVENTFILTER_GOODIES_IDIE_H
00002 #define EVENTFILTER_GOODIES_IDIE_H
00003
00004 #include "EventFilter/Utilities/interface/Exception.h"
00005 #include "EventFilter/Utilities/interface/TriggerReportDef.h"
00006
00007 #include "xdata/String.h"
00008 #include "xdata/UnsignedInteger32.h"
00009 #include "xdata/Boolean.h"
00010 #include "xdata/ActionListener.h"
00011
00012 #include "xoap/MessageReference.h"
00013 #include "xoap/MessageFactory.h"
00014 #include "xoap/Method.h"
00015
00016 #include "xgi/Utils.h"
00017 #include "xgi/Input.h"
00018 #include "xgi/Output.h"
00019 #include "xgi/Method.h"
00020
00021 #include "xdaq/Application.h"
00022
00023 #include "toolbox/net/URN.h"
00024 #include "toolbox/fsm/exception/Exception.h"
00025
00026
#include <algorithm>
#include <cstddef>
#include <deque>
#include <list>
#include <map>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include <sys/time.h>
#include <math.h>
00032
00033 #include "TFile.h"
00034 #include "TTree.h"
00035
00036 #include "FWCore/Framework/interface/EventProcessor.h"
00037 #include "DQMServices/Core/src/DQMService.h"
00038 #include "DQMServices/Core/interface/DQMStore.h"
00039 #include "DQMServices/Core/interface/MonitorElement.h"
00040
00041 #define MODNAMES 25
00042
00043 namespace evf {
00044
00045 int modlistSortFunction( const void *a, const void *b);
00046
00047 namespace internal{
/// Plain record used as the mapped value of `evf::fmap` (keyed by a string).
/// No constructor is provided: `tstamp` and `ccount` are indeterminate unless
/// the object is value-initialised or the fields are assigned by the user.
struct fu
{
  time_t                   tstamp;      ///< time stamp stored with the entry
  unsigned int             ccount;      ///< counter; presumably a crash count -- TODO confirm
  std::vector<pid_t>       cpids;       ///< process ids; presumably of child processes -- TODO confirm
  std::vector<std::string> signals;     ///< one text entry per recorded signal
  std::vector<std::string> stacktraces; ///< one text entry per recorded stack trace
};
00055 struct rate{
00056 int nproc;
00057 int nsub;
00058 int nrep;
00059 int npath;
00060 int nendpath;
00061 int ptimesRun[evf::max_paths];
00062 int ptimesPassedPs[evf::max_paths];
00063 int ptimesPassedL1[evf::max_paths];
00064 int ptimesPassed[evf::max_paths];
00065 int ptimesFailed[evf::max_paths];
00066 int ptimesExcept[evf::max_paths];
00067 int etimesRun[evf::max_endpaths];
00068 int etimesPassedPs[evf::max_endpaths];
00069 int etimesPassedL1[evf::max_endpaths];
00070 int etimesPassed[evf::max_endpaths];
00071 int etimesFailed[evf::max_endpaths];
00072 int etimesExcept[evf::max_endpaths];
00073 };
00074
00075 }
00076 typedef std::map<std::string,internal::fu> fmap;
00077 typedef fmap::iterator ifmap;
00078
00079 class iDie : public xdaq::Application,
00080 public xdata::ActionListener
00081 {
00082 public:
00083
00084
00085
00086 XDAQ_INSTANTIATOR();
00087
00088
00089
00090
00091
00092 iDie(xdaq::ApplicationStub *s);
00093 virtual ~iDie();
00094
00095 void defaultWeb(xgi::Input *in,xgi::Output *out)
00096 throw (xgi::exception::Exception);
00097 void summaryTable(xgi::Input *in,xgi::Output *out)
00098 throw (xgi::exception::Exception);
00099 void detailsTable(xgi::Input *in,xgi::Output *out)
00100 throw (xgi::exception::Exception);
00101 void dumpTable(xgi::Input *in,xgi::Output *out)
00102 throw (xgi::exception::Exception);
00103 void updater(xgi::Input *in,xgi::Output *out)
00104 throw (xgi::exception::Exception);
00105 void iChoke(xgi::Input *in,xgi::Output *out)
00106 throw (xgi::exception::Exception);
00107 void iChokeMiniInterface(xgi::Input *in,xgi::Output *out)
00108 throw (xgi::exception::Exception);
00109 void spotlight(xgi::Input *in,xgi::Output *out)
00110 throw (xgi::exception::Exception);
00111
00112 void postEntry(xgi::Input*in,xgi::Output*out)
00113 throw (xgi::exception::Exception);
00114 void postEntryiChoke(xgi::Input*in,xgi::Output*out)
00115 throw (xgi::exception::Exception);
00116
00117
00118 xoap::MessageReference fsmCallback(xoap::MessageReference msg)
00119 throw (xoap::exception::Exception);
00120
00121
00122 void actionPerformed(xdata::Event& e);
00123
00124
00125 private:
00126
/// Index view that presents a vector of ints in descending order without
/// modifying it.
///
/// `arr_` is held by reference: the vector passed to the constructor must
/// outlive this object.
struct sorted_indices{
  /// Builds the index permutation so that position 0 refers to the largest
  /// element of `arr`. The relative order of equal elements is unspecified
  /// (std::sort is not stable).
  sorted_indices(const std::vector<int> &arr) : arr_(arr)
  {
    ind_.resize(arr_.size(),0);
    for (size_t i = 0; i < ind_.size(); ++i) ind_[i] = static_cast<int>(i);

    // Compare through a lambda that only captures a reference. Passing *this
    // as the comparator made std::sort copy the whole object -- including the
    // index vector -- every time it handed the comparator on by value.
    const std::vector<int> &values = arr_;
    std::sort(ind_.rbegin(), ind_.rend(),
              [&values](const size_t a, const size_t b) { return values[a] < values[b]; });
  }

  /// Value at sorted position `ind` (0 = largest). No bounds checking.
  int operator[](size_t ind) const {return arr_[ind_[ind]];}

  /// Ascending comparison of two original indices by their values; kept so
  /// the object can still be used as a comparator.
  bool operator()(const size_t a, const size_t b) const
  {
    return arr_[a]<arr_[b];
  }

  /// Original index of the element at sorted position `ind`. No bounds checking.
  int ii(size_t ind) const {return ind_[ind];}

  std::vector<int> ind_;          ///< permutation: sorted position -> original index
  const std::vector<int> &arr_;   ///< the values being viewed (not owned)
};
00145
00146
00147
00148 class lsStat;
00149 class commonLsStat;
00150
00151 void reset();
00152 void parseModuleLegenda(std::string);
00153 void parseModuleHisto(const char *, unsigned int);
00154 void parsePathLegenda(std::string);
00155 void parseDatasetLegenda(std::string);
00156 void parsePathHisto(const unsigned char *, unsigned int);
00157 void initFramework();
00158 void deleteFramework();
00159 void initMonitorElements();
00160 void initMonitorElementsStreams();
00161 void initMonitorElementsDatasets();
00162 void fillDQMStatHist(unsigned int nbsIdx, unsigned int lsid);
00163 void fillDQMModFractionHist(unsigned int nbsIdx, unsigned int lsid, unsigned int nonIdle,
00164 std::vector<std::pair<unsigned int, unsigned int>> offenders);
00165
00166 void updateRollingHistos(unsigned int nbsIdx, unsigned int lsid, lsStat * lst, commonLsStat * clst, bool roll);
00167 void updateStreamHistos(unsigned int forls, commonLsStat *clst, commonLsStat *prevclst);
00168 void updateDatasetHistos(unsigned int forls, commonLsStat *clst, commonLsStat *prevclst);
00169 void doFlush();
00170 void perLumiFileSaver(unsigned int lsid);
00171 void perTimeFileSaver();
00172
00173
00174
00175
00176
00177 Logger log_;
00178 std::string dqmState_;
00179
00180 xdata::String url_;
00181 xdata::String class_;
00182 xdata::UnsignedInteger32 instance_;
00183 xdata::String hostname_;
00184
00185 xdata::UnsignedInteger32 runNumber_;
00186 unsigned int lastRunNumberSet_;
00187
00188
00189 MonitorElement * runId_;
00190 MonitorElement * lumisecId_;
00191 MonitorElement * eventId_;
00192 MonitorElement * eventTimeStamp_;
00193
00194 xdata::String dqmCollectorHost_;
00195 xdata::String dqmCollectorPort_;
00196 fmap fus_;
00197
00198 unsigned int totalCores_;
00199 unsigned int nstates_;
00200 std::vector<int> cpuentries_;
00201 std::vector<std::vector<int> > cpustat_;
00202 std::vector<std::string> mapmod_;
00203 unsigned int last_ls_;
00204 std::vector<TriggerReportStatic>trp_;
00205 std::vector<int> trpentries_;
00206 std::vector<std::string> mappath_;
00207
00208 TFile *f_;
00209 TTree *t_;
00210 TBranch *b_;
00211 TBranch *b1_;
00212 TBranch *b2_;
00213 TBranch *b3_;
00214 TBranch *b4_;
00215 int *datap_;
00216 TriggerReportStatic *trppriv_;
00217 internal::rate r_;
00218
00219
00220 int nModuleLegendaMessageReceived_;
00221 int nPathLegendaMessageReceived_;
00222 int nModuleLegendaMessageWithDataReceived_;
00223 int nPathLegendaMessageWithDataReceived_;
00224 int nModuleHistoMessageReceived_;
00225 int nPathHistoMessageReceived_;
00226 timeval runStartDetectedTimeStamp_;
00227 timeval lastModuleLegendaMessageTimeStamp_;
00228 timeval lastPathLegendaMessageTimeStamp_;
00229
00230 int nDatasetLegendaMessageReceived_;
00231 int nDatasetLegendaMessageWithDataReceived_;
00232 timeval lastDatasetLegendaMessageTimeStamp_;
00233
00234
00235 std::vector<unsigned int> epInstances;
00236 std::vector<unsigned int> epMax;
00237 std::vector<float> HTscaling;
00238 std::vector<unsigned int> nbMachines;
00239 std::vector<float> machineWeight;
00240 std::vector<float> machineWeightInst;
00241
00242 std::vector<std::string > endPathNames_;
00243 std::vector<std::string > datasetNames_;
00244
/// Per-lumi-section summary holding one slot per machine class for rate,
/// busy fractions and number of reporting machines.
class commonLsStat {

public:
  unsigned int ls_;                          ///< lumi section id given at construction
  std::vector<float> rateVec_;               ///< rate per class
  std::vector<float> busyVec_;               ///< busy fraction per class
  std::vector<float> busyCPUVec_;            ///< CPU busy fraction per class
  std::vector<float> busyVecTheor_;          ///< "theoretical" busy fraction per class
  std::vector<float> busyCPUVecTheor_;       ///< "theoretical" CPU busy fraction per class
  std::vector<unsigned int> nbMachines;      ///< number of machine reports per class
  std::vector<unsigned int> endPathCounts_;  ///< starts empty; filled by the owner
  std::vector<unsigned int> datasetCounts_;  ///< starts empty; filled by the owner

  /// Creates a record for lumi section `lsid` with `classes` zeroed slots in
  /// every per-class vector.
  commonLsStat(unsigned int lsid,unsigned int classes)
    : ls_(lsid),
      rateVec_(classes, 0.f),
      busyVec_(classes, 0.f),
      busyCPUVec_(classes, 0.f),
      busyVecTheor_(classes, 0.f),
      busyCPUVecTheor_(classes, 0.f),
      nbMachines(classes, 0u)
  {
  }

  /// Stores all per-class values for `classIdx`.
  /// Precondition: `classIdx` is smaller than the class count passed to the
  /// constructor (no bounds checking is performed).
  void setBusyForClass(unsigned int classIdx,float rate,float busy,float busyTheor, float busyCPU, float busyCPUTheor, unsigned int nMachineReports) {
    rateVec_[classIdx]=rate;
    busyVec_[classIdx]=busy;
    busyCPUVec_[classIdx]=busyCPU;
    busyVecTheor_[classIdx]=busyTheor;
    busyCPUVecTheor_[classIdx]=busyCPUTheor;
    nbMachines[classIdx]=nMachineReports;
  }

  /// Sum of the rate over all classes.
  float getTotalRate() const {
    float totRate=0;
    for (size_t i=0;i<rateVec_.size();i++) totRate+=rateVec_[i];
    return totRate;
  }

  /// Average busy fraction over all classes, weighted by
  /// `machineWeightInst[i] * nbMachines[i]`.
  /// @param procstat          true: average busyCPUVec_, false: average busyVec_
  /// @param machineWeightInst per-class weight
  /// @return weighted average, or 0 when the total weight is not positive
  ///
  /// Only classes present in every participating vector are used, so a weight
  /// vector shorter than the class count is no longer read out of bounds.
  float getBusyTotalFrac(bool procstat,std::vector<float> & machineWeightInst) const {
    const std::vector<float> &busy = procstat ? busyCPUVec_ : busyVec_;
    const size_t n = std::min(std::min(busyVec_.size(), busy.size()),
                              std::min(nbMachines.size(), machineWeightInst.size()));
    double sum=0;
    double sumMachines=0;
    for (size_t i=0;i<n;i++) {
      sum+=machineWeightInst[i]*nbMachines[i]*busy[i];
      sumMachines+=machineWeightInst[i]*nbMachines[i];
    }
    if (sumMachines>0)
      return float(sum/sumMachines);
    else return 0.;
  }

  /// Same as getBusyTotalFrac() but for the "theoretical" busy vectors and
  /// with single-precision accumulation (kept as in the original code).
  /// @param procstat      true: average busyCPUVecTheor_, false: busyVecTheor_
  /// @param machineWeight per-class weight
  /// @return weighted average, or 0 when the total weight is not positive
  float getBusyTotalFracTheor(bool procstat,std::vector<float> & machineWeight) const {
    const std::vector<float> &busy = procstat ? busyCPUVecTheor_ : busyVecTheor_;
    const size_t n = std::min(std::min(busyVecTheor_.size(), busy.size()),
                              std::min(nbMachines.size(), machineWeight.size()));
    float sum=0;
    float sumMachines=0;
    for (size_t i=0;i<n;i++) {
      sum+=machineWeight[i]*nbMachines[i]*busy[i];
      sumMachines+=machineWeight[i]*nbMachines[i];
    }
    if (sumMachines>0)
      return sum/sumMachines;
    else return 0.;
  }

  /// Total number of machine reports over all classes.
  unsigned int getNReports() const {
    unsigned int sum=0;
    for (size_t i=0;i<nbMachines.size();i++) sum+=nbMachines[i];
    return sum;
  }

  /// One-line dump "<idx>/r:<rate>/b:<busy>/n:<reports>; " for every class.
  std::string printInfo() const {
    const size_t n = std::min(rateVec_.size(), std::min(busyVec_.size(), nbMachines.size()));
    std::ostringstream info;
    for (size_t i=0;i<n;i++) {
      info << i << "/r:" << rateVec_[i] <<"/b:"<<busyVec_[i]<<"/n:"<<nbMachines[i]<<"; ";
    }
    return info.str();
  }
};
00328
/// Accumulator of per-lumi-section statistics for one machine class.
///
/// Raw counters are summed by update(); derived quantities (rate, event time,
/// busy fractions) are recomputed lazily by calcStat() the next time a getter
/// is called after an update.
///
/// The object owns the `moduleSamplingSums` array, so it is not copyable.
class lsStat {
public:
  unsigned int ls_;
  bool updated_;                     ///< true when counters changed since the last calcStat()
  unsigned int nbSubs_;
  unsigned int nSampledNonIdle_;     ///< sum of non-idle samples
  unsigned int nSampledNonIdle2_;    ///< sum of squared non-idle samples
  unsigned int nSampledIdle_;        ///< sum of idle samples
  unsigned int nSampledIdle2_;       ///< sum of squared idle samples
  unsigned int nProc_;               ///< sum of processed counts
  unsigned int nProc2_;              ///< sum of squared processed counts
  unsigned int nCPUBusy_;
  unsigned int nReports_;            ///< number of update() calls
  unsigned int nMaxReports_;
  double rateAvg;
  double rateErr;
  double evtTimeAvg;
  double evtTimeErr;
  double fracWaitingAvg;
  double fracCPUBusy_;
  unsigned int nmodulenames_;        ///< number of entries in moduleSamplingSums
  unsigned int sumDeltaTms_;         ///< sum of the deltaTms values passed to update()
  float avgDeltaT_;                  ///< interval used for the rate; falls back to 23
  float avgDeltaT2_;                 ///< measured interval; 0 when none is available
  std::pair<unsigned int,unsigned int> *moduleSamplingSums; ///< owned; (module index, sample sum)

  /// @param ls           lumi section id
  /// @param nbSubs       stored in nbSubs_; scales the event-time estimate
  /// @param maxreps      stored in nMaxReports_
  /// @param nmodulenames number of module slots to allocate
  ///
  /// The fallback interval of 23 is presumably the nominal lumi-section
  /// length in seconds -- TODO confirm.
  lsStat(unsigned int ls, unsigned int nbSubs,unsigned int maxreps,unsigned int nmodulenames):
    ls_(ls),updated_(true),nbSubs_(nbSubs),
    nSampledNonIdle_(0),nSampledNonIdle2_(0),nSampledIdle_(0),nSampledIdle2_(0),
    nProc_(0),nProc2_(0),nCPUBusy_(0),nReports_(0),nMaxReports_(maxreps),
    rateAvg(0.),rateErr(0.),evtTimeAvg(0.),evtTimeErr(0.),fracWaitingAvg(1.),fracCPUBusy_(0.),
    nmodulenames_(nmodulenames),
    sumDeltaTms_(0),avgDeltaT_(23),avgDeltaT2_(0),
    moduleSamplingSums(new std::pair<unsigned int,unsigned int>[nmodulenames])
  {
    for (unsigned int i=0;i<nmodulenames_;i++) {
      moduleSamplingSums[i].first=i;
      moduleSamplingSums[i].second=0;
    }
  }

  // The implicit copies would share moduleSamplingSums and free it twice.
  lsStat(const lsStat &) = delete;
  lsStat &operator=(const lsStat &) = delete;

  ~lsStat() {
    delete[] moduleSamplingSums;   // allocated with new[]
  }

  /// Adds one report to the running sums and marks the derived values stale.
  void update(unsigned int nSampledNonIdle,unsigned int nSampledIdle,
              unsigned int nProc,unsigned int ncpubusy, unsigned int deltaTms)
  {
    nReports_++;
    nSampledNonIdle_+=nSampledNonIdle;
    // Integer squares: going through pow() made the compound assignment
    // convert a double back to unsigned, which is undefined when out of range.
    nSampledNonIdle2_+=nSampledNonIdle*nSampledNonIdle;
    nSampledIdle_+=nSampledIdle;
    nSampledIdle2_+=nSampledIdle*nSampledIdle;
    nProc_+=nProc;
    nProc2_+=nProc*nProc;
    nCPUBusy_+=ncpubusy;
    sumDeltaTms_+=deltaTms;
    updated_=true;
  }

  /// Direct access to the module sampling array (nullptr after
  /// deleteModuleSamplingPtr()). The array stays owned by this object.
  std::pair<unsigned int,unsigned int> * getModuleSamplingPtr() {
    return moduleSamplingSums;
  }

  /// Releases the module sampling array; afterwards the module count is 0.
  void deleteModuleSamplingPtr() {
    delete[] moduleSamplingSums;
    moduleSamplingSums=nullptr;
    nmodulenames_=0;
  }

  /// Recomputes all derived quantities from the running sums.
  /// Does nothing when no update() happened since the previous call.
  void calcStat()
  {
    if (!updated_) return;
    if (nReports_) {
      float tinv = 0.001/nReports_;          // ms -> s, averaged over the reports
      fracCPUBusy_=nCPUBusy_*tinv;
      avgDeltaT_ = avgDeltaT2_ = sumDeltaTms_*tinv;
      if (avgDeltaT_==0.) {
        // No usable interval: use the default for the rate, report 0 from getDt().
        avgDeltaT_=23.;
        avgDeltaT2_=0;
      }
      rateAvg=nProc_ / avgDeltaT_;
      rateErr=sqrt(fabs(nProc2_ - pow(nProc_,2)))/avgDeltaT_;
    }
    else {
      fracCPUBusy_=0.;
      rateAvg=0.;
      rateErr=0.;
      avgDeltaT_=23.;
    }

    evtTimeAvg=0.;evtTimeErr=0.;fracWaitingAvg=1.;
    unsigned int sampled = nSampledNonIdle_+nSampledIdle_;
    if (rateAvg!=0. && sampled) {
      float nAllInv = 1./sampled;
      fracWaitingAvg= nSampledIdle_*nAllInv;
      double nSampledIdleErr2=fabs(nSampledIdle2_ - pow(nSampledIdle_,2));
      double nSampledNonIdleErr2=fabs(nSampledNonIdle2_ - pow(nSampledNonIdle_,2));
      double fracWaitingAvgErr= sqrt(
              (pow(nSampledIdle_,2)*nSampledNonIdleErr2
               + pow(nSampledNonIdle_,2)*nSampledIdleErr2))*pow(nAllInv,2);
      float rateAvgInv=1./rateAvg;
      evtTimeAvg=nbSubs_ * nReports_ * (1.-fracWaitingAvg)*rateAvgInv;
      evtTimeErr = nbSubs_ * nReports_ * sqrt(pow(fracWaitingAvg*rateErr*pow(rateAvgInv,2),2) + pow(fracWaitingAvgErr*rateAvgInv,2));
    }
    updated_=false;
  }

  /// Processed count divided by the averaging interval.
  float getRate() {
    if (updated_) calcStat();
    return rateAvg;
  }

  float getRateErr() {
    if (updated_) calcStat();
    return rateErr;
  }

  /// Rate divided by the number of reports; 0 without reports.
  float getRatePerMachine() {
    if (updated_) calcStat();
    if (nReports_)
      return rateAvg/(1.*nReports_);
    return 0.;
  }

  /// Rate error divided by the number of reports; 0 without reports.
  float getRateErrPerMachine() {
    if (updated_) calcStat();
    if (nReports_)
      return rateErr/(1.*nReports_);
    return 0.;
  }

  float getEvtTime() {
    if (updated_) calcStat();
    return evtTimeAvg;
  }

  float getEvtTimeErr() {
    if (updated_) calcStat();
    return evtTimeErr;
  }

  unsigned int getNSampledNonIdle() {
    if (updated_) calcStat();
    return nSampledNonIdle_;
  }

  /// 1 - idle fraction of all samples (0 when there are no samples or no rate).
  float getFracBusy() {
    if (updated_) calcStat();
    return 1.-fracWaitingAvg;
  }

  float getFracCPUBusy() {
    if (updated_) calcStat();
    return fracCPUBusy_;
  }

  /// Number of update() calls so far.
  unsigned int getReports() {
    return nReports_;
  }

  /// Measured averaging interval in seconds, or 0 when none is available.
  float getDt() {
    if (updated_) calcStat();
    return avgDeltaT2_;
  }

  /// Returns up to MODNAMES (module index, sample sum) pairs ordered by
  /// descending sample sum. Entries with equal sums keep their array order.
  /// Empty after deleteModuleSamplingPtr().
  std::vector<std::pair<unsigned int, unsigned int>> getOffendersVector() {
    typedef std::pair<unsigned int,unsigned int> entry_t;
    std::vector<entry_t> ret;
    if (updated_) calcStat();
    if (moduleSamplingSums) {
      // Sort a copy so the live counters keep their index order.
      std::vector<entry_t> ranked(moduleSamplingSums, moduleSamplingSums+nmodulenames_);
      std::stable_sort(ranked.begin(), ranked.end(),
                       [](const entry_t &a, const entry_t &b) { return a.second > b.second; });

      const size_t maxEntries = MODNAMES;
      for (size_t i=0; i<ranked.size() && ret.size()<maxEntries; i++) {
        // NOTE(review): module index 2 is never reported; presumably a module
        // that is not meaningful as an offender -- confirm.
        if (ranked[i].first==2) continue;
        ret.push_back(ranked[i]);
      }
    }
    return ret;
  }

  /// Fraction of all samples (idle + non-idle) attributed to module index `x`;
  /// 0 when `x` is out of range, unknown, or there are no samples.
  float getOffenderFracAt(unsigned int x) {
    if (x<nmodulenames_) {
      if (updated_) calcStat();
      float total = nSampledNonIdle_+nSampledIdle_;
      if (total>0.) {
        for (size_t i=0;i<nmodulenames_;i++) {
          if (moduleSamplingSums[i].first==x)
            return moduleSamplingSums[i].second/total;
        }
      }
    }
    return 0.;
  }
};
00532
00533
00534
00535 boost::shared_ptr<std::vector<edm::ParameterSet> > pServiceSets_;
00536 edm::ServiceToken serviceToken_;
00537 edm::EventProcessor *evtProcessor_;
00538 bool meInitialized_;
00539 bool meInitializedStreams_;
00540 bool meInitializedDatasets_;
00541 DQMService *dqmService_;
00542 DQMStore *dqmStore_;
00543 std::string configString_;
00544 xdata::Boolean dqmEnabled_;
00545
00546 std::map<unsigned int,int> nbSubsList;
00547 std::map<int,unsigned int> nbSubsListInv;
00548 unsigned int nbSubsClasses;
00549 std::vector<MonitorElement*> meVecRate_;
00550 std::vector<MonitorElement*> meVecTime_;
00551 std::vector<MonitorElement*> meVecOffenders_;
00552 MonitorElement * rateSummary_;
00553 MonitorElement * reportPeriodSummary_;
00554 MonitorElement * timingSummary_;
00555 MonitorElement * busySummary_;
00556 MonitorElement * busySummary2_;
00557 MonitorElement * busySummaryUncorr1_;
00558 MonitorElement * busySummaryUncorr2_;
00559 MonitorElement * fuReportsSummary_;
00560 MonitorElement * daqBusySummary_;
00561 MonitorElement * daqBusySummary2_;
00562 MonitorElement * busyModules_;
00563 unsigned int summaryLastLs_;
00564 std::vector<std::map<unsigned int, unsigned int> > occupancyNameMap;
00565
00566 std::deque<commonLsStat*> commonLsHistory;
00567 std::deque<lsStat*> * lsHistory;
00568
00569
00570 std::vector<MonitorElement *> endPathRates_;
00571
00572
00573 std::vector<MonitorElement *> datasetRates_;
00574
00575 std::vector<unsigned int> currentLs_;
00576
00577 xdata::UnsignedInteger32 saveLsInterval_;
00578 unsigned int ilumiprev_;
00579 std::list<std::string> pastSavedFiles_;
00580 xdata::String dqmSaveDir_;
00581 xdata::Boolean dqmFilesWritable_;
00582 xdata::String topLevelFolder_;
00583 unsigned int savedForLs_;
00584 std::string fileBaseName_;
00585 bool writeDirectoryPresent_;
00586
00587 timeval * reportingStart_;
00588 unsigned int lastSavedForTime_;
00589
00590 unsigned int dsMismatch;
00591 };
00592
/// qsort()-style comparator for arrays of std::pair<unsigned int,unsigned int>
/// that orders elements by descending `.second`.
///
/// @param a,b pointers to the two pairs to compare
/// @return -1 if a's second is larger, 0 if equal, 1 otherwise
///
/// Declared inline because the definition lives in a header: without it every
/// translation unit including this file emits its own external definition.
inline int modlistSortFunction( const void *a, const void *b)
{
  typedef std::pair<unsigned int,unsigned int> entry_t;
  const entry_t &lhs = *static_cast<const entry_t *>(a);
  const entry_t &rhs = *static_cast<const entry_t *>(b);
  if (lhs.second > rhs.second)
    return -1;
  if (lhs.second == rhs.second)
    return 0;
  return 1;
}
00603
/// Removes the fmod() remainder of `val` with respect to `mod`, i.e. snaps
/// `val` to a multiple of `mod` in the direction of zero.
///
/// @param val value to snap
/// @param mod step size; when 0 the value is returned unchanged (fmod by
///            zero would otherwise turn the result into NaN)
///
/// Declared inline because the definition lives in a header.
inline float fround(float val, float mod) {
  if (mod == 0.f) return val;
  return val - fmod(val,mod);
}
00607
00608 }
00609
00610
00611 #endif