00001 #ifndef EVENTFILTER_GOODIES_IDIE_H
00002 #define EVENTFILTER_GOODIES_IDIE_H
00003
00004 #include "EventFilter/Utilities/interface/Exception.h"
00005 #include "EventFilter/Utilities/interface/TriggerReportDef.h"
00006
00007 #include "xdata/String.h"
00008 #include "xdata/UnsignedInteger32.h"
00009 #include "xdata/Boolean.h"
00010 #include "xdata/ActionListener.h"
00011
00012 #include "xoap/MessageReference.h"
00013 #include "xoap/MessageFactory.h"
00014 #include "xoap/Method.h"
00015
00016 #include "xgi/Utils.h"
00017 #include "xgi/Input.h"
00018 #include "xgi/Output.h"
00019 #include "xgi/Method.h"
00020
00021 #include "xdaq/Application.h"
00022
00023 #include "toolbox/net/URN.h"
00024 #include "toolbox/fsm/exception/Exception.h"
00025
00026
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <deque>
#include <list>
#include <map>
#include <numeric>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include <sys/time.h>
00031
00032 #include "TFile.h"
00033 #include "TTree.h"
00034
00035 #include "FWCore/Framework/interface/EventProcessor.h"
00036 #include "DQMServices/Core/src/DQMService.h"
00037 #include "DQMServices/Core/interface/DQMStore.h"
00038 #include "DQMServices/Core/interface/MonitorElement.h"
00039
00040 #define MODLZSIZE 25
00041 #define MODLZSIZELUMI 20
00042 #define MOD_OCC_THRESHOLD 5
00043
00044 namespace evf {
00045
00046 int modlistSortFunction( const void *a, const void *b);
00047
00048 namespace internal{
// Record stored as the mapped value of `fmap` (std::map<std::string, fu>).
//
// The two scalar members carry zero initializers so that a default-initialized
// `fu` (e.g. `internal::fu tmp;`) never exposes indeterminate values.
struct fu{
  time_t tstamp = 0;                     // time stamp; 0 until assigned
  unsigned int ccount = 0;               // counter; 0 until assigned
                                         // NOTE(review): presumably a crash count - confirm against iDie.cc
  std::vector<pid_t> cpids;              // process ids attached to this record
  std::vector<std::string> signals;      // one string per recorded signal
  std::vector<std::string> stacktraces;  // one string per recorded stack trace
};
00056 struct rate{
00057 int nproc;
00058 int nsub;
00059 int nrep;
00060 int npath;
00061 int nendpath;
00062 int ptimesRun[evf::max_paths];
00063 int ptimesPassedPs[evf::max_paths];
00064 int ptimesPassedL1[evf::max_paths];
00065 int ptimesPassed[evf::max_paths];
00066 int ptimesFailed[evf::max_paths];
00067 int ptimesExcept[evf::max_paths];
00068 int etimesRun[evf::max_endpaths];
00069 int etimesPassedPs[evf::max_endpaths];
00070 int etimesPassedL1[evf::max_endpaths];
00071 int etimesPassed[evf::max_endpaths];
00072 int etimesFailed[evf::max_endpaths];
00073 int etimesExcept[evf::max_endpaths];
00074 };
00075
00076 }
00077 typedef std::map<std::string,internal::fu> fmap;
00078 typedef fmap::iterator ifmap;
00079
00080 class iDie : public xdaq::Application,
00081 public xdata::ActionListener
00082 {
00083 public:
00084
00085
00086
00087 XDAQ_INSTANTIATOR();
00088
00089
00090
00091
00092
00093 iDie(xdaq::ApplicationStub *s);
00094 virtual ~iDie();
00095
00096 void defaultWeb(xgi::Input *in,xgi::Output *out)
00097 throw (xgi::exception::Exception);
00098 void summaryTable(xgi::Input *in,xgi::Output *out)
00099 throw (xgi::exception::Exception);
00100 void detailsTable(xgi::Input *in,xgi::Output *out)
00101 throw (xgi::exception::Exception);
00102 void dumpTable(xgi::Input *in,xgi::Output *out)
00103 throw (xgi::exception::Exception);
00104 void updater(xgi::Input *in,xgi::Output *out)
00105 throw (xgi::exception::Exception);
00106 void iChoke(xgi::Input *in,xgi::Output *out)
00107 throw (xgi::exception::Exception);
00108 void iChokeMiniInterface(xgi::Input *in,xgi::Output *out)
00109 throw (xgi::exception::Exception);
00110 void spotlight(xgi::Input *in,xgi::Output *out)
00111 throw (xgi::exception::Exception);
00112
00113 void postEntry(xgi::Input*in,xgi::Output*out)
00114 throw (xgi::exception::Exception);
00115 void postEntryiChoke(xgi::Input*in,xgi::Output*out)
00116 throw (xgi::exception::Exception);
00117
00118
00119 xoap::MessageReference fsmCallback(xoap::MessageReference msg)
00120 throw (xoap::exception::Exception);
00121
00122
00123 void actionPerformed(xdata::Event& e);
00124
00125
00126 private:
00127
// Read-only view of an int vector in descending order of value.
//
// Only a reference to the caller's vector is kept (no copy is made), so the
// vector must outlive this object and must not be modified while the view is
// in use. The relative order of equal values is unspecified (std::sort).
struct sorted_indices{
  // Builds the permutation ind_ such that arr[ind_[0]] >= arr[ind_[1]] >= ...
  sorted_indices(const std::vector<int> &arr) : arr_(arr)
  {
    ind_.resize(arr_.size(),0);
    std::iota(ind_.begin(),ind_.end(),0);
    // Sorting the reversed range ascending leaves the forward range descending.
    // A lambda is used as comparator instead of *this: std::sort copies its
    // comparator freely, and every copy of *this would duplicate ind_.
    std::sort(ind_.rbegin(),ind_.rend(),
              [this](int a, int b){ return arr_[a]<arr_[b]; });
  }

  // Value found at sorted position `ind` (0 is the largest value).
  int operator[](size_t ind) const {return arr_[ind_[ind]];}

  // "Less than" on the values stored at original indices `a` and `b`.
  // Kept so the struct can still be handed to algorithms as a comparator.
  bool operator()(const size_t a, const size_t b) const
  {
    return arr_[a]<arr_[b];
  }

  // Original index of the element found at sorted position `ind`.
  int ii(size_t ind) const {return ind_[ind];}

  std::vector<int> ind_;          // permutation: sorted position -> original index
  const std::vector<int> &arr_;   // the caller's data (not owned)
};
00146
00147
00148
00149 class lsStat;
00150 class commonLsStat;
00151
00152 void reset();
00153 void parseModuleLegenda(std::string);
00154 void parseModuleHisto(const char *, unsigned int);
00155 void parsePathLegenda(std::string);
00156 void parsePathHisto(const unsigned char *, unsigned int);
00157 void initFramework();
00158 void deleteFramework();
00159 void initMonitorElements();
00160 void fillDQMStatHist(unsigned int nbsIdx, unsigned int lsid);
00161 void fillDQMModFractionHist(unsigned int nbsIdx, unsigned int lsid, unsigned int nonIdle,
00162 std::vector<std::pair<unsigned int, unsigned int>> offenders);
00163
00164 void updateRollingHistos(unsigned int nbsIdx, unsigned int lsid, lsStat & lst, commonLsStat & clst, bool roll);
00165 void doFlush();
00166 void perLumiFileSaver(unsigned int lsid);
00167
00168
00169
00170
00171
00172 Logger log_;
00173 std::string dqmState_;
00174
00175 xdata::String url_;
00176 xdata::String class_;
00177 xdata::UnsignedInteger32 instance_;
00178 xdata::String hostname_;
00179 xdata::UnsignedInteger32 runNumber_;
00180 xdata::String dqmCollectorHost_;
00181 xdata::String dqmCollectorPort_;
00182 fmap fus_;
00183
00184 unsigned int totalCores_;
00185 unsigned int nstates_;
00186 std::vector<int> cpuentries_;
00187 std::vector<std::vector<int> > cpustat_;
00188 std::vector<std::string> mapmod_;
00189 unsigned int last_ls_;
00190 std::vector<TriggerReportStatic>trp_;
00191 std::vector<int> trpentries_;
00192 std::vector<std::string> mappath_;
00193
00194 TFile *f_;
00195 TTree *t_;
00196 TBranch *b_;
00197 TBranch *b1_;
00198 TBranch *b2_;
00199 TBranch *b3_;
00200 TBranch *b4_;
00201 int *datap_;
00202 TriggerReportStatic *trppriv_;
00203 internal::rate r_;
00204
00205
00206 int nModuleLegendaMessageReceived_;
00207 int nPathLegendaMessageReceived_;
00208 int nModuleLegendaMessageWithDataReceived_;
00209 int nPathLegendaMessageWithDataReceived_;
00210 int nModuleHistoMessageReceived_;
00211 int nPathHistoMessageReceived_;
00212 timeval runStartDetectedTimeStamp_;
00213 timeval lastModuleLegendaMessageTimeStamp_;
00214 timeval lastPathLegendaMessageTimeStamp_;
00215
00216
00217 std::vector<unsigned int> epInstances;
00218 std::vector<unsigned int> epMax;
00219 std::vector<float> HTscaling;
00220 std::vector<unsigned int> nbMachines;
00221 std::vector<float> machineWeight;
00222 std::vector<float> machineWeightInst;
00223
// Per-lumisection summary holding one slot per class in every vector.
//
// All vectors are created with `classes` zeroed elements and are expected to
// keep that common length; setBusyForClass() does not range-check its index.
class commonLsStat {

public:
  unsigned int ls_;                          // lumisection id given at construction
  std::vector<unsigned int> rateVec_;        // rate per class
  std::vector<float> busyVec_;               // busy fraction per class
  std::vector<float> busyCPUVec_;            // CPU busy fraction per class
  std::vector<float> busyVecTheor_;          // "theoretical" busy fraction per class
  std::vector<float> busyCPUVecTheor_;       // "theoretical" CPU busy fraction per class
  std::vector<unsigned int> nbMachines;      // number of machine reports per class

  // Creates a record for lumisection `lsid` with `classes` zeroed slots.
  commonLsStat(unsigned int lsid,unsigned int classes) :
    ls_(lsid),
    rateVec_(classes,0),
    busyVec_(classes,0.f),
    busyCPUVec_(classes,0.f),
    busyVecTheor_(classes,0.f),
    busyCPUVecTheor_(classes,0.f),
    nbMachines(classes,0)
  {
  }

  // Overwrites every per-class value of slot `classIdx`.
  // Precondition: classIdx < number of classes given to the constructor.
  void setBusyForClass(unsigned int classIdx,unsigned int rate,float busy,float busyTheor, float busyCPU, float busyCPUTheor, unsigned int nMachineReports) {
    rateVec_[classIdx]=rate;
    busyVec_[classIdx]=busy;
    busyCPUVec_[classIdx]=busyCPU;
    busyVecTheor_[classIdx]=busyTheor;
    busyCPUVecTheor_[classIdx]=busyCPUTheor;
    nbMachines[classIdx]=nMachineReports;
  }

  // Sum of the rates of all classes.
  unsigned int getTotalRate() const {
    unsigned int totRate=0;
    for (size_t i=0;i<rateVec_.size();i++) totRate+=rateVec_[i];
    return totRate;
  }

  // Average busy fraction over all classes, each class weighted by
  // machineWeightInst[i]*nbMachines[i]. `procstat` selects the CPU busy
  // values instead of the sampled ones. Returns 0 when the total weight is 0.
  // Classes without a matching weight entry are ignored.
  float getBusyTotalFrac(bool procstat,const std::vector<float> & machineWeightInst) const {
    return weightedBusyFraction(procstat ? busyCPUVec_ : busyVec_, machineWeightInst);
  }

  // Same as getBusyTotalFrac() but over the "theoretical" busy values and
  // with the weights given in `machineWeight`.
  float getBusyTotalFracTheor(bool procstat,const std::vector<float> & machineWeight) const {
    return weightedBusyFraction(procstat ? busyCPUVecTheor_ : busyVecTheor_, machineWeight);
  }

  // Total number of machine reports over all classes.
  unsigned int getNReports() const {
    unsigned int sum=0;
    for (size_t i=0;i<nbMachines.size();i++) sum+=nbMachines[i];
    return sum;
  }

  // One "<class>/r:<rate>/b:<busy>/n:<reports>; " entry per class.
  std::string printInfo() const {
    std::ostringstream info;
    for (size_t i=0;i<rateVec_.size();i++) {
      info << i << "/r:" << rateVec_[i] <<"/b:"<<busyVec_[i]<<"/n:"<<nbMachines[i]<<"; ";
    }
    return info.str();
  }

private:
  // Shared implementation of the two weighted averages. The loop is bounded
  // by the shortest of the three vectors so that a short weight vector can
  // never be read out of range; sums are accumulated in double precision.
  float weightedBusyFraction(const std::vector<float> & busy,
                             const std::vector<float> & weight) const {
    const size_t n = std::min(busy.size(), std::min(nbMachines.size(), weight.size()));
    double sum=0;
    double sumMachines=0;
    for (size_t i=0;i<n;i++) {
      const float classWeight = weight[i]*nbMachines[i];
      sum+=classWeight*busy[i];
      sumMachines+=classWeight;
    }
    if (sumMachines>0)
      return float(sum/sumMachines);
    return 0.;
  }
};
00304
00305 class lsStat {
00306 public:
00307 unsigned int ls_;
00308 bool updated_;
00309 unsigned int nbSubs_;
00310 unsigned int nSampledNonIdle_;
00311 unsigned int nSampledNonIdle2_;
00312 unsigned int nSampledIdle_;
00313 unsigned int nSampledIdle2_;
00314 unsigned int nProc_;
00315 unsigned int nProc2_;
00316 unsigned int nCPUBusy_;
00317 unsigned int nReports_;
00318 unsigned int nMaxReports_;
00319 double rateAvg;
00320 double rateErr;
00321 double evtTimeAvg;
00322 double evtTimeErr;
00323 double fracWaitingAvg;
00324 double fracCPUBusy_;
00325 unsigned int nmodulenames_;
00326 std::pair<unsigned int,unsigned int> *moduleSamplingSums;
00327
00328 lsStat(unsigned int ls, unsigned int nbSubs,unsigned int maxreps,unsigned int nmodulenames):
00329 ls_(ls),updated_(false),nbSubs_(nbSubs),
00330 nSampledNonIdle_(0),nSampledNonIdle2_(0),nSampledIdle_(0),nSampledIdle2_(0),
00331 nProc_(0),nProc2_(0),nCPUBusy_(0),nReports_(0),nMaxReports_(maxreps),nmodulenames_(nmodulenames)
00332 {
00333 moduleSamplingSums = new std::pair<unsigned int,unsigned int>[nmodulenames_];
00334 for (unsigned int i=0;i<nmodulenames_;i++) {
00335 moduleSamplingSums[i].first=i;
00336 moduleSamplingSums[i].second=0;
00337 }
00338 }
00339
00340 void update(unsigned int nSampledNonIdle,unsigned int nSampledIdle, unsigned int nProc,unsigned int ncpubusy) {
00341 nReports_++;
00342 nSampledNonIdle_+=nSampledNonIdle;
00343 nSampledNonIdle2_+=pow(nSampledNonIdle,2);
00344 nSampledIdle_+=nSampledIdle;
00345 nSampledIdle2_+=pow(nSampledIdle,2);
00346 nProc_+=nProc;
00347 nProc2_+=pow(nProc,2);
00348 nCPUBusy_+=ncpubusy;
00349 updated_=true;
00350 }
00351
00352 std::pair<unsigned int,unsigned int> * getModuleSamplingPtr() {
00353 return moduleSamplingSums;
00354 }
00355
00356 void deleteModuleSamplingPtr() {
00357 delete moduleSamplingSums;
00358 moduleSamplingSums=nullptr;
00359 nmodulenames_=0;
00360 }
00361
00362 void calcStat()
00363 {
00364 if (!updated_) return;
00365 rateAvg=nProc_ / 23.;
00366 rateErr=sqrt(fabs(nProc2_ - pow(nProc_,2)))/23.;
00367 if (rateAvg==0.) {rateErr=0.;evtTimeAvg=0.;evtTimeErr=0.;fracWaitingAvg=0;}
00368 else {
00369 if (nSampledNonIdle_+nSampledIdle_!=0) {
00370 float nAllInv = 1./(nSampledNonIdle_+nSampledIdle_);
00371 fracWaitingAvg= nSampledIdle_*nAllInv;
00372 double nSampledIdleErr2=fabs(nSampledIdle2_ - pow(nSampledIdle_,2));
00373 double nSampledNonIdleErr2=fabs(nSampledNonIdle2_ - pow(nSampledNonIdle_,2));
00374 double fracWaitingAvgErr= sqrt(
00375 (pow(nSampledIdle_,2)*nSampledNonIdleErr2
00376 + pow(nSampledNonIdle_,2)*nSampledIdleErr2))*pow(nAllInv,2);
00377 if (rateAvg) {
00378 float rateAvgInv=1./rateAvg;
00379 evtTimeAvg=nbSubs_ * nReports_ * (1.-fracWaitingAvg)*rateAvgInv;
00380 evtTimeErr = nbSubs_ * nReports_ * sqrt(pow(fracWaitingAvg*rateErr*pow(rateAvgInv,2),2) + pow(fracWaitingAvgErr*rateAvgInv,2));
00381 }
00382 else {
00383 evtTimeAvg=0;
00384 evtTimeErr=0;
00385 }
00386 }
00387 }
00388 if (nReports_) fracCPUBusy_=nCPUBusy_/(nReports_*1000.);
00389 else fracCPUBusy_=0.;
00390 updated_=false;
00391 }
00392
00393 float getRate() {
00394 if (updated_) calcStat();
00395 return rateAvg;
00396 }
00397
00398 float getRateErr() {
00399 if (updated_) calcStat();
00400 return rateErr;
00401 }
00402
00403 float getRatePerMachine() {
00404 if (updated_) calcStat();
00405 if (nReports_)
00406 return rateAvg/(1.*nReports_);
00407 return 0.;
00408 }
00409
00410 float getRateErrPerMachine() {
00411 if (updated_) calcStat();
00412 if (nReports_)
00413 return rateErr/(1.*nReports_);
00414 return 0.;
00415 }
00416
00417 float getEvtTime() {
00418 if (updated_) calcStat();
00419 return evtTimeAvg;
00420 }
00421
00422 float getEvtTimeErr() {
00423 if (updated_) calcStat();
00424 return evtTimeErr;
00425 }
00426
00427 unsigned int getNSampledNonIdle() {
00428 if (updated_) calcStat();
00429 return nSampledNonIdle_;
00430 }
00431
00432 float getFracBusy() {
00433 if (updated_) calcStat();
00434 return 1.-fracWaitingAvg;
00435 }
00436
00437 float getFracCPUBusy() {
00438 if (updated_) calcStat();
00439 return fracCPUBusy_;
00440 }
00441
00442 unsigned int getReports() {
00443 return nReports_;
00444 }
00445
00446 std::vector<std::pair<unsigned int, unsigned int>> getOffendersVector() {
00447 std::vector<std::pair<unsigned int, unsigned int>> ret;
00448 if (updated_) calcStat();
00449 if (moduleSamplingSums) {
00450 std::qsort((void *)moduleSamplingSums, nmodulenames_,
00451 sizeof(std::pair<unsigned int,unsigned int>), modlistSortFunction);
00452 unsigned int count=0;
00453 unsigned int saveidx=0;
00454 while (saveidx < MODLZSIZE && count<nmodulenames_ && saveidx<MODLZSIZE)
00455 {
00456 if (moduleSamplingSums[count].first==2) {count++;continue;}
00457 ret.push_back(moduleSamplingSums[count]);
00458 saveidx++;
00459 count++;
00460 }
00461 }
00462 return ret;
00463 }
00464 };
00465
00466
00467
00468 boost::shared_ptr<std::vector<edm::ParameterSet> > pServiceSets_;
00469 edm::ServiceToken serviceToken_;
00470 edm::EventProcessor *evtProcessor_;
00471 bool meInitialized_;
00472 DQMService *dqmService_;
00473 DQMStore *dqmStore_;
00474 std::string configString_;
00475 xdata::Boolean dqmEnabled_;
00476
00477 std::map<unsigned int,int> nbSubsList;
00478 std::map<int,unsigned int> nbSubsListInv;
00479 unsigned int nbSubsClasses;
00480 std::vector<MonitorElement*> meVecRate_;
00481 std::vector<MonitorElement*> meVecTime_;
00482 std::vector<MonitorElement*> meVecOffenders_;
00483 MonitorElement * rateSummary_;
00484 MonitorElement * timingSummary_;
00485 MonitorElement * busySummary_;
00486 MonitorElement * busySummary2_;
00487 MonitorElement * fuReportsSummary_;
00488 MonitorElement * daqBusySummary_;
00489 unsigned int summaryLastLs_;
00490 std::vector<std::map<unsigned int, unsigned int> > occupancyNameMap;
00491
00492 std::deque<commonLsStat> commonLsHistory;
00493 std::deque<lsStat> *lsHistory;
00494
00495 std::vector<unsigned int> currentLs_;
00496
00497 xdata::UnsignedInteger32 saveLsInterval_;
00498 unsigned int ilumiprev_;
00499 std::list<std::string> pastSavedFiles_;
00500 xdata::String dqmSaveDir_;
00501 xdata::Boolean dqmFilesWritable_;
00502 xdata::String topLevelFolder_;
00503 unsigned int savedForLs_;
00504 std::string fileBaseName_;
00505 bool writeDirectoryPresent_;
00506 };
00507
// qsort-style comparator for std::pair<unsigned int, unsigned int> elements.
//
// Orders by the pair's `second` member, largest first: returns -1 when `a`
// must come before `b`, 1 when it must come after, and 0 when both `second`
// values are equal. `first` is not looked at.
//
// Declared inline because the definition lives in a header: without it every
// translation unit including this file would emit its own external
// definition and the link would fail with duplicate symbols.
inline int modlistSortFunction( const void *a, const void *b)
{
  typedef std::pair<unsigned int,unsigned int> entry_t;
  const entry_t &lhs = *static_cast<const entry_t*>(a);
  const entry_t &rhs = *static_cast<const entry_t*>(b);
  if (lhs.second > rhs.second)
    return -1;
  if (lhs.second < rhs.second)
    return 1;
  return 0;
}
00518
00519
00520 }
00521
00522
00523 #endif