CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_6/src/EventFilter/Goodies/src/iDie.h

Go to the documentation of this file.
00001 #ifndef EVENTFILTER_GOODIES_IDIE_H
00002 #define EVENTFILTER_GOODIES_IDIE_H
00003 
00004 #include "EventFilter/Utilities/interface/Exception.h"
00005 #include "EventFilter/Utilities/interface/TriggerReportDef.h"
00006 
00007 #include "xdata/String.h"
00008 #include "xdata/UnsignedInteger32.h"
00009 #include "xdata/Boolean.h"
00010 #include "xdata/ActionListener.h"
00011 
00012 #include "xoap/MessageReference.h"
00013 #include "xoap/MessageFactory.h"
00014 #include "xoap/Method.h"
00015 
00016 #include "xgi/Utils.h"
00017 #include "xgi/Input.h"
00018 #include "xgi/Output.h"
00019 #include "xgi/Method.h"
00020 
00021 #include "xdaq/Application.h"
00022 
00023 #include "toolbox/net/URN.h"
00024 #include "toolbox/fsm/exception/Exception.h"
00025 
00026 
00027 #include <vector>
00028 #include <deque>
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <list>
#include <map>
#include <sstream>
#include <string>
#include <utility>
00029 
00030 #include <sys/time.h>
00031 #include <math.h>
00032 
00033 #include "TFile.h"
00034 #include "TTree.h"
00035 
00036 #include "FWCore/Framework/interface/EventProcessor.h"
00037 #include "DQMServices/Core/src/DQMService.h"
00038 #include "DQMServices/Core/interface/DQMStore.h"
00039 #include "DQMServices/Core/interface/MonitorElement.h"
00040 
00041 #define MODNAMES 25
00042 
00043 namespace evf {
00044 
00045   int modlistSortFunction( const void *a, const void *b);
00046 
// Plain data records used by iDie; neither struct defines constructors, so
// members are only initialised if the code that creates them does so.
00047   namespace internal{
// Record holding a timestamp, a counter, and parallel-looking vectors of
// process ids, signal strings and stack-trace strings.
// NOTE(review): presumably one record per reporting filter unit (the fmap
// typedef below keys these records by std::string) -- confirm against iDie.cc.
00048    struct fu{
00049       time_t tstamp;
00050       unsigned int ccount;
00051       std::vector<pid_t> cpids;
00052       std::vector<std::string> signals;
00053       std::vector<std::string> stacktraces;
00054     };
// Fixed-size block of integer counters: five scalars followed by six arrays
// sized by evf::max_paths ("p" prefix) and six sized by evf::max_endpaths
// ("e" prefix). Both bounds are declared outside this header.
// NOTE(review): the layout looks like a flat record meant for ROOT tree
// branches (see the TBranch members of iDie) -- confirm before reordering.
00055    struct rate{
00056      int nproc;
00057      int nsub;
00058      int nrep;
00059      int npath;
00060      int nendpath;
00061      int ptimesRun[evf::max_paths];
00062      int ptimesPassedPs[evf::max_paths];
00063      int ptimesPassedL1[evf::max_paths];
00064      int ptimesPassed[evf::max_paths];
00065      int ptimesFailed[evf::max_paths];
00066      int ptimesExcept[evf::max_paths];
00067      int etimesRun[evf::max_endpaths];
00068      int etimesPassedPs[evf::max_endpaths];
00069      int etimesPassedL1[evf::max_endpaths];
00070      int etimesPassed[evf::max_endpaths];
00071      int etimesFailed[evf::max_endpaths];
00072      int etimesExcept[evf::max_endpaths];
00073    };
00074 
00075   }
00076   typedef std::map<std::string,internal::fu> fmap;
00077   typedef fmap::iterator ifmap;
00078   
00079   class iDie : public xdaq::Application,
00080     public xdata::ActionListener
00081   {
00082   public:
00083     //
00084     // xdaq instantiator macro
00085     //
00086     XDAQ_INSTANTIATOR();
00087   
00088     
00089     //
00090     // construction/destruction
00091     //
00092     iDie(xdaq::ApplicationStub *s);
00093     virtual ~iDie();
00094     //UI
00095     void defaultWeb(xgi::Input *in,xgi::Output *out)
00096       throw (xgi::exception::Exception);
00097     void summaryTable(xgi::Input *in,xgi::Output *out)
00098       throw (xgi::exception::Exception);
00099     void detailsTable(xgi::Input *in,xgi::Output *out)
00100       throw (xgi::exception::Exception);
00101     void dumpTable(xgi::Input *in,xgi::Output *out)
00102       throw (xgi::exception::Exception);
00103     void updater(xgi::Input *in,xgi::Output *out)
00104       throw (xgi::exception::Exception);
00105     void iChoke(xgi::Input *in,xgi::Output *out)
00106       throw (xgi::exception::Exception);
00107     void iChokeMiniInterface(xgi::Input *in,xgi::Output *out)
00108       throw (xgi::exception::Exception);
00109     void spotlight(xgi::Input *in,xgi::Output *out)
00110       throw (xgi::exception::Exception);
00111     //AI
00112     void postEntry(xgi::Input*in,xgi::Output*out)
00113       throw (xgi::exception::Exception);
00114     void postEntryiChoke(xgi::Input*in,xgi::Output*out)
00115       throw (xgi::exception::Exception);
00116     
00117     // *fake* fsm soap command callback
00118     xoap::MessageReference fsmCallback(xoap::MessageReference msg)
00119       throw (xoap::exception::Exception);
00120 
00121     // xdata:ActionListener interface
00122     void actionPerformed(xdata::Event& e);
00123 
00124 
00125   private:
00126 
// Read-only view of an int vector ordered by descending value.
//
// The constructor builds a permutation of indices (ind_) such that
// arr_[ind_[0]] >= arr_[ind_[1]] >= ...; the relative order of equal
// values is unspecified (std::sort is not stable).
//
// Lifetime: only a reference to the input vector is stored, so the vector
// passed to the constructor must outlive this object and must not be
// resized while the view is in use.
struct sorted_indices{
  sorted_indices(const std::vector<int> &arr) : arr_(arr)
  {
    ind_.resize(arr_.size(),0);
    for (size_t i=0;i<ind_.size();i++) ind_[i] = static_cast<int>(i);
    // Sort through a small lambda instead of passing *this: std::sort takes
    // its comparator by value, and copying *this would copy ind_ each time.
    // Ascending order over the reversed range == descending order over ind_.
    const std::vector<int> &values = arr_;
    std::sort(ind_.rbegin(),ind_.rend(),
              [&values](int a, int b) { return values[a]<values[b]; });
  }

  // Value at sorted position ind (0 = largest value).
  int operator[](size_t ind) const {return arr_[ind_[ind]];}

  // Strict-weak ordering on original indices by value; kept so the struct
  // can still be used as a comparator by existing callers.
  bool operator()(const size_t a, const size_t b) const
  {
    return arr_[a]<arr_[b];
  }

  // Original index of the element at sorted position ind.
  int ii(size_t ind) const {return ind_[ind];}

  std::vector<int> ind_;          // permutation: sorted position -> original index
  const std::vector<int> &arr_;   // non-owning reference to the values
};
00145     //
00146     // private member functions
00147     //
00148     class lsStat;
00149     class commonLsStat;
00150 
00151     void reset();
00152     void parseModuleLegenda(std::string);
00153     void parseModuleHisto(const char *, unsigned int);
00154     void parsePathLegenda(std::string);
00155     void parseDatasetLegenda(std::string);
00156     void parsePathHisto(const unsigned char *, unsigned int);
00157     void initFramework();
00158     void deleteFramework();
00159     void initMonitorElements();
00160     void initMonitorElementsStreams();
00161     void initMonitorElementsDatasets();
00162     void fillDQMStatHist(unsigned int nbsIdx, unsigned int lsid);
00163     void fillDQMModFractionHist(unsigned int nbsIdx, unsigned int lsid, unsigned int nonIdle,
00164                                  std::vector<std::pair<unsigned int, unsigned int>> offenders);
00165  
00166     void updateRollingHistos(unsigned int nbsIdx, unsigned int lsid, lsStat * lst, commonLsStat * clst, bool roll);
00167     void updateStreamHistos(unsigned int forls, commonLsStat *clst, commonLsStat *prevclst);
00168     void updateDatasetHistos(unsigned int forls, commonLsStat *clst, commonLsStat *prevclst);
00169     void doFlush();
00170     void perLumiFileSaver(unsigned int lsid);
00171     void perTimeFileSaver();
00172     //
00173     // member data
00174     //
00175 
00176     // message logger
00177     Logger                          log_;
00178     std::string                     dqmState_;          
00179     // monitored parameters
00180     xdata::String                   url_;
00181     xdata::String                   class_;
00182     xdata::UnsignedInteger32        instance_;
00183     xdata::String                   hostname_;
00184 
00185     xdata::UnsignedInteger32        runNumber_;
00186     unsigned int                    lastRunNumberSet_;
00187 
00188     //run info
00189     MonitorElement * runId_;
00190     MonitorElement * lumisecId_;
00191     MonitorElement * eventId_;
00192     MonitorElement * eventTimeStamp_;
00193 
00194     xdata::String                   dqmCollectorHost_;
00195     xdata::String                   dqmCollectorPort_;
00196     fmap                            fus_;
00197     
00198     unsigned int                    totalCores_;
00199     unsigned int                    nstates_;   
00200     std::vector<int>                cpuentries_;
00201     std::vector<std::vector<int> >  cpustat_;
00202     std::vector<std::string>        mapmod_;
00203     unsigned int                    last_ls_;
00204     std::vector<TriggerReportStatic>trp_;
00205     std::vector<int>                trpentries_;
00206     std::vector<std::string>        mappath_;
00207     //root stuff
00208     TFile                          *f_;
00209     TTree                          *t_;
00210     TBranch                        *b_;
00211     TBranch                        *b1_;
00212     TBranch                        *b2_;
00213     TBranch                        *b3_;
00214     TBranch                        *b4_;
00215     int                            *datap_;
00216     TriggerReportStatic            *trppriv_;
00217     internal::rate                  r_;
00218 
00219     //message statistics 
00220     int                             nModuleLegendaMessageReceived_;
00221     int                             nPathLegendaMessageReceived_;
00222     int                             nModuleLegendaMessageWithDataReceived_;
00223     int                             nPathLegendaMessageWithDataReceived_;
00224     int                             nModuleHistoMessageReceived_;
00225     int                             nPathHistoMessageReceived_;
00226     timeval                         runStartDetectedTimeStamp_;
00227     timeval                         lastModuleLegendaMessageTimeStamp_;
00228     timeval                         lastPathLegendaMessageTimeStamp_;
00229 
00230     int                             nDatasetLegendaMessageReceived_;
00231     int                             nDatasetLegendaMessageWithDataReceived_;
00232     timeval                         lastDatasetLegendaMessageTimeStamp_;
00233 
00234     //DQM histogram statistics
00235     std::vector<unsigned int> epInstances;
00236     std::vector<unsigned int> epMax;
00237     std::vector<float> HTscaling;
00238     std::vector<unsigned int> nbMachines;
00239     std::vector<float> machineWeight;
00240     std::vector<float> machineWeightInst;
00241 
00242     std::vector<std::string > endPathNames_;
00243     std::vector<std::string > datasetNames_;
00244 
// Statistics for one section id (ls_), kept per "class" index.
// NOTE(review): "class" presumably means a group of machines sharing the
// same number of sub-processes (see nbSubsClasses in iDie) -- confirm.
//
// All per-class vectors except endPathCounts_ and datasetCounts_ are sized
// to `classes` and zero-filled by the constructor; the two count vectors
// start empty and are expected to be filled by the owner.
class commonLsStat {

  public:
  unsigned int ls_;
  std::vector<float> rateVec_;
  std::vector<float> busyVec_;
  std::vector<float> busyCPUVec_;
  std::vector<float> busyVecTheor_;
  std::vector<float> busyCPUVecTheor_;
  std::vector<unsigned int> nbMachines;
  std::vector<unsigned int> endPathCounts_;
  std::vector<unsigned int> datasetCounts_;

  // Creates zeroed statistics for `classes` classes tagged with id `lsid`.
  commonLsStat(unsigned int lsid,unsigned int classes) :
    ls_(lsid),
    rateVec_(classes,0.f),
    busyVec_(classes,0.f),
    busyCPUVec_(classes,0.f),
    busyVecTheor_(classes,0.f),
    busyCPUVecTheor_(classes,0.f),
    nbMachines(classes,0u)
  {}

  // Stores the values reported for class `classIdx`.
  // classIdx is not range-checked: it must be < the `classes` value given
  // to the constructor.
  void setBusyForClass(unsigned int classIdx,float rate,float busy,float busyTheor, float busyCPU, float busyCPUTheor, unsigned int nMachineReports) {
    rateVec_[classIdx]=rate;
    busyVec_[classIdx]=busy;
    busyCPUVec_[classIdx]=busyCPU;
    busyVecTheor_[classIdx]=busyTheor;
    busyCPUVecTheor_[classIdx]=busyCPUTheor;
    nbMachines[classIdx]=nMachineReports;
  }

  // Sum of the per-class rates.
  float getTotalRate() const {
    float totRate=0;
    for (size_t i=0;i<rateVec_.size();i++) totRate+=rateVec_[i];
    return totRate;
  }

  // Weighted average of the per-class busy fraction, each class weighted by
  // machineWeightInst[i]*nbMachines[i]. procstat selects busyCPUVec_ instead
  // of busyVec_. Returns 0 when the total weight is not positive.
  // Classes without an entry in machineWeightInst are ignored (previously
  // this read past the end of a too-short weight vector).
  float getBusyTotalFrac(bool procstat,const std::vector<float> & machineWeightInst) const {
    double sum=0;
    double sumMachines=0;
    for (size_t i=0;i<busyVec_.size() && i<machineWeightInst.size();i++) {
      if (!procstat)
        sum+=machineWeightInst[i]*nbMachines.at(i)*busyVec_[i];
      else
        sum+=machineWeightInst[i]*nbMachines.at(i)*busyCPUVec_[i];
      sumMachines+=machineWeightInst[i]*nbMachines.at(i);
    }
    if (sumMachines>0)
      return float(sum/sumMachines);
    else return 0.;
  }

  // Same weighted average as getBusyTotalFrac, but over the "Theor" vectors
  // and with machineWeight as the per-class weight. Classes without an
  // entry in machineWeight or nbMachines are ignored.
  float getBusyTotalFracTheor(bool procstat,const std::vector<float> & machineWeight) const {
    float sum=0;
    float sumMachines=0;
    for (size_t i=0;i<busyVecTheor_.size() && i<nbMachines.size() && i<machineWeight.size();i++) {
      if (!procstat)
        sum+=machineWeight[i]*nbMachines[i]*busyVecTheor_[i];
      else
        sum+=machineWeight[i]*nbMachines[i]*busyCPUVecTheor_[i];
      sumMachines+=machineWeight[i]*nbMachines[i];
    }
    if (sumMachines>0)
      return sum/sumMachines;
    else return 0.;
  }

  // Total number of machine reports over all classes.
  unsigned int getNReports() const {
    unsigned int sum=0;
    for (size_t i=0;i<nbMachines.size();i++) sum+=nbMachines[i];
    return sum;
  }

  // One "idx/r:rate/b:busy/n:machines; " entry per class, for logging.
  std::string printInfo() const {
    std::ostringstream info;
    for (size_t i=0;i<rateVec_.size();i++) {
      info << i << "/r:" << rateVec_[i] <<"/b:"<<busyVec_[i]<<"/n:"<<nbMachines[i]<<"; ";
    }
    return info.str();
  }
};
00328 
// Accumulated statistics for one section id (ls_) and one sub-process
// count (nbSubs_).
//
// update() adds one report to the running sums and marks the object dirty;
// the derived quantities (rateAvg, evtTimeAvg, ...) are recomputed lazily by
// calcStat() the next time a getter is called.
//
// Ownership: moduleSamplingSums is a heap array of nmodulenames_
// (module index, sample count) pairs owned by this object. It is released by
// the destructor or, earlier, by deleteModuleSamplingPtr(). Copying is
// disabled because two copies would free the same array.
class lsStat {
  public:
  unsigned int ls_;
  bool updated_;
  unsigned int nbSubs_;
  unsigned int nSampledNonIdle_;
  unsigned int nSampledNonIdle2_;
  unsigned int nSampledIdle_;
  unsigned int nSampledIdle2_;
  unsigned int nProc_;
  unsigned int nProc2_;
  unsigned int nCPUBusy_;
  unsigned int nReports_;
  unsigned int nMaxReports_;
  double rateAvg;
  double rateErr;
  double evtTimeAvg;
  double evtTimeErr;
  double fracWaitingAvg;
  double fracCPUBusy_;
  unsigned int nmodulenames_;
  unsigned int sumDeltaTms_;
  float avgDeltaT_;
  float avgDeltaT2_;
  std::pair<unsigned int,unsigned int> *moduleSamplingSums;

  // Creates empty statistics and allocates the per-module sample table,
  // with entry i initialised to (i, 0).
  lsStat(unsigned int ls, unsigned int nbSubs,unsigned int maxreps,unsigned int nmodulenames):
    ls_(ls),updated_(true),nbSubs_(nbSubs),
    nSampledNonIdle_(0),nSampledNonIdle2_(0),nSampledIdle_(0),nSampledIdle2_(0),
    nProc_(0),nProc2_(0),nCPUBusy_(0),nReports_(0),nMaxReports_(maxreps),
    // Derived values get the same defaults calcStat() assigns when there
    // are no reports, so reading the public members early is well defined.
    rateAvg(0.),rateErr(0.),evtTimeAvg(0.),evtTimeErr(0.),fracWaitingAvg(1.),fracCPUBusy_(0.),
    nmodulenames_(nmodulenames),
    sumDeltaTms_(0),avgDeltaT_(23),avgDeltaT2_(0),
    moduleSamplingSums(new std::pair<unsigned int,unsigned int>[nmodulenames])
  {
    for (unsigned int i=0;i<nmodulenames_;i++) {
      moduleSamplingSums[i].first=i;
      moduleSamplingSums[i].second=0;
    }
  }

  // The object owns a raw array: forbid copies instead of double-freeing.
  lsStat(const lsStat&) = delete;
  lsStat& operator=(const lsStat&) = delete;

  ~lsStat() {
    delete[] moduleSamplingSums;   // allocated with new[]
  }

  // Adds one report to the running sums. The "*2_" members accumulate the
  // squares of the individual values.
  // deltaTms is scaled by 0.001 in calcStat(), i.e. presumably milliseconds.
  void update(unsigned int nSampledNonIdle,unsigned int nSampledIdle,
              unsigned int nProc,unsigned int ncpubusy, unsigned int deltaTms)
  {
    nReports_++;
    nSampledNonIdle_+=nSampledNonIdle;
    // Integer squares: same result as the former pow(x,2) while the sums
    // fit in an unsigned int, and well-defined wrap-around instead of an
    // undefined double->unsigned conversion when they do not.
    nSampledNonIdle2_+=nSampledNonIdle*nSampledNonIdle;
    nSampledIdle_+=nSampledIdle;
    nSampledIdle2_+=nSampledIdle*nSampledIdle;
    nProc_+=nProc;
    nProc2_+=nProc*nProc;
    nCPUBusy_+=ncpubusy;
    sumDeltaTms_+=deltaTms;
    updated_=true;
  }

  // Direct access to the per-module sample table (nullptr after
  // deleteModuleSamplingPtr()). The pointer stays owned by this object.
  std::pair<unsigned int,unsigned int> * getModuleSamplingPtr() {
    return moduleSamplingSums;
  }

  // Releases the per-module sample table early; afterwards the object
  // reports zero modules.
  void deleteModuleSamplingPtr() {
    delete[] moduleSamplingSums;   // allocated with new[]
    moduleSamplingSums=nullptr;
    nmodulenames_=0;
  }

  // Recomputes the derived quantities from the running sums. Does nothing
  // unless update() was called since the last computation.
  void calcStat()
  {
    if (!updated_) return;
    if (nReports_) {
      float tinv = 0.001/nReports_;
      fracCPUBusy_=nCPUBusy_*tinv;
      avgDeltaT_ = avgDeltaT2_ = sumDeltaTms_*tinv;
      if (avgDeltaT_==0.) {
        avgDeltaT_=23.;//default value
        avgDeltaT2_=0;
      }
      rateAvg=nProc_ / avgDeltaT_;
      rateErr=sqrt(fabs(nProc2_ - pow(nProc_,2)))/avgDeltaT_;
    }
    else {
      fracCPUBusy_=0.;
      rateAvg=0.;
      rateErr=0.;
      avgDeltaT_=23.;
    }

    evtTimeAvg=0.;evtTimeErr=0.;fracWaitingAvg=1.;
    unsigned int sampled = nSampledNonIdle_+nSampledIdle_;
    if (rateAvg!=0. && sampled) {
        float nAllInv = 1./sampled;
        fracWaitingAvg= nSampledIdle_*nAllInv;
        double nSampledIdleErr2=fabs(nSampledIdle2_ - pow(nSampledIdle_,2));
        double nSampledNonIdleErr2=fabs(nSampledNonIdle2_ - pow(nSampledNonIdle_,2));
        double fracWaitingAvgErr= sqrt(
                                (pow(nSampledIdle_,2)*nSampledNonIdleErr2
                                 + pow(nSampledNonIdle_,2)*nSampledIdleErr2))*pow(nAllInv,2);
        float rateAvgInv=1./rateAvg;
        evtTimeAvg=nbSubs_ * nReports_ * (1.-fracWaitingAvg)*rateAvgInv;
        evtTimeErr = nbSubs_ * nReports_ * sqrt(pow(fracWaitingAvg*rateErr*pow(rateAvgInv,2),2) + pow(fracWaitingAvgErr*rateAvgInv,2));
    }
    updated_=false;
  }

  // Getters below refresh the derived values first when needed.

  float getRate() {
    if (updated_) calcStat();
    return rateAvg;
  }

  float getRateErr() {
    if (updated_) calcStat();
    return rateErr;
  }

  // Rate divided by the number of reports (0 when there are none).
  float getRatePerMachine() {
    if (updated_) calcStat();
    if (nReports_)
      return rateAvg/(1.*nReports_);
    return 0.;
  }

  // Rate error divided by the number of reports (0 when there are none).
  float getRateErrPerMachine() {
    if (updated_) calcStat();
    if (nReports_)
      return rateErr/(1.*nReports_);
    return 0.;
  }

  float getEvtTime() {
    if (updated_) calcStat();
    return evtTimeAvg;
  }

  float getEvtTimeErr() {
    if (updated_) calcStat();
    return evtTimeErr;
  }

  unsigned int getNSampledNonIdle() {
    if (updated_) calcStat();
    return nSampledNonIdle_;
  }

  // Complement of the idle sample fraction.
  float getFracBusy() {
    if (updated_) calcStat();
    return 1.-fracWaitingAvg;
  }

  float getFracCPUBusy() {
    if (updated_) calcStat();
    return fracCPUBusy_;
  }

  unsigned int getReports() {
    return nReports_;
  }

  // Average report interval; 0 when no interval has been accumulated
  // (unlike avgDeltaT_, which then falls back to the default of 23).
  float getDt() {
    if (updated_) calcStat();
    return avgDeltaT2_;
  }

  // Returns up to MODNAMES (module index, sample count) pairs ordered by
  // descending sample count; ties keep ascending table order.
  // Returns an empty vector once the table has been released.
  std::vector<std::pair<unsigned int, unsigned int>> getOffendersVector() {
    typedef std::pair<unsigned int,unsigned int> entry_t;
    std::vector<entry_t> ret;
    if (updated_) calcStat();
    if (!moduleSamplingSums) return ret;

    // Sort a copy so the live table keeps its layout. A vector replaces the
    // former new[]/memcpy/qsort/delete sequence, which freed the array with
    // the wrong delete form and leaked it if push_back threw.
    std::vector<entry_t> sorted(moduleSamplingSums, moduleSamplingSums+nmodulenames_);
    std::stable_sort(sorted.begin(), sorted.end(),
                     [](const entry_t &a, const entry_t &b) { return a.second > b.second; });

    for (size_t i=0; i<sorted.size() && ret.size()<MODNAMES; i++) {
      // NOTE(review): module index 2 is always excluded; presumably a
      // reserved/pseudo module slot -- confirm against the module legend.
      if (sorted[i].first==2) continue;
      ret.push_back(sorted[i]);
    }
    return ret;
  }

  // Fraction of all samples (idle + non-idle) attributed to module index x;
  // 0 when x is out of range or nothing has been sampled.
  float getOffenderFracAt(unsigned int x) {
    if (x<nmodulenames_) {
      if (updated_) calcStat();
      float total = nSampledNonIdle_+nSampledIdle_;
      if (total>0.) {
        for (size_t i=0;i<nmodulenames_;i++) {
          if (moduleSamplingSums[i].first==x)
            return moduleSamplingSums[i].second/total;
        }
      }
    }
    return 0.;
  }
};
00532 
00533 
00534     //DQM
00535     boost::shared_ptr<std::vector<edm::ParameterSet> > pServiceSets_;
00536     edm::ServiceToken               serviceToken_;
00537     edm::EventProcessor             *evtProcessor_;
00538     bool                            meInitialized_;
00539     bool                            meInitializedStreams_;
00540     bool                            meInitializedDatasets_;
00541     DQMService                      *dqmService_;
00542     DQMStore                        *dqmStore_;
00543     std::string                     configString_;
00544     xdata::Boolean                  dqmEnabled_;
00545 
00546     std::map<unsigned int,int> nbSubsList;
00547     std::map<int,unsigned int> nbSubsListInv;
00548     unsigned int nbSubsClasses;
00549     std::vector<MonitorElement*> meVecRate_;
00550     std::vector<MonitorElement*> meVecTime_;
00551     std::vector<MonitorElement*> meVecOffenders_;
00552     MonitorElement * rateSummary_;
00553     MonitorElement * reportPeriodSummary_;
00554     MonitorElement * timingSummary_;
00555     MonitorElement * busySummary_;
00556     MonitorElement * busySummary2_;
00557     MonitorElement * busySummaryUncorr1_;
00558     MonitorElement * busySummaryUncorr2_;
00559     MonitorElement * fuReportsSummary_;
00560     MonitorElement * daqBusySummary_;
00561     MonitorElement * daqBusySummary2_;
00562     MonitorElement * busyModules_;
00563     unsigned int summaryLastLs_;
00564     std::vector<std::map<unsigned int, unsigned int> > occupancyNameMap;
00565     //1 queue per number of subProcesses (and one common)
00566     std::deque<commonLsStat*> commonLsHistory;
00567     std::deque<lsStat*> * lsHistory;
00568 
00569     //endpath statistics
00570     std::vector<MonitorElement *> endPathRates_;
00571 
00572     //dataset statistics
00573     std::vector<MonitorElement *> datasetRates_;
00574 
00575     std::vector<unsigned int> currentLs_;
00576 
00577     xdata::UnsignedInteger32 saveLsInterval_;
00578     unsigned int ilumiprev_;
00579     std::list<std::string> pastSavedFiles_;
00580     xdata::String dqmSaveDir_;
00581     xdata::Boolean dqmFilesWritable_;
00582     xdata::String topLevelFolder_;
00583     unsigned int savedForLs_;
00584     std::string fileBaseName_;
00585     bool writeDirectoryPresent_;
00586 
00587     timeval * reportingStart_;
00588     unsigned int lastSavedForTime_;
00589 
00590     unsigned int dsMismatch;
00591   }; // class iDie
00592 
// qsort-compatible comparator for arrays of
// std::pair<unsigned int,unsigned int>: orders elements by descending
// `second`; `first` is not looked at.
//
// a, b: pointers to the two pairs being compared (must not be null).
// Returns -1 if a's count is larger, 1 if it is smaller, 0 if equal.
//
// Declared inline because it is defined in a header: without it every
// translation unit including this file emits its own external definition.
inline int modlistSortFunction( const void *a, const void *b)
{
  typedef std::pair<unsigned int,unsigned int> entry_t;
  const entry_t *lhs = static_cast<const entry_t*>(a);
  const entry_t *rhs = static_cast<const entry_t*>(b);
  if (lhs->second > rhs->second)
    return -1;
  if (lhs->second < rhs->second)
    return 1;
  return 0;
}
00603 
// Strips from val its fmod remainder with respect to mod, i.e. moves val
// toward zero onto a multiple of mod (7.5 with mod 2 -> 6, -7.5 -> -6).
// A zero mod returns val unchanged; fmod(val, 0) would otherwise make the
// result NaN.
//
// Declared inline because it is defined in a header: without it every
// translation unit including this file emits its own external definition.
inline float fround(float val, float mod) {
  if (mod == 0.f) return val;
  return val - fmod(val,mod);
}
00607 
00608 } // namespace evf
00609 
00610 
00611 #endif