CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_4/src/EventFilter/Goodies/src/iDie.h

Go to the documentation of this file.
00001 #ifndef EVENTFILTER_GOODIES_IDIE_H
00002 #define EVENTFILTER_GOODIES_IDIE_H
00003 
00004 #include "EventFilter/Utilities/interface/Exception.h"
00005 #include "EventFilter/Utilities/interface/TriggerReportDef.h"
00006 
00007 #include "xdata/String.h"
00008 #include "xdata/UnsignedInteger32.h"
00009 #include "xdata/Boolean.h"
00010 #include "xdata/ActionListener.h"
00011 
00012 #include "xoap/MessageReference.h"
00013 #include "xoap/MessageFactory.h"
00014 #include "xoap/Method.h"
00015 
00016 #include "xgi/Utils.h"
00017 #include "xgi/Input.h"
00018 #include "xgi/Output.h"
00019 #include "xgi/Method.h"
00020 
00021 #include "xdaq/Application.h"
00022 
00023 #include "toolbox/net/URN.h"
00024 #include "toolbox/fsm/exception/Exception.h"
00025 
00026 
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <deque>
#include <list>
#include <map>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include <sys/time.h>
#include <math.h>
00032 
00033 #include "TFile.h"
00034 #include "TTree.h"
00035 
00036 #include "FWCore/Framework/interface/EventProcessor.h"
00037 #include "DQMServices/Core/src/DQMService.h"
00038 #include "DQMServices/Core/interface/DQMStore.h"
00039 #include "DQMServices/Core/interface/MonitorElement.h"
00040 
00041 #define MODNAMES 25
00042 
00043 namespace evf {
00044 
00045   int modlistSortFunction( const void *a, const void *b);
00046 
00047   namespace internal{ // plain record types; iDie uses them for its fus_ map values (fu) and its r_ member (rate)
00048    struct fu{ // mapped type of the fmap typedef (std::map keyed by std::string) declared right after this namespace
00049       time_t tstamp; // a time stamp -- NOTE(review): which event it marks is not visible in this header
00050       unsigned int ccount; // a counter -- presumably the number of entries recorded for this key; TODO confirm in iDie.cc
00051       std::vector<pid_t> cpids; // process ids -- presumably of the reported child processes; TODO confirm
00052       std::vector<std::string> signals; // one string per entry -- presumably aligned index-by-index with cpids; verify against the writer
00053       std::vector<std::string> stacktraces; // one string per entry -- presumably aligned index-by-index with cpids; verify against the writer
00054     };
00055    struct rate{ // flat block of int counters; the arrays are sized by evf::max_paths / evf::max_endpaths
00056      int nproc; // NOTE(review): field order defines this struct's memory layout; iDie keeps an instance (r_) next to its
00057      int nsub; // TFile/TTree/TBranch members, so it is presumably used as a ROOT branch buffer -- confirm before
00058      int nrep; // reordering, resizing or inserting fields
00059      int npath;
00060      int nendpath;
00061      int ptimesRun[evf::max_paths]; // p* arrays: one slot per path index (capacity evf::max_paths)
00062      int ptimesPassedPs[evf::max_paths];
00063      int ptimesPassedL1[evf::max_paths];
00064      int ptimesPassed[evf::max_paths];
00065      int ptimesFailed[evf::max_paths];
00066      int ptimesExcept[evf::max_paths];
00067      int etimesRun[evf::max_endpaths]; // e* arrays: one slot per endpath index (capacity evf::max_endpaths)
00068      int etimesPassedPs[evf::max_endpaths];
00069      int etimesPassedL1[evf::max_endpaths];
00070      int etimesPassed[evf::max_endpaths];
00071      int etimesFailed[evf::max_endpaths];
00072      int etimesExcept[evf::max_endpaths];
00073    };
00074 
00075   }
00076   typedef std::map<std::string,internal::fu> fmap;
00077   typedef fmap::iterator ifmap;
00078   
00079   class iDie : public xdaq::Application,
00080     public xdata::ActionListener
00081   {
00082   public:
00083     //
00084     // xdaq instantiator macro
00085     //
00086     XDAQ_INSTANTIATOR();
00087   
00088     
00089     //
00090     // construction/destruction
00091     //
00092     iDie(xdaq::ApplicationStub *s);
00093     virtual ~iDie();
00094     //UI
00095     void defaultWeb(xgi::Input *in,xgi::Output *out)
00096       throw (xgi::exception::Exception);
00097     void summaryTable(xgi::Input *in,xgi::Output *out)
00098       throw (xgi::exception::Exception);
00099     void detailsTable(xgi::Input *in,xgi::Output *out)
00100       throw (xgi::exception::Exception);
00101     void dumpTable(xgi::Input *in,xgi::Output *out)
00102       throw (xgi::exception::Exception);
00103     void updater(xgi::Input *in,xgi::Output *out)
00104       throw (xgi::exception::Exception);
00105     void iChoke(xgi::Input *in,xgi::Output *out)
00106       throw (xgi::exception::Exception);
00107     void iChokeMiniInterface(xgi::Input *in,xgi::Output *out)
00108       throw (xgi::exception::Exception);
00109     void spotlight(xgi::Input *in,xgi::Output *out)
00110       throw (xgi::exception::Exception);
00111     //AI
00112     void postEntry(xgi::Input*in,xgi::Output*out)
00113       throw (xgi::exception::Exception);
00114     void postEntryiChoke(xgi::Input*in,xgi::Output*out)
00115       throw (xgi::exception::Exception);
00116     
00117     // *fake* fsm soap command callback
00118     xoap::MessageReference fsmCallback(xoap::MessageReference msg)
00119       throw (xoap::exception::Exception);
00120 
00121     // xdata:ActionListener interface
00122     void actionPerformed(xdata::Event& e);
00123 
00124 
00125   private:
00126 
/// Index view over a vector of ints, ordered by descending value.
///
/// After construction, operator[](k) yields the k-th largest value of the
/// source vector and ii(k) yields the position of that value in the source.
/// Only a reference to the source vector is stored (arr_), so the source
/// must outlive this object and must not be resized while it is in use.
struct sorted_indices{
  /// Builds the identity permutation 0..arr.size()-1 and sorts it so that
  /// the referenced values are in descending order.
  sorted_indices(const std::vector<int> &arr) : arr_(arr)
  {
    ind_.resize(arr_.size(),0);
    for (size_t i=0;i<ind_.size();++i) ind_[i] = static_cast<int>(i);
    // Sorting the reversed range with a "less" predicate leaves the forward
    // range in descending order.  The predicate is a small lambda rather than
    // *this: std::sort copies its comparator by value, and copying *this
    // would duplicate the whole index vector on every such copy.
    std::sort(ind_.rbegin(),ind_.rend(),
              [this](size_t a, size_t b) { return arr_[a]<arr_[b]; });
  }

  /// Value at sorted position ind (0 = largest).
  int operator[](size_t ind) const {return arr_[ind_[ind]];}

  /// Strict "less" on the referenced values at source positions a and b.
  bool operator()(const size_t a, const size_t b) const
  {
    return arr_[a]<arr_[b];
  }

  /// Source position of the element at sorted position ind.
  int ii(size_t ind) const {return ind_[ind];}

  std::vector<int> ind_;         // permutation of source positions, descending by value
  const std::vector<int> &arr_;  // non-owning reference to the source values
};
00145     //
00146     // private member functions
00147     //
00148     class lsStat;
00149     class commonLsStat;
00150 
00151     void reset();
00152     void parseModuleLegenda(std::string);
00153     void parseModuleHisto(const char *, unsigned int);
00154     void parsePathLegenda(std::string);
00155     void parsePathHisto(const unsigned char *, unsigned int);
00156     void initFramework();
00157     void deleteFramework();
00158     void initMonitorElements();
00159     void initMonitorElementsStreams();
00160     void fillDQMStatHist(unsigned int nbsIdx, unsigned int lsid);
00161     void fillDQMModFractionHist(unsigned int nbsIdx, unsigned int lsid, unsigned int nonIdle,
00162                                  std::vector<std::pair<unsigned int, unsigned int>> offenders);
00163  
00164     void updateRollingHistos(unsigned int nbsIdx, unsigned int lsid, lsStat * lst, commonLsStat * clst, bool roll);
00165     void updateStreamHistos(unsigned int forls, commonLsStat *clst, commonLsStat *prevclst);
00166     void doFlush();
00167     void perLumiFileSaver(unsigned int lsid);
00168     //
00169     // member data
00170     //
00171 
00172     // message logger
00173     Logger                          log_;
00174     std::string                     dqmState_;          
00175     // monitored parameters
00176     xdata::String                   url_;
00177     xdata::String                   class_;
00178     xdata::UnsignedInteger32        instance_;
00179     xdata::String                   hostname_;
00180     xdata::UnsignedInteger32        runNumber_;
00181     xdata::String                   dqmCollectorHost_;
00182     xdata::String                   dqmCollectorPort_;
00183     fmap                            fus_;
00184     
00185     unsigned int                    totalCores_;
00186     unsigned int                    nstates_;   
00187     std::vector<int>                cpuentries_;
00188     std::vector<std::vector<int> >  cpustat_;
00189     std::vector<std::string>        mapmod_;
00190     unsigned int                    last_ls_;
00191     std::vector<TriggerReportStatic>trp_;
00192     std::vector<int>                trpentries_;
00193     std::vector<std::string>        mappath_;
00194     //root stuff
00195     TFile                          *f_;
00196     TTree                          *t_;
00197     TBranch                        *b_;
00198     TBranch                        *b1_;
00199     TBranch                        *b2_;
00200     TBranch                        *b3_;
00201     TBranch                        *b4_;
00202     int                            *datap_;
00203     TriggerReportStatic            *trppriv_;
00204     internal::rate                  r_;
00205 
00206     //message statistics 
00207     int                             nModuleLegendaMessageReceived_;
00208     int                             nPathLegendaMessageReceived_;
00209     int                             nModuleLegendaMessageWithDataReceived_;
00210     int                             nPathLegendaMessageWithDataReceived_;
00211     int                             nModuleHistoMessageReceived_;
00212     int                             nPathHistoMessageReceived_;
00213     timeval                         runStartDetectedTimeStamp_;
00214     timeval                         lastModuleLegendaMessageTimeStamp_;
00215     timeval                         lastPathLegendaMessageTimeStamp_;
00216 
00217     //DQM histogram statistics
00218     std::vector<unsigned int> epInstances;
00219     std::vector<unsigned int> epMax;
00220     std::vector<float> HTscaling;
00221     std::vector<unsigned int> nbMachines;
00222     std::vector<float> machineWeight;
00223     std::vector<float> machineWeightInst;
00224 
00225     std::vector<std::string > endPathNames_;
00226 
/// Per-LS summary with one slot per "class" (the number of slots is the
/// `classes` constructor argument).  For every class it stores a rate, four
/// busy fractions (measured / theoretical, each in a plain and a CPU
/// variant) and the number of machine reports.
///
/// All members are public and are also accessed directly, so the vectors
/// are normally all `classes` long; endPathCounts_ is left empty by the
/// constructor and is not touched by any method of this class.
class commonLsStat {

  public:
  unsigned int ls_;                          // LS id passed to the constructor
  std::vector<float> rateVec_;               // rate per class
  std::vector<float> busyVec_;               // busy fraction per class
  std::vector<float> busyCPUVec_;            // CPU busy fraction per class
  std::vector<float> busyVecTheor_;          // theoretical busy fraction per class
  std::vector<float> busyCPUVecTheor_;       // theoretical CPU busy fraction per class
  std::vector<unsigned int> nbMachines;      // number of machine reports per class
  std::vector<unsigned int> endPathCounts_;  // not filled in by this class

  /// Creates a record for LS `lsid` with `classes` zero-initialised slots.
  commonLsStat(unsigned int lsid,unsigned int classes) :
    ls_(lsid),
    rateVec_(classes,0.f),
    busyVec_(classes,0.f),
    busyCPUVec_(classes,0.f),
    busyVecTheor_(classes,0.f),
    busyCPUVecTheor_(classes,0.f),
    nbMachines(classes,0u)
  {
  }

  /// Stores all values of one class.  classIdx must be smaller than the
  /// `classes` value given at construction; it is not range-checked.
  void setBusyForClass(unsigned int classIdx,float rate,float busy,float busyTheor, float busyCPU, float busyCPUTheor, unsigned int nMachineReports) {
    rateVec_[classIdx]=rate;
    busyVec_[classIdx]=busy;
    busyCPUVec_[classIdx]=busyCPU;
    busyVecTheor_[classIdx]=busyTheor;
    busyCPUVecTheor_[classIdx]=busyCPUTheor;
    nbMachines[classIdx]=nMachineReports;
  }

  /// Sum of the rates of all classes.
  float getTotalRate() const {
    float totRate=0;
    for (size_t i=0;i<rateVec_.size();i++) totRate+=rateVec_[i];
    return totRate;
  }

  /// Average of the measured busy fraction over all classes, each class
  /// weighted by machineWeightInst[i]*nbMachines[i].
  /// @param procstat          false: average busyVec_; true: average busyCPUVec_
  /// @param machineWeightInst per-class weight; classes beyond the shortest
  ///                          of the involved vectors are ignored
  /// @return the weighted average, or 0 when the total weight is not positive
  float getBusyTotalFrac(bool procstat,const std::vector<float> & machineWeightInst) const {
    const std::vector<float> &busy = procstat ? busyCPUVec_ : busyVec_;
    const size_t nClasses = commonSize(busy.size(),machineWeightInst.size());
    double sum=0;
    double sumMachines=0;
    for (size_t i=0;i<nClasses;i++) {
      sum+=machineWeightInst[i]*nbMachines[i]*busy[i];
      sumMachines+=machineWeightInst[i]*nbMachines[i];
    }
    if (sumMachines>0)
      return float(sum/sumMachines);
    else return 0.;
  }

  /// Same as getBusyTotalFrac() but for the theoretical fractions
  /// (busyVecTheor_ / busyCPUVecTheor_).  Accumulates in float.
  float getBusyTotalFracTheor(bool procstat,const std::vector<float> & machineWeight) const {
    const std::vector<float> &busy = procstat ? busyCPUVecTheor_ : busyVecTheor_;
    const size_t nClasses = commonSize(busy.size(),machineWeight.size());
    float sum=0;
    float sumMachines=0;
    for (size_t i=0;i<nClasses;i++) {
      sum+=machineWeight[i]*nbMachines[i]*busy[i];
      sumMachines+=machineWeight[i]*nbMachines[i];
    }
    if (sumMachines>0)
      return sum/sumMachines;
    else return 0.;
  }

  /// Total number of machine reports over all classes.
  unsigned int getNReports() const {
    unsigned int sum=0;
    for (size_t i=0;i<nbMachines.size();i++) sum+=nbMachines[i];
    return sum;
  }

  /// One "<class>/r:<rate>/b:<busy>/n:<reports>; " entry per class.
  std::string printInfo() const {
    std::ostringstream info;
    const size_t nClasses = commonSize(rateVec_.size(),busyVec_.size());
    for (size_t i=0;i<nClasses;i++) {
      info << i << "/r:" << rateVec_[i] <<"/b:"<<busyVec_[i]<<"/n:"<<nbMachines[i]<<"; ";
    }
    return info.str();
  }

  private:
  /// Largest class count that is a valid index range for nbMachines and for
  /// two further vectors of sizes a and b (guards against a short
  /// caller-supplied weight vector or externally resized members).
  size_t commonSize(size_t a, size_t b) const {
    size_t n = nbMachines.size();
    if (a<n) n=a;
    if (b<n) n=b;
    return n;
  }
};
00309 
/// Accumulator for the reports received for one LS.
///
/// update() adds one report (sample counts, processed-event count, CPU-busy
/// value and a time delta in ms).  Derived quantities (rate, event time,
/// busy fractions) are recomputed lazily by calcStat() the next time a
/// getter is called after an update.
///
/// The object owns the array behind moduleSamplingSums: nmodulenames_
/// (module index, sample count) pairs that callers fill through
/// getModuleSamplingPtr().  Because of that raw ownership the class is
/// non-copyable.
class lsStat {
  public:
  unsigned int ls_;               // LS id given at construction
  bool updated_;                  // true when sums changed since the last calcStat()
  unsigned int nbSubs_;           // constructor argument nbSubs; scales the event time
  unsigned int nSampledNonIdle_;  // sum of non-idle sample counts
  unsigned int nSampledNonIdle2_; // sum of squared non-idle sample counts
  unsigned int nSampledIdle_;     // sum of idle sample counts
  unsigned int nSampledIdle2_;    // sum of squared idle sample counts
  unsigned int nProc_;            // sum of processed counts
  unsigned int nProc2_;           // sum of squared processed counts
  unsigned int nCPUBusy_;         // sum of the ncpubusy values passed to update()
  unsigned int nReports_;         // number of update() calls
  unsigned int nMaxReports_;      // constructor argument maxreps (not used inside this class)
  double rateAvg;
  double rateErr;
  double evtTimeAvg;
  double evtTimeErr;
  double fracWaitingAvg;
  double fracCPUBusy_;
  unsigned int nmodulenames_;     // length of moduleSamplingSums (0 after deleteModuleSamplingPtr())
  unsigned int sumDeltaTms_;      // sum of the deltaTms values passed to update()
  float avgDeltaT_;               // sumDeltaTms_ * 0.001 / nReports_; 23 when unknown
  std::pair<unsigned int,unsigned int> *moduleSamplingSums; // owned array, or nullptr

  /// @param ls           LS id
  /// @param nbSubs       stored in nbSubs_
  /// @param maxreps      stored in nMaxReports_
  /// @param nmodulenames number of (index, count) slots to allocate; slot i
  ///                     starts as (i, 0)
  lsStat(unsigned int ls, unsigned int nbSubs,unsigned int maxreps,unsigned int nmodulenames):
    ls_(ls),updated_(true),nbSubs_(nbSubs),
    nSampledNonIdle_(0),nSampledNonIdle2_(0),nSampledIdle_(0),nSampledIdle2_(0),
    nProc_(0),nProc2_(0),nCPUBusy_(0),nReports_(0),nMaxReports_(maxreps),
    // Same values calcStat() produces when there are no reports, so the
    // public fields are never read uninitialised.
    rateAvg(0.),rateErr(0.),evtTimeAvg(0.),evtTimeErr(0.),fracWaitingAvg(1.),fracCPUBusy_(0.),
    nmodulenames_(nmodulenames),
    sumDeltaTms_(0),avgDeltaT_(23),
    moduleSamplingSums(new std::pair<unsigned int,unsigned int>[nmodulenames])
  {
    for (unsigned int i=0;i<nmodulenames_;i++) {
      moduleSamplingSums[i].first=i;
      moduleSamplingSums[i].second=0;
    }
  }

  // The array is owned through a raw pointer; a member-wise copy would make
  // two objects delete the same array.
  lsStat(const lsStat &) = delete;
  lsStat &operator=(const lsStat &) = delete;

  ~lsStat() {
    delete[] moduleSamplingSums; // allocated with new[]; deleting nullptr is a no-op
  }

  /// Adds one report to the running sums and marks the derived values stale.
  void update(unsigned int nSampledNonIdle,unsigned int nSampledIdle,
              unsigned int nProc,unsigned int ncpubusy, unsigned int deltaTms)
  {
    nReports_++;
    nSampledNonIdle_+=nSampledNonIdle;
    nSampledNonIdle2_+=nSampledNonIdle*nSampledNonIdle;
    nSampledIdle_+=nSampledIdle;
    nSampledIdle2_+=nSampledIdle*nSampledIdle;
    nProc_+=nProc;
    nProc2_+=nProc*nProc;
    nCPUBusy_+=ncpubusy;
    sumDeltaTms_+=deltaTms;
    updated_=true;
  }

  /// Direct access to the owned (module index, sample count) array; nullptr
  /// after deleteModuleSamplingPtr().  The array has nmodulenames_ entries.
  std::pair<unsigned int,unsigned int> * getModuleSamplingPtr() {
    return moduleSamplingSums;
  }

  /// Releases the module sampling array early; afterwards the offender
  /// queries return empty / zero results.
  void deleteModuleSamplingPtr() {
    delete[] moduleSamplingSums;
    moduleSamplingSums=nullptr;
    nmodulenames_=0;
  }

  /// Recomputes the derived values from the running sums.  Does nothing
  /// unless update() was called since the previous computation.
  void calcStat()
  {
    if (!updated_) return;
    if (nReports_) {
      // 0.001/nReports_ : per-report average combined with a 1/1000 scale
      // (applied to the millisecond sum and to the CPU-busy sum alike).
      float tinv = 0.001/nReports_;
      fracCPUBusy_=nCPUBusy_*tinv;
      avgDeltaT_=sumDeltaTms_*tinv;
      if (avgDeltaT_==0.) avgDeltaT_=23.;//default value
      rateAvg=nProc_ / avgDeltaT_;
      rateErr=sqrt(fabs(nProc2_ - sq(nProc_)))/avgDeltaT_;
    }
    else {
      fracCPUBusy_=0.;
      rateAvg=0.;
      rateErr=0.;
      avgDeltaT_=23.;
    }

    evtTimeAvg=0.;evtTimeErr=0.;fracWaitingAvg=1.;
    unsigned int sampled = nSampledNonIdle_+nSampledIdle_;
    if (rateAvg!=0. && sampled) {
        float nAllInv = 1./sampled;
        fracWaitingAvg= nSampledIdle_*nAllInv;
        double nSampledIdleErr2=fabs(nSampledIdle2_ - sq(nSampledIdle_));
        double nSampledNonIdleErr2=fabs(nSampledNonIdle2_ - sq(nSampledNonIdle_));
        double fracWaitingAvgErr= sqrt(
                                (sq(nSampledIdle_)*nSampledNonIdleErr2
                                 + sq(nSampledNonIdle_)*nSampledIdleErr2))*sq(nAllInv);
        float rateAvgInv=1./rateAvg;
        evtTimeAvg=nbSubs_ * nReports_ * (1.-fracWaitingAvg)*rateAvgInv;
        evtTimeErr = nbSubs_ * nReports_ * sqrt(sq(fracWaitingAvg*rateErr*sq(rateAvgInv)) + sq(fracWaitingAvgErr*rateAvgInv));
    }
    updated_=false;
  }

  /// nProc_ / avgDeltaT_ (0 without reports).
  float getRate() {
    if (updated_) calcStat();
    return rateAvg;
  }

  float getRateErr() {
    if (updated_) calcStat();
    return rateErr;
  }

  /// Rate divided by the number of reports (0 without reports).
  float getRatePerMachine() {
    if (updated_) calcStat();
    if (nReports_)
      return rateAvg/(1.*nReports_);
    return 0.;
  }

  /// Rate error divided by the number of reports (0 without reports).
  float getRateErrPerMachine() {
    if (updated_) calcStat();
    if (nReports_)
      return rateErr/(1.*nReports_);
    return 0.;
  }

  float getEvtTime() {
    if (updated_) calcStat();
    return evtTimeAvg;
  }

  float getEvtTimeErr() {
    if (updated_) calcStat();
    return evtTimeErr;
  }

  unsigned int getNSampledNonIdle() {
    if (updated_) calcStat();
    return nSampledNonIdle_;
  }

  /// 1 - idle fraction of all samples (0 when nothing was sampled).
  float getFracBusy() {
    if (updated_) calcStat();
    return 1.-fracWaitingAvg;
  }

  float getFracCPUBusy() {
    if (updated_) calcStat();
    return fracCPUBusy_;
  }

  /// Number of update() calls so far.
  unsigned int getReports() {
    return nReports_;
  }

  /// Returns up to MODNAMES (module index, sample count) pairs ordered by
  /// descending sample count; entries with equal counts keep their array
  /// order.  The entry whose module index is 2 is always left out
  /// (NOTE(review): the reason is not visible in this header).  Empty once
  /// the sampling array has been released.
  std::vector<std::pair<unsigned int, unsigned int>> getOffendersVector() {
    std::vector<std::pair<unsigned int, unsigned int>> ret;
    if (updated_) calcStat();
    if (!moduleSamplingSums) return ret;

    // Work on a copy so the live sums keep their order.  A vector replaces
    // the former new[]/memcpy/qsort buffer: it cannot leak if push_back
    // throws and does not byte-copy std::pair objects.
    std::vector<std::pair<unsigned int, unsigned int>>
      byCount(moduleSamplingSums, moduleSamplingSums+nmodulenames_);
    std::stable_sort(byCount.begin(), byCount.end(),
                     [](const std::pair<unsigned int,unsigned int> &a,
                        const std::pair<unsigned int,unsigned int> &b)
                     { return a.second > b.second; });

    const size_t maxEntries = static_cast<size_t>(MODNAMES);
    for (size_t i=0; i<byCount.size() && ret.size()<maxEntries; ++i) {
      if (byCount[i].first==2) continue;
      ret.push_back(byCount[i]);
    }
    return ret;
  }

  /// Sample count of module index x divided by the total number of samples
  /// (idle + non-idle); 0 when x is out of range, nothing was sampled, or
  /// the index is not found.
  float getOffenderFracAt(unsigned int x) {
    if (x<nmodulenames_) {
      if (updated_) calcStat();
      float total = nSampledNonIdle_+nSampledIdle_;
      if (total>0.) {
        for (size_t i=0;i<nmodulenames_;i++) {
          if (moduleSamplingSums[i].first==x)
            return moduleSamplingSums[i].second/total;
        }
      }
    }
    return 0.;
  }

  private:
  /// x*x in double precision (replaces pow(x,2)).
  static double sq(double x) { return x*x; }
};
00504 
00505 
00506     //DQM
00507     boost::shared_ptr<std::vector<edm::ParameterSet> > pServiceSets_;
00508     edm::ServiceToken               serviceToken_;
00509     edm::EventProcessor             *evtProcessor_;
00510     bool                            meInitialized_;
00511     bool                            meInitializedStreams_;
00512     DQMService                      *dqmService_;
00513     DQMStore                        *dqmStore_;
00514     std::string                     configString_;
00515     xdata::Boolean                  dqmEnabled_;
00516 
00517     std::map<unsigned int,int> nbSubsList;
00518     std::map<int,unsigned int> nbSubsListInv;
00519     unsigned int nbSubsClasses;
00520     std::vector<MonitorElement*> meVecRate_;
00521     std::vector<MonitorElement*> meVecTime_;
00522     std::vector<MonitorElement*> meVecOffenders_;
00523     MonitorElement * rateSummary_;
00524     MonitorElement * timingSummary_;
00525     MonitorElement * busySummary_;
00526     MonitorElement * busySummary2_;
00527     MonitorElement * fuReportsSummary_;
00528     MonitorElement * daqBusySummary_;
00529     MonitorElement * busyModules_;
00530     unsigned int summaryLastLs_;
00531     std::vector<std::map<unsigned int, unsigned int> > occupancyNameMap;
00532     //1 queue per number of subProcesses (and one common)
00533     std::deque<commonLsStat*> commonLsHistory;
00534     std::deque<lsStat*> * lsHistory;
00535 
00536     //endpath statistics
00537     std::vector<MonitorElement *> endPathRates_;
00538     std::vector<MonitorElement *> endPathCumulative_;
00539     //std::map<unsigned int,std::pair<unsigned int,std::string>> endPathIndex_;
00540 
00541     std::vector<unsigned int> currentLs_;
00542 
00543     xdata::UnsignedInteger32 saveLsInterval_;
00544     unsigned int ilumiprev_;
00545     std::list<std::string> pastSavedFiles_;
00546     xdata::String dqmSaveDir_;
00547     xdata::Boolean dqmFilesWritable_;
00548     xdata::String topLevelFolder_;
00549     unsigned int savedForLs_;
00550     std::string fileBaseName_;
00551     bool writeDirectoryPresent_;
00552   }; // class iDie
00553 
/// qsort-style comparator for arrays of std::pair<unsigned int,unsigned int>.
///
/// Orders elements by descending `second`: returns -1 when a's second is
/// larger than b's, 0 when they are equal and 1 otherwise.  `first` is not
/// looked at.
///
/// Declared inline because the definition lives in a header: without it
/// every translation unit including this file would emit its own external
/// definition and the link would fail with a multiple-definition error.
inline int modlistSortFunction( const void *a, const void *b)
{
  typedef std::pair<unsigned int,unsigned int> entry_t;
  const entry_t &lhs = *static_cast<const entry_t*>(a);
  const entry_t &rhs = *static_cast<const entry_t*>(b);
  if (lhs.second > rhs.second)
    return -1;
  if (lhs.second == rhs.second)
    return 0;
  return 1;
}
00564 
/// Truncates val toward zero to a multiple of mod, i.e. val - fmod(val, mod).
/// The result is NaN when mod is 0, because fmod is.
///
/// Declared inline because the definition lives in a header; a non-inline
/// definition would be emitted by every including translation unit and
/// break the link with a multiple-definition error.
inline float fround(float val, float mod) {
  return val - fmod(val,mod);
}
00568 
00569 } // namespace evf
00570 
00571 
00572 #endif