CMS 3D CMS Logo

/afs/cern.ch/work/a/aaltunda/public/www/CMSSW_6_2_7/src/HLTriggerOffline/Egamma/src/EmDQMPostProcessor.cc

Go to the documentation of this file.
00001 #include "HLTriggerOffline/Egamma/interface/EmDQMPostProcessor.h"
00002 
00003 #include "DQMServices/Core/interface/DQMStore.h"
00004 #include "DQMServices/Core/interface/MonitorElement.h"
00005 #include "FWCore/ServiceRegistry/interface/Service.h"
00006 #include "FWCore/Framework/interface/MakerMacros.h"
00007 
00008 #include <iostream>
00009 #include <string.h>
00010 #include <iomanip>
00011 #include <fstream>
00012 #include <math.h>
00013 
00014 #include <TEfficiency.h>
00015 
00016 //----------------------------------------------------------------------
00017 
00018 EmDQMPostProcessor::EmDQMPostProcessor(const edm::ParameterSet& pset)
00019 {
00020   subDir_  = pset.getUntrackedParameter<std::string>("subDir");
00021 
00022   dataSet_ = pset.getUntrackedParameter<std::string>("dataSet","unknown");
00023 
00024   noPhiPlots = pset.getUntrackedParameter<bool>("noPhiPlots", true);
00025 
00026   normalizeToReco = pset.getUntrackedParameter<bool>("normalizeToReco",false);
00027 }
00028 
00029 //----------------------------------------------------------------------
00030 
00031 void EmDQMPostProcessor::endRun(edm::Run const& run, edm::EventSetup const& es)
00032 {
00034   // setup DQM stor               //
00036   
00037   DQMStore * dqm = 0;
00038   dqm = edm::Service<DQMStore>().operator->();
00039 
00040   if ( ! dqm ) {
00041     edm::LogInfo("EmDQMPostProcessor") << "Cannot create DQMStore instance\n";
00042     return;
00043   }
00044 
00045 
00046   //go to the directory to be processed
00047   if(dqm->dirExists(subDir_)) dqm->cd(subDir_);
00048   else {
00049     edm::LogWarning("EmDQMPostProcessor") << "cannot find directory: " << subDir_ << " , skipping";
00050     return;
00051   }
00052 
00053   //--------------------
00054   // with respect to what are (some) efficiencies calculated ?
00055   std::string shortReferenceName;
00056   if (normalizeToReco)
00057     shortReferenceName = "RECO";
00058   else
00059     shortReferenceName = "gen";
00060 
00061 
00062 
00063   //--------------------
00064 
00065 
00067   //loop over all triggers/samples//
00069 
00070   // store dataset name (if defined) in output file
00071   // DQMStore:bookString seems to put a key in the file which is
00072   // of the form <dataSet>s=....</dataSet> which is not very convenient
00073   // (it points to a null pointer, one would have to loop over
00074   // all keys of the corresponding directory in the ROOT file
00075   // and check whether it is of the desired form and then parse
00076   // it from this string...).
00077   //
00078   // So we store the name of the dataset as the title of a histogram,
00079   // which is much easier to access...
00080   // TH1D *dataSetNameHisto = 
00081   dqm->book1D("DataSetNameHistogram",dataSet_,1,0,1);
00082 
00083   std::vector<std::string> subdirectories = dqm->getSubdirs();
00085   // Do everything twice: once for mc-matched histos,   //
00086   // once for unmatched histos                          //
00088 
00089   std::vector<std::string> postfixes;
00090   postfixes.push_back("");   //unmatched histograms
00091   postfixes.push_back("_RECO_matched"); // for data
00092   // we put this on the list even when we're running on 
00093   // data (where there is no generator information).
00094   // The first test in the loop will then fail and
00095   // the iteration is skipped.
00096   postfixes.push_back("_MC_matched"); 
00097 
00098   std::vector<TProfile *> allElePaths;
00099   int nEle = 0;
00100   int nPhoton = 0;
00101 
00102   // find the number of electron and photon paths
00103   for(std::vector<std::string>::iterator dir = subdirectories.begin() ;dir!= subdirectories.end(); ++dir) {
00104     if (dir->find("HLT_Ele") != std::string::npos || dir->find("HLT_DoubleEle") != std::string::npos || dir->find("HLT_TripleEle") != std::string::npos) ++nEle;
00105     else if (dir->find("HLT_Photon") != std::string::npos || dir->find("HLT_DoublePhoton") != std::string::npos) ++nPhoton;
00106   }
00107 
00108   std::vector<TProfile *> allPhotonPaths;
00109   for(std::vector<std::string>::iterator postfix=postfixes.begin(); postfix!=postfixes.end();postfix++){
00110     bool pop = false;
00111     int elePos = 1;
00112     int photonPos = 1;
00113 
00115     // computer per-event efficiencies //
00117     
00118     std::string histoName = "efficiency_by_step" + *postfix;
00119     std::string baseName = "total_eff" + *postfix;
00120 
00121     std::string allEleHistoName = "EfficiencyByPath_Ele" + *postfix;
00122     std::string allEleHistoLabel = "Efficiency_for_each_validated_electron_path" + *postfix;
00123     allElePaths.push_back(new TProfile(allEleHistoName.c_str(), allEleHistoLabel.c_str(), nEle, 0., (double)nEle, 0., 1.2));
00124     std::string allPhotonHistoName = "EfficiencyByPath_Photon" + *postfix;
00125     std::string allPhotonHistoLabel = "Efficiency_for_each_validated_photon_path" + *postfix;
00126     allPhotonPaths.push_back(new TProfile(allPhotonHistoName.c_str(), allPhotonHistoLabel.c_str(), nPhoton, 0., (double)nPhoton, 0., 1.2));
00127 
00128     for(std::vector<std::string>::iterator dir = subdirectories.begin(); dir!= subdirectories.end(); dir++) {
00129       dqm->cd(*dir);
00130 
00131       TH1F* basehist = getHistogram(dqm, dqm->pwd() + "/" + baseName);
00132       if (basehist == NULL)
00133         {
00134           //edm::LogWarning("EmDQMPostProcessor") << "histogram " << (dqm->pwd() + "/" + baseName) << " does not exist, skipping postfix '" << *postfix << "'";
00135           pop = true;
00136           dqm->goUp();
00137           continue;
00138         }
00139       // at least one histogram with postfix was found
00140       pop = false;
00141           
00142       TProfile* total = dqm->bookProfile(histoName,histoName,basehist->GetXaxis()->GetNbins(),basehist->GetXaxis()->GetXmin(),basehist->GetXaxis()->GetXmax(),0.,1.2)->getTProfile();
00143       total->GetXaxis()->SetBinLabel(1,basehist->GetXaxis()->GetBinLabel(1));
00144       
00145 //       std::vector<std::string> mes = dqm->getMEs();
00146 //       for(std::vector<std::string>::iterator me = mes.begin() ;me!= mes.end(); me++ )
00147 //      std::cout <<*me <<std::endl;
00148 //       std::cout <<std::endl;
00149       
00150       double value=0;
00151       double errorh=0,errorl=0,error=0;    
00152       //compute stepwise total efficiencies 
00153       for(int bin= total->GetNbinsX()-2 ; bin > 1  ; bin--){
00154         value=0;
00155         errorl=0;
00156         errorh=0;
00157         error=0;
00158         if(basehist->GetBinContent(bin-1) != 0){
00159           Efficiency( (int)basehist->GetBinContent(bin), (int)basehist->GetBinContent(bin-1), 0.683, value, errorl, errorh );
00160           error = value-errorl>errorh-value ? value-errorl : errorh-value;
00161         }
00162         total->SetBinContent( bin, value );
00163         total->SetBinEntries( bin, 1 );
00164         total->SetBinError( bin, sqrt(value*value+error*error) );
00165         total->GetXaxis()->SetBinLabel(bin,basehist->GetXaxis()->GetBinLabel(bin));
00166       }
00167 
00168       //set first bin to L1 efficiency
00169       if(basehist->GetBinContent(basehist->GetNbinsX()) !=0 ){
00170         Efficiency( (int)basehist->GetBinContent(1), (int)basehist->GetBinContent(basehist->GetNbinsX()), 0.683, value, errorl, errorh );
00171         error= value-errorl>errorh-value ? value-errorl : errorh-value;
00172       }else{
00173         value=0;error=0;
00174       }
00175       total->SetBinContent(1,value);
00176       total->SetBinEntries(1, 1 );
00177       total->SetBinError(1, sqrt(value*value+error*error) );
00178      
00179       // total efficiency relative to gen or reco
00180       if(basehist->GetBinContent(basehist->GetNbinsX()) !=0 ){
00181         Efficiency( (int)basehist->GetBinContent(basehist->GetNbinsX()-2), (int)basehist->GetBinContent(basehist->GetNbinsX()), 0.683, value, errorl, errorh );
00182         error= value-errorl>errorh-value ? value-errorl : errorh-value;
00183       }else{
00184         value=0;error=0;
00185       }
00186       total->SetBinContent(total->GetNbinsX(),value);
00187       total->SetBinEntries(total->GetNbinsX(),1);
00188       total->SetBinError(total->GetNbinsX(),sqrt(value*value+error*error));
00189       total->GetXaxis()->SetBinLabel(total->GetNbinsX(),("total efficiency rel. " + shortReferenceName).c_str());
00190       
00191       //total efficiency relative to L1
00192       if(basehist->GetBinContent(1) !=0 ){
00193         Efficiency( (int)basehist->GetBinContent(basehist->GetNbinsX()-2), (int)basehist->GetBinContent(1), 0.683, value, errorl, errorh );
00194         error= value-errorl > errorh-value ? value-errorl : errorh-value;
00195       }else{
00196         value=0;error=0;
00197       }
00198       total->SetBinContent(total->GetNbinsX()-1,value);
00199       total->SetBinError(total->GetNbinsX()-1,sqrt(value*value+error*error));
00200       total->SetBinEntries(total->GetNbinsX()-1,1);
00201       total->GetXaxis()->SetBinLabel(total->GetNbinsX()-1,"total efficiency rel. L1");
00202 
00203       //----------------------------------------
00204 
00206       // compute per-object efficiencies       //
00208       //MonitorElement *eff, *num, *denom, *genPlot, *effVsGen, *effL1VsGen;
00209       std::vector<std::string> varNames; 
00210       varNames.push_back("et");
00211       varNames.push_back("eta"); 
00212       if (!noPhiPlots) varNames.push_back("phi"); 
00213 
00214       std::string filterName;
00215       std::string filterName2;
00216       std::string denomName;
00217       std::string numName;
00218 
00219       // Get the gen-level (or reco, for data) plots
00220       std::string genName;
00221 
00222       // Get the L1 over gen filter first
00223       filterName2= total->GetXaxis()->GetBinLabel(1);
00224         
00225       //loop over variables (eta/phi/et)
00226       for(std::vector<std::string>::iterator var = varNames.begin(); var != varNames.end() ; var++){
00227         
00228         numName   = dqm->pwd() + "/" + filterName2 + *var + *postfix;
00229 
00230         if (normalizeToReco)
00231           genName   = dqm->pwd() + "/reco_" + *var ;
00232         else
00233           genName   = dqm->pwd() + "/gen_" + *var ;
00234 
00235         // Create the efficiency plot
00236         if(!dividehistos(dqm,numName,genName,"efficiency_"+filterName2+"_vs_"+*var +*postfix,*var,"eff. of"+filterName2+" vs "+*var +*postfix))
00237           break;
00238       } // loop over variables
00239     
00240       // get the filter names from the bin-labels of the master-histogram
00241       for (int filter=1; filter < total->GetNbinsX()-2; filter++) {
00242         filterName = total->GetXaxis()->GetBinLabel(filter);
00243         filterName2= total->GetXaxis()->GetBinLabel(filter+1);
00244         
00245         //loop over variables (eta/et/phi)
00246         for(std::vector<std::string>::iterator var = varNames.begin(); var != varNames.end() ; var++){
00247           numName   = dqm->pwd() + "/" + filterName2 + *var + *postfix;
00248           denomName = dqm->pwd() + "/" + filterName  + *var + *postfix;
00249 
00250           // Is this the last filter? Book efficiency vs gen (or reco, for data) level
00251           std::string temp = *postfix;
00252           if (filter==total->GetNbinsX()-3 && temp.find("matched")!=std::string::npos) {
00253             if (normalizeToReco)
00254               genName = dqm->pwd() + "/reco_" + *var;
00255             else
00256               genName = dqm->pwd() + "/gen_" + *var;
00257 
00258             if(!dividehistos(dqm,numName,genName,"final_eff_vs_"+*var,*var,"Efficiency Compared to " + shortReferenceName + " vs "+*var))
00259               break;
00260           }
00261 
00262           if(!dividehistos(dqm,numName,denomName,"efficiency_"+filterName2+"_vs_"+*var +*postfix,*var,"efficiency_"+filterName2+"_vs_"+*var + *postfix))
00263             break;
00264 
00265         } // loop over variables
00266       } // loop over monitoring modules within path
00267 
00268       dqm->goUp();
00269 
00270       // fill overall efficiency histograms
00271       std::string trigName = dir->substr(dir->rfind("/") + 1);
00272       trigName = trigName.replace(trigName.rfind("_DQM"),4,"");
00273       double totCont = total->GetBinContent(total->GetNbinsX());
00274       double totErr = total->GetBinError(total->GetNbinsX());
00275       if (trigName.find("HLT_Ele") != std::string::npos || trigName.find("HLT_DoubleEle") != std::string::npos || trigName.find("HLT_TripleEle") != std::string::npos) {
00276         allElePaths.back()->SetBinContent(elePos, totCont);
00277         allElePaths.back()->SetBinEntries(elePos, 1);
00278         allElePaths.back()->SetBinError(elePos, sqrt(totCont * totCont + totErr * totErr));
00279         allElePaths.back()->GetXaxis()->SetBinLabel(elePos, trigName.c_str());
00280         ++elePos;
00281       }
00282       else if (trigName.find("HLT_Photon") != std::string::npos || trigName.find("HLT_DoublePhoton") != std::string::npos) {
00283         allPhotonPaths.back()->SetBinContent(photonPos, totCont);
00284         allPhotonPaths.back()->SetBinEntries(photonPos, 1);
00285         allPhotonPaths.back()->SetBinError(photonPos, sqrt(totCont * totCont + totErr * totErr));
00286         allPhotonPaths.back()->GetXaxis()->SetBinLabel(photonPos, trigName.c_str());
00287         ++photonPos;
00288       }
00289 
00290     } // loop over dirs
00291     if (pop) {
00292       allElePaths.pop_back();
00293       allPhotonPaths.pop_back();
00294     } 
00295     else {
00296       allElePaths.back()->GetXaxis()->SetLabelSize(0.03);
00297       allPhotonPaths.back()->GetXaxis()->SetLabelSize(0.03);
00298       dqm->bookProfile(allEleHistoName, allElePaths.back())->getTProfile();
00299       dqm->bookProfile(allPhotonHistoName, allPhotonPaths.back())->getTProfile();
00300     }
00301   } // loop over postfixes
00302   
00303 }
00304 
00305 //----------------------------------------------------------------------
00306 
00307 TProfile* EmDQMPostProcessor::dividehistos(DQMStore * dqm, const std::string& numName, const std::string& denomName, const std::string& outName,const std::string& label,const std::string& titel){
00308   //std::cout << numName <<std::endl;
00309 
00310   TH1F* num = getHistogram(dqm,numName);
00311 
00312   //std::cout << denomName << std::endl;
00313   TH1F* denom = getHistogram(dqm, denomName);
00314 
00315   if (num == NULL)
00316     edm::LogWarning("EmDQMPostProcessor") << "numerator histogram " << numName << " does not exist";
00317 
00318   if (denom == NULL)
00319     edm::LogWarning("EmDQMPostProcessor") << "denominator histogram " << denomName << " does not exist";
00320 
00321   // Check if histograms actually exist
00322 
00323   if(!num || !denom) return 0;
00324 
00325   // Make sure we are able to book new element
00326   if (!dqm) return 0;
00327   
00328   TProfile* out = dqm->bookProfile(outName,titel,num->GetXaxis()->GetNbins(),num->GetXaxis()->GetXmin(),num->GetXaxis()->GetXmax(),0.,1.2)->getTProfile();
00329   out->GetXaxis()->SetTitle(label.c_str());
00330   out->SetYTitle("Efficiency");
00331   out->SetOption("PE");
00332   out->SetLineColor(2);
00333   out->SetLineWidth(2);
00334   out->SetMarkerStyle(20);
00335   out->SetMarkerSize(0.8);
00336   out->SetStats(kFALSE);
00337   for(int i=1;i<=num->GetNbinsX();i++){
00338     double e, low, high;
00339     Efficiency( (int)num->GetBinContent(i), (int)denom->GetBinContent(i), 0.683, e, low, high );
00340     double err = e-low>high-e ? e-low : high-e;
00341     //here is the trick to store info in TProfile:
00342     out->SetBinContent( i, e );
00343     out->SetBinEntries( i, 1 );
00344     out->SetBinError( i, sqrt(e*e+err*err) );
00345   }
00346 
00347   return out;
00348 }
00349 
00350 //----------------------------------------------------------------------
00351 
00352 TH1F *
00353 EmDQMPostProcessor::getHistogram(DQMStore *dqm, const std::string &histoPath)
00354 {
00355   MonitorElement *monElement = dqm->get(histoPath);
00356   if (monElement != NULL)
00357     return monElement->getTH1F();
00358   else
00359     return NULL;
00360 }
00361 
00362 //----------------------------------------------------------------------
00363 
00364 void 
00365 EmDQMPostProcessor::Efficiency(int passing, int total, double level, double &mode, double &lowerBound, double &upperBound)
00366 { 
00367   // protection (see also TGraphAsymmErrors::Efficiency(..), mimick the old behaviour )
00368   if (total == 0)
00369   {
00370     mode = 0.5;
00371     lowerBound = 0;
00372     upperBound = 1;
00373     return;
00374   }
00375 
00376   mode = passing / ((double) total);
00377 
00378   // note that the order of the two first arguments ('total' and 'passed') is the opposited
00379   // with respect to the earlier TGraphAsymmErrors::Efficiency(..) method
00380   //
00381   // see http://root.cern.ch/root/html/TEfficiency.html#compare for
00382   // an illustration of the coverage of the different methods provided by TEfficiency
00383   
00384   lowerBound = TEfficiency::Wilson(total, passing, level, false);
00385   upperBound = TEfficiency::Wilson(total, passing, level, true);
00386 }
00387 
00388 
00389 //----------------------------------------------------------------------
00390 
00391 DEFINE_FWK_MODULE(EmDQMPostProcessor);
00392 
00393 //----------------------------------------------------------------------