CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_3/src/DQMServices/Diagnostic/scripts/Database/Python/runregparse.py

Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 
00003 # LM: version date: 01/02/2010 --> fixed dataset search and added json output file (optional)
00004 # LM: updated 03/04/2010 --> adapted to new runreg api (and dcs status info)
00005 # LM: updated 15/04/2010 --> added bfield threshold
00006 
00007 # include XML-RPC client library
00008 # RR API uses XML-RPC webservices interface for data access
00009 import xmlrpclib,sys,ConfigParser,os,string,commands,time,re
00010 # for json support
00011 try: # FUTURE: Python 2.6, prior to 2.6 requires simplejson
00012     import json
00013 except:
00014     try:
00015         import simplejson as json
00016     except:
00017         print "Please use lxplus or set an environment (for example crab) with json lib available"
00018         sys.exit(1)
00019 
# Script-wide state shared with searchrun(): EXCEPTION is raised to True
# when a manual bad-LS tag cannot be parsed, and EXRUN then holds the run
# number involved (it starts at -1).
global QF_Req, ls_temp_data, QF_ALL_SYS, EXCEPTION, EXRUN
EXRUN = -1
EXCEPTION = False
00023 
def invert_intervals(intervals, min_val=1, max_val=9999):
    """Return the gaps left by *intervals* inside [min_val, max_val].

    intervals -- iterable of inclusive (first, last) integer pairs (tuples
                 or lists); they may be unsorted and may overlap.
    min_val   -- lower bound of the range to complement; -1 means "use the
                 smallest interval start".
    max_val   -- upper bound of the range to complement; -1 means "use the
                 largest interval end".

    Returns a sorted list of inclusive (first, last) tuples covering every
    integer of [min_val, max_val] that is not inside any input interval.
    With no input intervals the whole range is returned, unless a bound is
    -1, in which case there is nothing to derive it from and [] is returned.
    """
    if not intervals:
        # Nothing is covered, so the complement is the full range -- but
        # only if both bounds were given explicitly.
        if min_val == -1 or max_val == -1 or min_val > max_val:
            return []
        return [(min_val, max_val)]

    ordered = sorted(intervals, key=lambda pair: pair[0])
    if min_val == -1:
        min_val = ordered[0][0]
    if max_val == -1:
        max_val = max(end for (_start, end) in ordered)

    result = []
    curr_min = min_val
    for (start, end) in ordered:
        if curr_min > max_val:
            break
        if start > curr_min:
            # Clip the gap so it never runs past the requested range.
            result.append((curr_min, min(start - 1, max_val)))
        # max() keeps the sweep monotonic, which also absorbs overlapping
        # or nested intervals without a separate merge pass.
        curr_min = max(curr_min, end + 1)
    # "<=" so that a trailing gap of exactly one element is not lost.
    if curr_min <= max_val:
        result.append((curr_min, max_val))

    return result
00049 
def merge_intervals(intervals):
    """Collapse overlapping inclusive (first, last) pairs.

    The input is sorted by start value; an interval is folded into the
    previous one when its start is not beyond the previous end. Intervals
    that merely touch (start == previous end + 1) are kept separate.
    Returns a list of tuples ordered by start, or [] for empty input.
    """
    if not intervals:
        return []

    ordered = sorted(intervals, key=lambda pair: pair[0])
    merged = []
    (lo, hi) = ordered[0]
    for (start, end) in ordered[1:]:
        if start > hi:
            # Disjoint from the running interval: emit it and start anew.
            merged.append((lo, hi))
            lo, hi = start, end
        elif end > hi:
            # Overlapping and reaching further: extend the running interval.
            hi = end
    merged.append((lo, hi))
    return merged
00064 
def remove_html_tags(data):
    """Return *data* with every <...> tag and every "&nbsp;" removed."""
    # Non-greedy pattern, so each tag is matched and dropped on its own.
    without_tags = re.sub(r'<.*?>', '', data)
    return without_tags.replace("&nbsp;", "")
00070 
def remove_extra_spaces(data):
    """Return *data* with all whitespace characters stripped out."""
    whitespace = re.compile(r'\s')
    return whitespace.sub('', data)
00074 
def searchrun(runno):
    """Collect the manually flagged lumisection intervals for run *runno*.

    Every line of the global ls_temp_data that contains *runno* and a
    "%%%BAD LS INFO BEGIN%%%" ... "%%%BAD LS INFO END%%%" section is
    examined. The section is stripped of HTML tags and whitespace and cut
    into "<tag>:<intervals>" entries, one per name in QF_ALL_SYS. For each
    tag whose requested flag in QF_Req is "GOOD", the comma-separated
    intervals are parsed:

        "ALL"    -> (1, 9999)
        "a-b"    -> (a, b)
        "a-END"  -> (a, 9999)

    runno -- run number as a string (it is matched as a substring).

    Returns the parsed intervals as a merged, sorted list of tuples.
    Anything that cannot be parsed sets the globals EXCEPTION (to True) and
    EXRUN (to int(runno)); the remaining intervals are still processed.
    """
    global QF_Req,ls_temp_data,QF_ALL_SYS,EXCEPTION,EXRUN
    begin_marker="%%%BAD LS INFO BEGIN%%%"
    end_marker="%%%BAD LS INFO END%%%"
    intervallist=[]

    for line in ls_temp_data.split("\n"):
        if runno not in line or begin_marker not in line:
            continue
        try:
            selectls=line.split(begin_marker)[1]
            selectls=selectls.split(end_marker)[0]
            selectls=remove_html_tags(selectls)
            selectls=remove_extra_spaces(selectls)
            # Whitespace is gone at this point, so put every known tag on a
            # line of its own to be able to split the entries apart again.
            for tag in QF_ALL_SYS:
                selectls=selectls.replace(tag+":","\n"+tag+":")

            for entry in selectls.split("\n"):
                fields=entry.split(":")
                if len(fields)<2:
                    continue
                tag=fields[0]
                intervals=fields[1]
                # Only subsystems explicitly requested as GOOD contribute.
                if QF_Req.get(tag)!="GOOD":
                    continue
                for interval in intervals.split(","):
                    # Parse each token on its own so that one malformed
                    # token does not discard the intervals that follow it.
                    try:
                        if "ALL" in interval:
                            lmin=1
                            lmax=9999
                        else:
                            bounds=interval.split('-')
                            lmin=int(bounds[0])
                            if "END" in bounds[1]:
                                lmax=9999
                            else:
                                lmax=int(bounds[1])
                    except (IndexError, ValueError):
                        EXCEPTION=True
                        EXRUN=int(runno)
                        continue
                    intervallist.append((lmin,lmax))
        except Exception:
            # Best effort: remember the problem and move on to the next
            # line. KeyboardInterrupt/SystemExit are deliberately not caught.
            EXCEPTION=True
            EXRUN=int(runno)

    return merge_intervals(intervallist)
00121 
00122 
00123 
#main starts here#

# QF_Req maps a subsystem name to the quality flag requested for it; it is
# filled from the configuration. GOODRUN and compactList are only
# initialised here (no use of them is visible in this part of the script).
QF_Req = {}
GOODRUN = {}
compactList = {}

# Subsystem names accepted in a quality-flag request.
QF_ALL_SYS = [
    "Hcal", "Track", "Strip", "Egam", "Es", "Dt", "Csc", "Pix",
    "Muon", "Rpc", "Castor", "Jmet", "Ecal", "L1t", "Hlt", "NONE",
]

# Status values accepted in a quality-flag request.
QF_ALL_STAT = ["GOOD", "BAD", "EXCL", "NONE"]

# Partition names accepted in the DCS request.
DCS_ALL = [
    'Bpix', 'Fpix', 'Tibtid', 'TecM', 'TecP', 'Tob',
    'Ebminus', 'Ebplus', 'EeMinus', 'EePlus', 'EsMinus', 'EsPlus',
    'HbheA', 'HbheB', 'HbheC', 'H0', 'Hf',
    'Dtminus', 'Dtplus', 'Dt0', 'CscMinus', 'CscPlus',
    'Rpc', 'Castor', "NONE",
]
00133 
00134 # reading config file
00135 CONFIGFILE='runreg.cfg'
00136 CONFIG = ConfigParser.ConfigParser()
00137 print 'Reading configuration file from ',CONFIGFILE
00138 CONFIG.read(CONFIGFILE)
00139 
00140 DATASET=CONFIG.get('Common','Dataset')
00141 GROUP=CONFIG.get('Common','Group')
00142 HLTNAMEFILTER=CONFIG.get('Common','HLTnameFilter')
00143 ADDRESS=CONFIG.get('Common','RunReg')
00144 RUNMIN=CONFIG.get('Common','Runmin')
00145 RUNMAX=CONFIG.get('Common','Runmax')
00146 QFLAGS=CONFIG.get('Common','QFLAGS')
00147 BFIELD=CONFIG.get('Common','BField_thr')
00148 LSPARSE=CONFIG.get('Common','LSCOMMENT')
00149 DCSSTAT=CONFIG.get('Common','DCS')
00150 DCSLIST=string.split(DCSSTAT,',')
00151 
00152 OUTPUTFILENAME=CONFIG.get('Common',"OutputFileName")
00153 
00154 LSCOMMENT=True
00155 if "TRUE" in LSPARSE.upper() or "1" in LSPARSE.upper() or "YES" in LSPARSE.upper():
00156     LSCOMMENT=True
00157 elif "FALSE" in LSPARSE.upper() or "0" in LSPARSE.upper() or "NO" in LSPARSE.upper():
00158     LSCOMMENT=False
00159 else:
00160     print "Error in parsing LSCOMMENT cfg parameter: LSPARSE"
00161     sys.exit(1)
00162 
00163 QFlist=string.split(QFLAGS,',')
00164 for QF in QFlist:
00165     syst=string.split(QF,":")[0]
00166     value=string.split(QF,":")[1]
00167     if syst not in QF_ALL_SYS or value not in QF_ALL_STAT:
00168         print "QFLAG not valid:",syst,value 
00169         sys.exit(1)
00170     QF_Req[syst]=value
00171 
00172 for dcs in DCSLIST:
00173     if dcs not in DCS_ALL:
00174         print "DCS not valid:",dcs
00175         sys.exit(1)
00176 
00177 
00178 CFGLIST=CONFIG.items('Common')
00179 JSONFILE=CONFIG.get('Common','JSONFILE')
00180 
00181 try:
00182     BFIELD_float=float(BFIELD)
00183 except:
00184     print "BFIELD threshold value not understood:",BFIELD
00185     sys.exit(1)
00186 
00187 # report the request
00188 
00189 print "You asked for the runreg info in the run range:"+RUNMIN+"-"+RUNMAX
00190 print "for dataset: "+DATASET
00191 print "with the following quality flags:"
00192 for SS in QF_Req.keys():
00193     print SS, QF_Req[SS]
00194 print "and with the following DCS status:"
00195 for dcs in DCSLIST:
00196     print dcs
00197 print "Manual bad LS in comment column:",LSCOMMENT
00198 #sys.exit(1)
00199  
00200 # get handler to RR XML-RPC server
00201 FULLADDRESS=ADDRESS+"/xmlrpc"
00202 print "RunRegistry from: ",FULLADDRESS
00203 server = xmlrpclib.ServerProxy(FULLADDRESS)
00204 
00205 # build up selection in RUN table
00206 sel_runtable="{groupName} ='"+GROUP+"' and {runNumber} >= "+RUNMIN+" and {runNumber} <= "+RUNMAX+" and {bfield}>"+BFIELD+" and {datasetName} LIKE '"+DATASET+"'"
00207 
00208 # the lumisection selection is on the Express dataset:
00209 sel_dstable="{groupName} ='"+GROUP+"' and {runNumber} >= "+RUNMIN+" and {runNumber} <= "+RUNMAX+" and {bfield}>"+BFIELD+" and {datasetName} LIKE '%Express%'"
00210 
00211 for key in QF_Req.keys():
00212     if key != "NONE" and QF_Req[key]!="NONE":
00213         sel_runtable+=" and {cmp"+key+"} = '"+QF_Req[key]+"'"
00214         sel_dstable+=" and {cmp"+key+"} = '"+QF_Req[key]+"'"
00215 #print sel_runtable
00216 
00217 # build up selection in RUNLUMISECTION table, not requestuing bfield here because only runs in the run table selection will be considered
00218 sel_dcstable="{groupName} ='"+GROUP+"' and {runNumber} >= "+RUNMIN+" and {runNumber} <= "+RUNMAX
00219 for dcs in DCSLIST:
00220     if dcs !="NONE":
00221         sel_dcstable+=" and {parDcs"+dcs+"} = 1"
00222 # = 'True'"
00223 # print sel_dcstable
00224 
00225 Tries=0
00226 print " " 
00227 while Tries<10:
00228     try:
00229         print "Accessing run registry...."
00230         dcs_data = server.DataExporter.export('RUNLUMISECTION', 'GLOBAL', 'json', sel_dcstable)
00231         run_data = server.DataExporter.export('RUN', 'GLOBAL', 'csv_runs', sel_runtable)
00232         ls_temp_data = server.DataExporter.export('RUN', 'GLOBAL', 'csv_datasets', sel_dstable)
00233         break
00234     except:
00235         print "Something wrong in accessing runregistry, retrying in 3s...."
00236         Tries=Tries+1
00237         time.sleep(3)
00238 if Tries==10:
00239     print "Run registry unaccessible.....exiting now"
00240     sys.exit(1)
00241     
00242 #print dcs_data
00243 # print "run data: ", run_data
00244 #print ls_temp_data
00245 # find LS info in comment
00246 
00247 
00248 
00249 LISTOFRUN=[]
00250 selectedRuns = open(OUTPUTFILENAME, 'w')
00251 print "Saving selected runs to file OUTPUTFILENAME"
00252 for line in run_data.split("\n"):
00253     run=line.split(',')[0]
00254     if run.isdigit():
00255         hlt=line.split(',')[9]
00256         print "for run", run, "hlt is", hlt
00257         if HLTNAMEFILTER == "" or hlt.find(HLTNAMEFILTER):
00258             LISTOFRUN.append(run)
00259             selectedRuns.write(run+"\n")
00260 selectedRuns.close()
00261 
# selected_dcs maps each selected run to the list of LS intervals to keep.
selected_dcs={}
# dcs_data decodes to a mapping: run -> list of LS intervals.
jsonlist=json.loads(dcs_data)

for element in jsonlist:
    if element not in LISTOFRUN:
        continue

    if not LSCOMMENT:
        # using only DCS info
        selected_dcs[element]=jsonlist[element]
        continue

    # Manual LS certification: intervals taken from the comment tags.
    manualbad_int=searchrun(element)
    # Bad-lumi list from DCS: the complement of the DCS intervals.
    dcsbad_int=invert_intervals(jsonlist[element])
    # combined includes both the manual LS and the DCS LS; its complement
    # is what gets stored for the run.
    combined=merge_intervals(list(manualbad_int)+list(dcsbad_int))
    combined=invert_intervals(combined)
    selected_dcs[element]=combined
00286 
00287 #JSONOUT=json.dumps(selected_dcs)
00288 # WARNING: Don't use selected_dcs before dumping into file, it gets screwed up (don't know why!!)
00289 if JSONFILE != "NONE":
00290     lumiSummary = open(JSONFILE, 'w')
00291     json.dump(selected_dcs, lumiSummary)
00292     lumiSummary.close() 
00293     print " "
00294     print "-------------------------------------------"
00295     print "Json file: ",JSONFILE," written."
00296 
00297 
00298 # buildup cms snippet
00299 selectlumi="process.source.lumisToProcess = cms.untracked.VLuminosityBlockRange(\n"
00300 ranges = []
00301 runs_to_print = selected_dcs.keys()
00302 runs_to_print.sort()
00303 for run in runs_to_print:
00304    blocks = selected_dcs[run]
00305    blocks.sort()
00306    prevblock = [-2,-2]
00307    for lsrange in blocks:
00308        if lsrange[0] == prevblock[1]+1:
00309            print "Run: ",run,"- This lumi starts at ", lsrange[0], " previous ended at ", prevblock[1]+1, " so I should merge"
00310            prevblock[1] = lsrange[1]
00311            ranges[-1] = "\t'%s:%d-%s:%d',\n" % (run, prevblock[0],
00312 run, prevblock[1])
00313        else:
00314            ranges.append("\t'%s:%d-%s:%d',\n" % (run, lsrange[0],
00315 run, lsrange[1]))
00316            prevblock = lsrange
00317 selectlumi += "".join(ranges)
00318 selectlumi += ")"
00319 
00320 
00321 print "-------------------------------------------"
00322 print " "
00323 print "CFG snippet to select:"
00324 print selectlumi
00325 
00326 if EXCEPTION:
00327     print "WARNING: Something wrong in manual lumisection selection tag for run: "+str(EXRUN)