CMS 3D CMS Logo

/afs/cern.ch/work/a/aaltunda/public/www/CMSSW_6_2_5/src/Alignment/MuonAlignmentAlgorithms/scripts/findQualityFiles.py

Go to the documentation of this file.
00001 #! /usr/bin/env python
00002 
00003 ######################################################
00004 ### See documentation at
00005 ### https://twiki.cern.ch/twiki/bin/view/CMS/FindQualityFilesPy
00006 ### also run it with -h option
00007 ######################################################
00008 
import os
import sys
import DLFCN
import optparse
import subprocess
00011 
00012 # for RunInfo API
00013 from pluginCondDBPyInterface import *
00014 from CondCore.Utilities import iovInspector as inspect
00015 from CondCore.Utilities.timeUnitHelper import *
00016 
00017 # for RunRegistry API
00018 import xmlrpclib
00019 
00020 # for json support
# json is in the standard library from Python 2.6 on; older interpreters
# need the external simplejson package, which offers the same API.
try:
    import json
except ImportError:
    try:
        import simplejson as json
    except ImportError:
        # Only a missing module should lead here: a bare "except" would
        # also swallow KeyboardInterrupt / SystemExit raised during import.
        print("Please use lxplus or set an environment (for example crab) with json lib available")
        sys.exit(1)
00029 
00030 ######################################################
# Echo the command line exactly as it could be re-typed in a shell:
# empty arguments are shown as "" and arguments containing a blank are
# wrapped in double quotes.
print("### command line:")
copyargs = []
for arg in sys.argv:
    if arg == "":
        arg = '""'
    if " " in arg:
        arg = '"%s"' % arg
    copyargs.append(arg)
commandline = " ".join(copyargs)

print(commandline)
# Header lines that are later written at the top of the output file.
infotofile = ["### %s\n" % commandline]
00042 
00043 ######################################################
00044 # To parse commandline args
00045 
# Command-line interface definition.  Every option stores its value on the
# attribute named by "dest"; the defaults below are the values used when an
# option is not given.
usage = ('%prog [options]\n\n'
         'Creates a Python configuration file with filenames for runs in specified run range, with certain min B field and data quality requirements.')

parser = optparse.OptionParser(usage)

parser.add_option("-d", "--alcaDataset",
                  help="[REQUIRED] Name of the input AlCa dataset to get filenames from.",
                  type="string",
                  default='',
                  dest="alcaDataset")

# Kept as a string ("true"/"false") for backward compatibility with
# existing invocations; the value is validated after parsing.
parser.add_option("-m", "--isMC",
                  help="Whether sample is MC (true) or real data (false).",
                  type="string",
                  default="false",
                  dest="isMC")

parser.add_option("-s", "--startRun",
                  help="First run number in range.",
                  type="int",
                  default=0,
                  dest="startRun")

parser.add_option("-e", "--endRun",
                  help="Last run number in range.",
                  type="int",
                  default=999999999,
                  dest="endRun")

parser.add_option("-b", "--minB",
                  help="Lower limit on minimal B field for a run.",
                  type="float",
                  default=0.,
                  dest="minB")

parser.add_option("--maxB",
                  help="Upper limit on B field for a run.",
                  type="float",
                  default=999.,
                  dest="maxB")

parser.add_option("-r", "--runRegistry",
                  help="If present, use RunRegistry API for B field and data quality query",
                  action="store_true",
                  default=False,
                  dest="runRegistry")

parser.add_option("-j", "--json",
                  help="If present with JSON file as argument, use JSON file for the good runs and ignore B field and --runRegistry options. "
                       "The latest JSON file is available at /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions10/7TeV/StreamExpress/",
                  type="string",
                  default="",
                  dest="json")

parser.add_option("-t", "--dbTag",
                  help="Runinfo DB tag to use.",
                  type="string",
                  default="runinfo_31X_hlt",
                  dest="dbTag")

parser.add_option("--printTags",
                  help="If present, the only thing script will do is printing list of tags in the DB",
                  action="store_true",
                  default=False,
                  dest="printTags")

parser.add_option("--dbName",
                  help="RunInfo DB name to use. The default one is "
                       "'oracle://cms_orcoff_prod/CMS_COND_31X_RUN_INFO'",
                  type="string",
                  default="oracle://cms_orcoff_prod/CMS_COND_31X_RUN_INFO",
                  dest="dbName")

# An empty value means "same as --alcaDataset"; that substitution is done
# after parsing.
parser.add_option("--dqDataset",
                  help="Dataset name to query for good data quality runs. "
                       "If this option is not used, dqDataset=alcaDataset is automatically set. "
                       "If alcaDataset does not have DQ information use /Cosmics/Commissioning08-v1/RAW for CRAFT08 "
                       "and use /Cosmics/CRAFT09-v1/RAW for CRAFT09",
                  type="string",
                  default="",
                  dest="dqDataset")

# optparse re-wraps help text, so the pieces are separated by plain blanks;
# without them adjacent pieces were glued together in the -h output.
parser.add_option("-c", "--dqCriteria",
                  help="Set of DQ criteria to use with -dq flag of dbs. "
                       "An example of a really strict condition: "
                       "'DT_Shift_Offline=GOOD&CSC_Shift_Offline=GOOD&SiStrip_Shift_Offline=GOOD&Pixel_Shift_Offline=GOOD' "
                       "NOTE: if --runRegistry is used, DQ criteria syntax should be as Advanced query syntax for RR. E.g.: "
                       "\"{cmpDt}='GOOD' and {cmpCsc}='GOOD' and {cmpStrip}='GOOD' and {cmpPix}='GOOD'\"",
                  type="string",
                  default="",
                  dest="dqCriteria")

parser.add_option("-o", "--outputFile",
                  help="Name for output file (please include the .py suffix)",
                  type="string",
                  default="filelist.py",
                  dest="outputFile")

parser.add_option("-v", "--verbose",
                  help="Degree of debug info verbosity",
                  type="int",
                  default=0,
                  dest="verbose")
00157 
options, args = parser.parse_args()

# --alcaDataset is mandatory unless the script is only asked to list DB tags.
if options.alcaDataset == '' and not options.printTags:
    print("--alcaDataset /your/dataset/name is required!")
    # Non-zero status so that calling scripts can detect the usage error.
    sys.exit(1)

# Data-quality information is looked up for the AlCa dataset itself unless a
# dedicated dataset was requested.
if options.dqDataset == '':
    options.dqDataset = options.alcaDataset

if options.isMC not in ('true', 'false'):
    print("--isMC option can have only 'true' or 'false' arguments")
    sys.exit(1)

# Short alias for the verbosity level, read by the functions below.
v = options.verbose

# B-field limits expressed in the units that are compared against the
# per-run values in getGoodBRuns().
# NOTE(review): 18160/3.8 is presumably the magnet current (A) per tesla at
# the nominal 3.8 T field -- confirm.
minI = options.minB*18160/3.8
maxI = options.maxB*18160/3.8

# Optional flags are echoed only when they were actually given.
rr = ''
if options.runRegistry:
    rr = ' --runRegistry'

jj = ''
if options.json != '':
    jj = ' --json ' + options.json

# Full command line including defaulted options, for reproducibility.
allOptions = ''.join([
    '### ', copyargs[0],
    ' --alcaDataset ', options.alcaDataset,
    ' --isMC ', options.isMC,
    ' --startRun ', str(options.startRun),
    ' --endRun ', str(options.endRun),
    ' --minB ', str(options.minB),
    ' --maxB ', str(options.maxB),
    rr, jj,
    ' --dbTag ', options.dbTag,
    ' --dqDataset ', options.dqDataset,
    ' --dqCriteria "', options.dqCriteria, '"',
    ' --outputFile ', options.outputFile,
])

print("### all options, including default:")
print(allOptions)
00197 
00198 
00199 ######################################################
00200 # functions definitions
00201 
00202 
00203 #########################
00204 # get good B field runs from RunInfo DB
def getGoodBRuns():
    """Return the runs in [startRun, endRun] that pass the B-field cut.

    Reads the trend stored under tag options.dbTag of the RunInfo DB
    options.dbName and keeps every run whose entry satisfies
    x[2][4] >= minI and x[2][3] <= maxI (going by the header printed in
    verbose mode these are presumably min_current and max_current).

    With --printTags the function only prints the tags available in the DB
    and terminates the script.  Any exception raised while reading the DB is
    printed and the runs collected so far are returned (best effort).
    """
    selected_runs = []

    sys.setdlopenflags(DLFCN.RTLD_GLOBAL + DLFCN.RTLD_LAZY)

    # NOTE(review): the object is never used directly; it is presumably
    # needed for its construction side effects -- confirm before removing.
    framework = FWIncantation()
    connection = RDBMS("/afs/cern.ch/cms/DB/conddb")
    db = connection.getDB(options.dbName)
    tags = db.allTags()

    if options.printTags:
        print("\nOverview of all tags in " + options.dbName + " :\n")
        print(tags)
        print("\n")
        sys.exit()

    try:
        iov = inspect.Iov(db, options.dbTag)
        what = {}

        if v > 1:
            print("######## summries ########")
            for entry in iov.summaries():
                print("%s %s %s %s" % (entry[0], entry[1], entry[2], entry[3]))
            print("###(start_current,stop_current,avg_current,max_current,min_current,run_interval_micros) vs runnumber###")
            print(iov.trend(what))

        if v > 0:
            print("######## trends ########")

        # The range is widened by one run on each side.
        for entry in iov.trendinrange(what, options.startRun - 1, options.endRun + 1):
            run = entry[0]
            currents = entry[2]
            # Three specific runs are always printed, whatever the verbosity.
            always_shown = run == 67647 or run == 66893 or run == 67264
            if v > 0 or always_shown:
                print("%s %s %s %s %s" % (run, entry[1], currents, currents[4], currents[3]))
            if currents[4] >= minI and currents[3] <= maxI:
                selected_runs.append(int(run))

    except Exception as er:
        print(er)

    print("### runs with good B field ###")
    print(selected_runs)

    return selected_runs
00268 
00269 
00270 #########################
00271 # obtaining list of good quality runs
00272 
def getGoodQRuns():
    """Return the runs of options.dqDataset that satisfy options.dqCriteria.

    The DBS command line tool is asked for the matching runs and its sorted
    output (one run number per line) is converted to a list of ints.

    The output is read through a pipe rather than through a fixed file in
    /tmp, so that concurrent jobs and other users on the same host cannot
    clobber or pick up each other's results.

    Raises:
        ValueError: if a non-empty output line is not an integer.
    """
    dbs_quiery = "find run where dataset="+options.dqDataset+" and dq="+options.dqCriteria
    print('dbs search --noheader --query="'+dbs_quiery+'" | sort')

    # shell=True is required for $DBSCMD_HOME expansion and the "| sort" pipe.
    # The query comes from the user's own command line.
    command = 'python $DBSCMD_HOME/dbsCommandLine.py -c  search --noheader --query="'+dbs_quiery+'" | sort'
    process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE,
                               universal_newlines=True)
    output = process.communicate()[0]

    runs_good_dq = []
    for line in output.splitlines():
        line = line.strip()
        if line:  # tolerate blank lines instead of crashing in int()
            runs_good_dq.append(int(line))

    print("### runs with good quality ###")
    print(runs_good_dq)

    return runs_good_dq
00297 
00298 #########################
00299 # obtaining list of good B and quality runs from Run Registry
00300 # https://twiki.cern.ch/twiki/bin/view/CMS/DqmRrApi
00301 # https://twiki.cern.ch/twiki/bin/viewauth/CMS/DQMRunRegistry
00302 
def getRunRegistryGoodRuns():
    """Return the runs selected by the Run Registry.

    The selection combines the run range, the B-field window and, when given,
    options.dqCriteria into one Run Registry query.  The first element of
    every entry under 'events' in the reply is returned.
    """
    server = xmlrpclib.ServerProxy('http://pccmsdqm04.cern.ch/runregistry/xmlrpc')

    conditions = [
        "{runNumber}>=" + str(options.startRun),
        "{runNumber}<=" + str(options.endRun),
        "{bfield}>=" + str(options.minB),
        "{bfield}<=" + str(options.maxB),
    ]
    if options.dqCriteria != "":
        conditions.append(options.dqCriteria)
    rr_quiery = " and ".join(conditions)

    reply = server.RunDatasetTable.export('GLOBAL', 'chart_runs_cum_evs_vs_bfield', rr_quiery)
    # Quote the two bare keys so that the reply becomes a Python expression.
    reply = reply.replace("bfield", "'bfield'").replace("events", "'events'")
    # NOTE(review): eval() executes whatever the remote server sends back.
    # If the reply is always a plain literal, ast.literal_eval would be a
    # safer replacement -- confirm the reply format before switching.
    rrdata = eval(reply)

    return [entry[0] for entry in rrdata['events']]
00320 
00321 #########################
00322 # obtain a list of good runs from JSON file
00323 
def getJSONGoodRuns():
    """Return the sorted run numbers that are the keys of the JSON file.

    The file named by options.json is expected to hold a JSON object whose
    keys are run numbers; the values are not inspected.

    Raises:
        IOError: if the file cannot be opened.
        ValueError: if the file is not valid JSON or a key is not an integer.
    """
    # open() replaces the Python-2-only file() builtin, and the "with" block
    # closes the handle even when parsing fails.
    with open(options.json, 'r') as jsonfile:
        jsondict = json.load(jsonfile)

    return sorted(int(run) for run in jsondict)
00341 
00342 ######################################################
00343 # get good B field runs from RunInfo DB
00344 
runs_b_on = []
runs_good_dq = []
runs_good = []

# Real data only: pick exactly one source for the list of good runs.
# A JSON file takes precedence over the Run Registry, which in turn takes
# precedence over the RunInfo DB + DBS data-quality lookup.
if options.isMC == 'false':
    if options.json != '':
        runs_good = getJSONGoodRuns()
        print("### good runs from JSON file ###")
        print(runs_good)

    elif options.runRegistry:
        runs_good = getRunRegistryGoodRuns()
        print("### runs with good B field and quality ###")
        print(runs_good)

    else:
        # B-field requirement from the RunInfo DB.
        runs_b_on = getGoodBRuns()
        infotofile.append("### runs with good B field ###\n")
        infotofile.append("### %s\n" % str(runs_b_on))

        # Data-quality requirement from DBS.
        runs_good_dq = getGoodQRuns()
        infotofile.append("### runs with good quality ###\n")
        infotofile.append("### %s\n" % str(runs_good_dq))

        # Intersection of both lists, kept in the order of runs_b_on.
        # The set makes each membership test O(1).
        good_dq_lookup = set(runs_good_dq)
        runs_good = [run for run in runs_b_on if run in good_dq_lookup]

        print("### runs with good B field and quality ###")
        print(runs_good)

        infotofile.append("### runs with good B field and quality ###\n")
        infotofile.append("### %s\n" % str(runs_good))
00392 
00393 ######################################################
00394 # Find files for good runs
00395 
# Ask DBS for every (run, number of events, file name) triple of the AlCa
# dataset inside the requested run range.
dbs_quiery = "find run, file.numevents, file where dataset="+options.alcaDataset+" and run>="+str(options.startRun)+" and run<="+str(options.endRun)+" and file.numevents>0"

# The output is read through a pipe instead of a fixed file in /tmp: that
# file was never removed, so on a shared host a stale copy owned by another
# user could silently be read in place of the fresh result.
# shell=True is required for $DBSCMD_HOME expansion and the "| sort" pipe.
dbs_command = 'python $DBSCMD_HOME/dbsCommandLine.py -c  search --noheader --query="'+dbs_quiery+'" | sort'
dbs_process = subprocess.Popen(dbs_command, shell=True, stdout=subprocess.PIPE,
                               universal_newlines=True)
dbs_output = dbs_process.communicate()[0]

list_of_files = []
list_of_runs = []
list_of_numevents = []

good_run_lookup = set(runs_good)  # O(1) membership tests in the loop
for line in dbs_output.splitlines():
    fields = line.split()
    if not fields:
        continue
    if len(fields) != 3:
        print("### WARNING: skipping unexpected DBS output line: " + line)
        continue
    run, numevents, fname = fields
    run = int(run)
    # The good-run selection applies to real data only.
    if options.isMC == 'false' and run not in good_run_lookup:
        continue
    list_of_files.append(fname)
    list_of_runs.append(run)
    list_of_numevents.append(numevents)  # kept as text, written out verbatim

uniq_list_of_runs = sorted(set(list_of_runs))

print("### list of runs with good B field and quality in the dataset: ###")
print(uniq_list_of_runs)
infotofile.append("### list of runs with good B field and quality in the dataset: ###\n")
infotofile.append("### %s\n" % str(uniq_list_of_runs))


# A file can hold events from several runs and is then listed once per run:
# keep the first occurrence of every (file, numevents) pair.  Unlike a plain
# set() this keeps the sorted order and also works when nothing was selected.
seen_files_events = set()
unique_files_events = []
for file_and_events in zip(list_of_files, list_of_numevents):
    if file_and_events not in seen_files_events:
        seen_files_events.add(file_and_events)
        unique_files_events.append(file_and_events)
list_of_files = [pair[0] for pair in unique_files_events]
list_of_numevents = [pair[1] for pair in unique_files_events]
total_numevents = sum(int(count) for count in list_of_numevents)

print("### total number of events in those "+str(len(uniq_list_of_runs))+" runs = "+str(total_numevents))

infotofile.append("### total number of events in those "+str(len(uniq_list_of_runs))+" runs = "+str(total_numevents))
00437 
00438 ######################################################
00439 # Write out results
00440 
00441 # ff = open(options.outputFile+'.txt','w')
# Write the header comments followed by a Python list literal "fileNames"
# with one quoted file name per line; the number of events of each file is
# appended as a trailing comment.
size = len(list_of_files)

ff = open(options.outputFile, 'w')
ff.write("".join(infotofile))
ff.write("\nfileNames = [\n")
for position, (fname, numevents) in enumerate(zip(list_of_files, list_of_numevents)):
    # Every entry but the last one is followed by a comma.
    if position == size - 1:
        separator = ""
    else:
        separator = ","
    ff.write("    '" + fname + "'" + separator + " # " + numevents + "\n")
ff.write(']\n')
ff.close()
00458