00001
00002
00003
00004
00005
00006
00007
00008
import DLFCN
import optparse
import os
import subprocess
import sys
import xmlrpclib

from pluginCondDBPyInterface import *
from CondCore.Utilities import iovInspector as inspect
from CondCore.Utilities.timeUnitHelper import *
00019
00020
# Prefer the standard-library json module and fall back to the external
# simplejson package on interpreters that do not provide it.  Only
# ImportError is handled: a bare "except" would also swallow
# KeyboardInterrupt/SystemExit and hide unrelated failures inside the module.
try:
    import json
except ImportError:
    try:
        import simplejson as json
    except ImportError:
        print("Please use lxplus or set an environment (for example crab) with json lib available")
        sys.exit(1)
00029
00030
def _quote_if_needed(arg):
    """Return arg in double quotes when it is empty or contains a space."""
    if arg == "":
        return '""'
    if " " in arg:
        return '"%s"' % arg
    return arg


# Echo the invocation so it can be repeated; the same text becomes the first
# entry of infotofile, which is written at the top of the output file.
print("### command line:")
copyargs = [_quote_if_needed(arg) for arg in sys.argv]
commandline = " ".join(copyargs)

print(commandline)
infotofile = ["### %s\n" % commandline]
00042
00043
00044
00045
# Command-line interface.  Every option has a default, so the script can
# report the complete effective configuration after parsing.
usage = ('%prog [options]\n\n'
         'Creates a Python configuration file with filenames for runs in specified run range, with certain min B field and data quality requirements.')

parser = optparse.OptionParser(usage)

parser.add_option("-d", "--alcaDataset",
                  help="[REQUIRED] Name of the input AlCa dataset to get filenames from.",
                  type="string",
                  default='',
                  dest="alcaDataset")

# Kept as a 'true'/'false' string; the value is validated after parsing.
parser.add_option("-m", "--isMC",
                  help="Whether sample is MC (true) or real data (false).",
                  type="string",
                  default="false",
                  dest="isMC")

# Plain int defaults: the "L" long-literal suffix is a syntax error on
# Python 3 and unnecessary on Python 2, where ints promote automatically.
parser.add_option("-s", "--startRun",
                  help="First run number in range.",
                  type="int",
                  default=0,
                  dest="startRun")

parser.add_option("-e", "--endRun",
                  help="Last run number in range.",
                  type="int",
                  default=999999999,
                  dest="endRun")

parser.add_option("-b", "--minB",
                  help="Lower limit on minimal B field for a run.",
                  type="float",
                  default=0.,
                  dest="minB")

parser.add_option("--maxB",
                  help="Upper limit on B field for a run.",
                  type="float",
                  default=999.,
                  dest="maxB")

parser.add_option("-r", "--runRegistry",
                  help="If present, use RunRegistry API for B field and data quality quiery",
                  action="store_true",
                  default=False,
                  dest="runRegistry")

parser.add_option("-j", "--json",
                  help=("If present with JSON file as argument, use JSON file for the good runs and ignore B field and --runRegistry options. "
                        "The latest JSON file is available at /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions10/7TeV/StreamExpress/"),
                  type="string",
                  default="",
                  dest="json")

parser.add_option("-t", "--dbTag",
                  help="Runinfo DB tag to use.",
                  type="string",
                  default="runinfo_31X_hlt",
                  dest="dbTag")

parser.add_option("--printTags",
                  help="If present, the only thing script will do is printing list of tags in the DB",
                  action="store_true",
                  default=False,
                  dest="printTags")

parser.add_option("--dbName",
                  help=("RunInfo DB name to use. The default one is "
                        "'oracle://cms_orcoff_prod/CMS_COND_31X_RUN_INFO'"),
                  type="string",
                  default="oracle://cms_orcoff_prod/CMS_COND_31X_RUN_INFO",
                  dest="dbName")

# The help text used to name CRAFT08 for both example datasets; the second
# example is the CRAFT09 dataset.
parser.add_option("--dqDataset",
                  help=("Dataset name to query for good data quality runs. "
                        "If this option is not used, dqDataset=alcaDataset is automatically set. "
                        "If alcaDataset does not have DQ information use /Cosmics/Commissioning08-v1/RAW for CRAFT08 "
                        "and use /Cosmics/CRAFT09-v1/RAW for CRAFT09"),
                  type="string",
                  default="",
                  dest="dqDataset")

# The help fragments are joined with explicit separators so the example
# condition, the NOTE and the RunRegistry example no longer run together.
parser.add_option("-c", "--dqCriteria",
                  help=("Set of DQ criteria to use with -dq flag of dbs.\n"
                        "An example of a really strict condition:\n"
                        "'DT_Shift_Offline=GOOD&CSC_Shift_Offline=GOOD&SiStrip_Shift_Offline=GOOD&Pixel_Shift_Offline=GOOD'\n"
                        "NOTE: if --runRegistry is used, DQ criteria sintax should be as Advanced query syntax for RR. E.g.: "
                        "\"{cmpDt}='GOOD' and {cmpCsc}='GOOD' and {cmpStrip}='GOOD' and {cmpPix}='GOOD'\""),
                  type="string",
                  default="",
                  dest="dqCriteria")

parser.add_option("-o", "--outputFile",
                  help="Name for output file (please include the .py suffix)",
                  type="string",
                  default="filelist.py",
                  dest="outputFile")

parser.add_option("-v", "--verbose",
                  help="Degree of debug info verbosity",
                  type="int",
                  default=0,
                  dest="verbose")
00157
options, args = parser.parse_args()

# Validation failures exit with a non-zero status so that calling scripts and
# shells can tell a rejected invocation from a successful run.

# --alcaDataset is mandatory unless the script is only asked to list DB tags.
if options.alcaDataset == '' and not options.printTags:
    print("--alcaDataset /your/dataset/name is required!")
    sys.exit(1)

# The data-quality query falls back to the AlCa dataset itself.
if options.dqDataset == '':
    options.dqDataset = options.alcaDataset

if options.isMC not in ('true', 'false'):
    print("--isMC option can have only 'true' or 'false' arguments")
    sys.exit(1)

v = options.verbose

# B-field limits rescaled by 18160/3.8 for comparison with the values read
# from the RunInfo DB.
# NOTE(review): presumably 18160 is the magnet current (A) at the nominal
# 3.8 T field, making minI/maxI current limits - confirm.
minI = options.minB*18160/3.8
maxI = options.maxB*18160/3.8

# Optional flags are only echoed when they were actually given.
rr = ''
if options.runRegistry:
    rr = ' --runRegistry'

jj = ''
if options.json != '':
    jj = ' --json ' + options.json

allOptions = ('### ' + copyargs[0] + ' --alcaDataset ' + options.alcaDataset + ' --isMC ' + options.isMC +
              ' --startRun ' + str(options.startRun) + ' --endRun ' + str(options.endRun) +
              ' --minB ' + str(options.minB) + ' --maxB ' + str(options.maxB) + rr + jj +
              ' --dbTag ' + options.dbTag + ' --dqDataset ' + options.dqDataset + ' --dqCriteria "' + options.dqCriteria + '"'
              ' --outputFile ' + options.outputFile)

print("### all options, including default:")
print(allOptions)
00197
00198
00199
00200
00201
00202
00203
00204
def getGoodBRuns():
    """Return the run numbers from the RunInfo DB that pass the minI/maxI cuts.

    Opens options.dbName through RDBMS.  When options.printTags is set, the
    list of all tags is printed and the script exits.  Otherwise the trend
    entries for runs between options.startRun-1 and options.endRun+1 are
    scanned and a run is kept when entry[2][4] >= minI and
    entry[2][3] <= maxI.

    Any exception raised while reading the DB is printed and the runs
    collected up to that point are returned.
    """
    selected_runs = []

    sys.setdlopenflags(DLFCN.RTLD_GLOBAL+DLFCN.RTLD_LAZY)

    # NOTE(review): the object is never used afterwards; presumably it is
    # instantiated for its initialisation side effects - confirm before removing.
    a = FWIncantation()

    rdbms = RDBMS("/afs/cern.ch/cms/DB/conddb")
    db = rdbms.getDB(options.dbName)
    tags = db.allTags()

    if options.printTags:
        print("\nOverview of all tags in "+options.dbName+" :\n")
        print(tags)
        print("\n")
        sys.exit()

    try:
        iov = inspect.Iov(db, options.dbTag)
        what = {}

        if v > 1:
            print("######## summries ########")
            for summary in iov.summaries():
                print("%s %s %s %s" % (summary[0], summary[1], summary[2], summary[3]))
            print("###(start_current,stop_current,avg_current,max_current,min_current,run_interval_micros) vs runnumber###")
            print(iov.trend(what))

        if v > 0:
            print("######## trends ########")

        for entry in iov.trendinrange(what, options.startRun-1, options.endRun+1):
            values = entry[2]
            # Three specific runs are always printed, regardless of verbosity.
            if v > 0 or entry[0] in (67647, 66893, 67264):
                print("%s %s %s %s %s" % (entry[0], entry[1], values, values[4], values[3]))

            # NOTE(review): going by the column legend printed above, index 4
            # is presumably min_current and index 3 max_current - confirm.
            if values[4] >= minI and values[3] <= maxI:
                selected_runs.append(int(entry[0]))

    except Exception:
        # Report the problem and fall through with whatever was collected.
        print(sys.exc_info()[1])

    print("### runs with good B field ###")
    print(selected_runs)

    return selected_runs
00268
00269
00270
00271
00272
def getGoodQRuns():
    """Return the run numbers DBS reports for options.dqDataset with options.dqCriteria.

    Runs $DBSCMD_HOME/dbsCommandLine.py with a "find run" query and converts
    every non-blank output line to an int.

    The command output is read through a pipe, not via a fixed file in /tmp,
    so concurrent invocations cannot clobber each other's results and nothing
    is left behind if parsing fails.  The query is passed as a single argument
    without going through a shell, so characters such as '"' or '$' in the
    dataset name or criteria cannot alter the command.

    The exit status of the DBS command is not checked; a failed query
    produces an empty list.

    Raises:
        ValueError: if a non-blank output line is not an integer.
    """
    dbs_quiery = "find run where dataset="+options.dqDataset+" and dq="+options.dqCriteria
    print('dbs search --noheader --query="'+dbs_quiery+'" | sort')

    dbs_command = ["python",
                   os.environ.get("DBSCMD_HOME", "") + "/dbsCommandLine.py",
                   "-c", "search", "--noheader", "--query=" + dbs_quiery]
    # universal_newlines=True yields text rather than bytes on Python 3.
    dbs_process = subprocess.Popen(dbs_command, stdout=subprocess.PIPE,
                                   universal_newlines=True)
    dbs_output = dbs_process.communicate()[0]

    # Blank lines are skipped, since int() would reject them.  The lines are
    # ordered as text before conversion, as a plain "sort" of the output would.
    run_lines = [line for line in dbs_output.splitlines() if line.strip()]
    run_lines.sort()
    runs_good_dq = [int(line) for line in run_lines]

    print("### runs with good quality ###")
    print(runs_good_dq)

    return runs_good_dq
00297
00298
00299
00300
00301
00302
def getRunRegistryGoodRuns():
    """Return the run numbers selected by the RunRegistry XML-RPC service.

    The query combines the run range and B-field limits from options with
    options.dqCriteria (when non-empty).  The first element of every entry
    under 'events' in the reply is returned.
    """
    conditions = [
        "{runNumber}>="+str(options.startRun),
        "{runNumber}<="+str(options.endRun),
        "{bfield}>="+str(options.minB),
        "{bfield}<="+str(options.maxB),
    ]
    if options.dqCriteria != "":
        conditions.append(options.dqCriteria)
    rr_quiery = " and ".join(conditions)

    server = xmlrpclib.ServerProxy('https://pccmsdqm04.cern.ch/runregistry/xmlrpc')
    reply = server.RunDatasetTable.export('GLOBAL', 'chart_runs_cum_evs_vs_bfield', rr_quiery)

    # Quote the two bare words in the reply so that it evaluates as a dict
    # with string keys.
    reply = reply.replace("bfield", "'bfield'").replace("events", "'events'")

    # NOTE(review): eval() executes whatever text the remote server returned.
    # A literal-only parser would be safer, but the reply format needs to be
    # confirmed before replacing it.
    rrdata = eval(reply)

    return [row[0] for row in rrdata['events']]
00320
00321
00322
00323
def getJSONGoodRuns():
    """Return the sorted run numbers found as top-level keys of the options.json file.

    The file is opened with open() (the file() builtin no longer exists on
    Python 3) and is closed even when the JSON cannot be parsed.

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError: if the content is not valid JSON or a key is not an integer.
    """
    jsonfile = open(options.json, 'r')
    try:
        jsondict = json.load(jsonfile)
    finally:
        jsonfile.close()

    runs_good = [int(run) for run in jsondict]
    runs_good.sort()

    return runs_good
00341
00342
00343
00344
# Select the good runs for real data.  Exactly one source is used:
#   --json given        -> runs listed in the JSON file
#   --runRegistry given -> runs returned by the RunRegistry
#   otherwise           -> runs passing both the RunInfo DB B-field check and
#                          the DBS data-quality query
# For MC all three lists stay empty.
runs_b_on = []
runs_good_dq = []
runs_good = []

if options.isMC == 'false':
    if options.json != '':
        runs_good = getJSONGoodRuns()
        print("### good runs from JSON file ###")
        print(runs_good)

    elif options.runRegistry:
        runs_good = getRunRegistryGoodRuns()
        print("### runs with good B field and quality ###")
        print(runs_good)

    else:
        runs_b_on = getGoodBRuns()
        infotofile.append("### runs with good B field ###\n")
        infotofile.append("### %s\n" % str(runs_b_on))

        runs_good_dq = getGoodQRuns()
        infotofile.append("### runs with good quality ###\n")
        infotofile.append("### %s\n" % str(runs_good_dq))

        # Intersection of the two selections, in the order of runs_b_on.
        dq_lookup = set(runs_good_dq)
        runs_good = [run_number for run_number in runs_b_on if run_number in dq_lookup]

        print("### runs with good B field and quality ###")
        print(runs_good)

        infotofile.append("### runs with good B field and quality ###\n")
        infotofile.append("### %s\n" % str(runs_good))
00392
00393
00394
00395
# Ask DBS for every (run, number of events, file name) of the AlCa dataset in
# the requested run range and keep the files that belong to the good runs.
dbs_quiery = "find run, file.numevents, file where dataset="+options.alcaDataset+" and run>="+str(options.startRun)+" and run<="+str(options.endRun)+" and file.numevents>0"

# The output is read through a pipe, not via a fixed file in /tmp that was
# never removed and could be overwritten by a concurrent invocation.  The
# query is passed as one argument without going through a shell.
dbs_command = ["python",
               os.environ.get("DBSCMD_HOME", "") + "/dbsCommandLine.py",
               "-c", "search", "--noheader", "--query=" + dbs_quiery]
# universal_newlines=True yields text rather than bytes on Python 3.
dbs_output = subprocess.Popen(dbs_command, stdout=subprocess.PIPE,
                              universal_newlines=True).communicate()[0]

list_of_files = []
list_of_runs = []
list_of_numevents = []
total_numevents = 0

# For real data only files from the selected runs are kept; the set makes the
# per-line membership test constant time.
select_good_runs = options.isMC == 'false'
good_run_lookup = set(runs_good)

for line in sorted(dbs_output.splitlines()):
    # Split on any run of whitespace so the parse does not depend on the
    # exact column separator; blank lines are skipped.
    fields = line.split()
    if not fields:
        continue
    (run, numevents, fname) = fields
    if select_good_runs and int(run) not in good_run_lookup:
        continue
    list_of_files.append(fname)
    list_of_runs.append(int(run))
    # The event count stays a string; it is written verbatim to the output.
    list_of_numevents.append(numevents)
    total_numevents += int(numevents)
00417
00418
# Report the runs that actually contribute files.
uniq_list_of_runs = sorted(set(list_of_runs))

print("### list of runs with good B field and quality in the dataset: ###")
print(uniq_list_of_runs)
infotofile.append("### list of runs with good B field and quality in the dataset: ###\n")
infotofile.append("### %s\n" % str(uniq_list_of_runs))

# Drop duplicated (file, number of events) pairs.  The first occurrence is
# kept so the file order stays deterministic, and an empty selection simply
# yields empty lists rather than failing while unpacking zip() of nothing.
files_events = list(zip(list_of_files, list_of_numevents))
seen_files_events = set()
unique_files_events = []
for file_and_events in files_events:
    if file_and_events not in seen_files_events:
        seen_files_events.add(file_and_events)
        unique_files_events.append(file_and_events)

list_of_files = [file_and_events[0] for file_and_events in unique_files_events]
list_of_numevents = [file_and_events[1] for file_and_events in unique_files_events]
total_numevents = sum([int(numevents_text) for numevents_text in list_of_numevents])

print("### total number of events in those "+str(len(uniq_list_of_runs))+" runs = "+str(total_numevents))

infotofile.append("### total number of events in those "+str(len(uniq_list_of_runs))+" runs = "+str(total_numevents))
00437
00438
00439
00440
00441
# Write the output file: the collected "###" header lines followed by a
# Python list "fileNames" with one quoted file name per line, each annotated
# with its number of events.
size = len(list_of_files)

ff = open(options.outputFile, 'w')
ff.write("".join(infotofile))
ff.write("\nfileNames = [\n")

last_index = size - 1
file_entries = []
for index in range(size):
    # Every entry but the last is followed by a comma.
    comma = ","
    if index == last_index:
        comma = ""
    file_entries.append(" '" + list_of_files[index] + "'" + comma + " # " + list_of_numevents[index] + "\n")

ff.writelines(file_entries)
ff.write(']\n')
ff.close()
00458