CMS 3D CMS Logo

findQualityFiles.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 
3 ######################################################
4 ### See documentation at
5 ### https://twiki.cern.ch/twiki/bin/view/CMS/FindQualityFilesPy
6 ### also run it with -h option
7 ######################################################
8 
import os
import sys
import DLFCN
import optparse
import subprocess
11 
12 # for RunInfo API
13 from pluginCondDBPyInterface import *
14 from CondCore.Utilities import iovInspector as inspect
16 
17 # for RunRegistry API
18 import xmlrpclib
19 
20 # for json support
# json is part of the standard library from Python 2.6 on; older interpreters
# need the external simplejson package instead.
# Only ImportError is caught: a bare "except:" would also swallow
# KeyboardInterrupt/SystemExit and hide unrelated errors.
try:
    import json
except ImportError:
    try:
        import simplejson as json
    except ImportError:
        print("Please use lxplus or set an environment (for example crab) with json lib available")
        sys.exit(1)
29 
30 ######################################################
print("### command line:")

# Rebuild the argument list for display: an empty argument is shown as ""
# and an argument containing a space is wrapped in double quotes.
copyargs = [
    '""' if arg == "" else ('"%s"' % arg if " " in arg else arg)
    for arg in sys.argv
]
commandline = " ".join(copyargs)

print(commandline)

# Accumulates the "###" header lines; starts with the command line itself.
infotofile = ["### %s\n" % commandline]
42 
43 ######################################################
44 # To parse commandline args
45 
# Command-line interface definition. Only the parser is built here; the
# arguments are parsed further down.
usage = ('%prog [options]\n\n'
         'Creates a Python configuration file with filenames for runs in specified run range, '
         'with certain min B field and data quality requirements.')

parser = optparse.OptionParser(usage)

# Example dataset names:
#   /Cosmics/Commissioning08-2213_Tosca090322_2pi_scaled_ReReco_FromTrackerPointing-v1/RAW-RECO
#   /Cosmics/Commissioning08_CRAFT_ALL_V11_StreamALCARECOMuAlGlobalCosmics_227_Tosca090216_ReReco_FromTrackerPointing_v5/ALCARECO
parser.add_option("-d", "--alcaDataset",
                  help="[REQUIRED] Name of the input AlCa dataset to get filenames from.",
                  type="string",
                  default='',
                  dest="alcaDataset")

# Kept as a string option ('true'/'false') rather than a flag; the value is
# compared against these two literals by the rest of the script.
parser.add_option("-m", "--isMC",
                  help="Whether sample is MC (true) or real data (false).",
                  type="string",
                  default="false",
                  dest="isMC")

parser.add_option("-s", "--startRun",
                  help="First run number in range.",
                  type="int",
                  default=0,
                  dest="startRun")

parser.add_option("-e", "--endRun",
                  help="Last run number in range.",
                  type="int",
                  default=999999999,
                  dest="endRun")

# A previously used default was 3.77; the current default of 0 applies no lower cut.
parser.add_option("-b", "--minB",
                  help="Lower limit on minimal B field for a run.",
                  type="float",
                  default=0.,
                  dest="minB")

parser.add_option("--maxB",
                  help="Upper limit on B field for a run.",
                  type="float",
                  default=999.,
                  dest="maxB")

parser.add_option("-r", "--runRegistry",
                  help="If present, use RunRegistry API for B field and data quality query",
                  action="store_true",
                  default=False,
                  dest="runRegistry")

parser.add_option("-j", "--json",
                  help="If present with JSON file as argument, use JSON file for the good runs "
                       "and ignore B field and --runRegistry options. "
                       "The latest JSON file is available at "
                       "/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions10/7TeV/StreamExpress/",
                  type="string",
                  default="",
                  dest="json")

parser.add_option("-t", "--dbTag",
                  help="Runinfo DB tag to use.",
                  type="string",
                  default="runinfo_31X_hlt",
                  dest="dbTag")

parser.add_option("--printTags",
                  help="If present, the only thing script will do is printing list of tags in the DB",
                  action="store_true",
                  default=False,
                  dest="printTags")

parser.add_option("--dbName",
                  help="RunInfo DB name to use. The default one is "
                       "'oracle://cms_orcoff_prod/CMS_COND_31X_RUN_INFO'",
                  type="string",
                  default="oracle://cms_orcoff_prod/CMS_COND_31X_RUN_INFO",
                  dest="dbName")

# The help text used to say "CRAFT08" for both example datasets; the second
# one (/Cosmics/CRAFT09-v1/RAW) is the CRAFT09 dataset.
parser.add_option("--dqDataset",
                  help="Dataset name to query for good data quality runs. "
                       "If this option is not used, dqDataset=alcaDataset is automatically set. "
                       "If alcaDataset does not have DQ information use /Cosmics/Commissioning08-v1/RAW for CRAFT08 "
                       "and use /Cosmics/CRAFT09-v1/RAW for CRAFT09",
                  type="string",
                  default="",
                  dest="dqDataset")

# Previously used, less strict defaults for reference:
#   DT_Shift_Offline=GOOD&SiStrip_Shift_Offline=GOOD&Pixel_Shift_Offline=GOOD
#   DT_Shift_Offline=GOOD&Pixel_Shift_Offline=GOOD
#   DT_Shift_Offline=GOOD
parser.add_option("-c", "--dqCriteria",
                  help="Set of DQ criteria to use with -dq flag of dbs.\n"
                       "An example of a really strict condition:\n"
                       "'DT_Shift_Offline=GOOD&CSC_Shift_Offline=GOOD&SiStrip_Shift_Offline=GOOD&Pixel_Shift_Offline=GOOD'\n"
                       "NOTE: if --runRegistry is used, DQ criteria syntax should be as Advanced query syntax for RR. E.g.: "
                       "\"{cmpDt}='GOOD' and {cmpCsc}='GOOD' and {cmpStrip}='GOOD' and {cmpPix}='GOOD'\"",
                  type="string",
                  default="",
                  dest="dqCriteria")

parser.add_option("-o", "--outputFile",
                  help="Name for output file (please include the .py suffix)",
                  type="string",
                  default="filelist.py",
                  dest="outputFile")

parser.add_option("-v", "--verbose",
                  help="Degree of debug info verbosity",
                  type="int",
                  default=0,
                  dest="verbose")
157 
options, args = parser.parse_args()

# --alcaDataset is mandatory unless the script is only asked to print DB tags.
# Invalid invocations exit with a non-zero status so that calling shells and
# scripts can detect the failure (a bare sys.exit() reports success).
if options.alcaDataset == '' and not options.printTags:
    print("--alcaDataset /your/dataset/name is required!")
    sys.exit(1)

# Without an explicit --dqDataset the data quality query uses the AlCa dataset.
if options.dqDataset == '':
    options.dqDataset = options.alcaDataset

if options.isMC not in ('true', 'false'):
    print("--isMC option can have only 'true' or 'false' arguments")
    sys.exit(1)

# Short alias for the verbosity level.
v = options.verbose

# B-field limits scaled by 18160/3.8; these are the thresholds the RunInfo
# current values are compared against.
# NOTE(review): presumably 18160 A is the magnet current at 3.8 T -- confirm.
minI = options.minB*18160/3.8
maxI = options.maxB*18160/3.8

# Optional fragments for the echoed option summary below.
rr = ''
if options.runRegistry:
    rr = ' --runRegistry'

jj = ''
if options.json != '':
    jj = ' --json ' + options.json

# One-line summary of the effective options (defaults included).
# --dbName and --verbose are not part of this summary.
allOptions = ''.join([
    '### ', copyargs[0],
    ' --alcaDataset ', options.alcaDataset,
    ' --isMC ', options.isMC,
    ' --startRun ', str(options.startRun),
    ' --endRun ', str(options.endRun),
    ' --minB ', str(options.minB),
    ' --maxB ', str(options.maxB),
    rr, jj,
    ' --dbTag ', options.dbTag,
    ' --dqDataset ', options.dqDataset,
    ' --dqCriteria "', options.dqCriteria, '"',
    ' --outputFile ', options.outputFile,
])

print("### all options, including default:")
print(allOptions)
197 
198 
199 ######################################################
200 # functions definitions
201 
202 
203 #########################
204 # get good B field runs from RunInfo DB
206 
def getGoodBRuns():
    """Return the runs whose magnet current lies within [minI, maxI].

    Reads the RunInfo conditions DB named by ``options.dbName`` using the tag
    ``options.dbTag`` and scans the runs between ``options.startRun - 1`` and
    ``options.endRun + 1``.

    If ``options.printTags`` is set, the list of tags in the DB is printed and
    the script exits instead of returning.

    Returns:
        list of int run numbers. Any exception raised while reading the DB is
        printed and the runs collected so far (possibly none) are returned.
    """
    runs_b_on = []

    sys.setdlopenflags(DLFCN.RTLD_GLOBAL+DLFCN.RTLD_LAZY)

    # The instance is not used afterwards.
    # NOTE(review): presumably constructing it initialises the framework
    # plugins as a side effect -- confirm before removing.
    a = FWIncantation()
    rdbms = RDBMS("/afs/cern.ch/cms/DB/conddb")

    db = rdbms.getDB(options.dbName)
    tags = db.allTags()

    if options.printTags:
        print("\nOverview of all tags in "+options.dbName+" :\n")
        print(tags)
        print("\n")
        sys.exit()

    # Alternative tags used in the past: 'runinfo_31X_hlt' (inspect the last
    # run after it has started), 'runinfo_test' (after it has stopped).
    tag = options.dbTag

    try:
        iov = inspect.Iov(db, tag)

        if v > 1:
            print("######## summries ########")
            for x in iov.summaries():
                print(" ".join(str(item) for item in (x[0], x[1], x[2], x[3])))

        what = {}

        if v > 1:
            print("###(start_current,stop_current,avg_current,max_current,min_current,run_interval_micros) vs runnumber###")
            print(iov.trend(what))

        if v > 0:
            print("######## trends ########")
        # The scan itself runs at every verbosity level; only the printing is
        # conditional.
        for x in iov.trendinrange(what, options.startRun-1, options.endRun+1):
            # Three hard-coded run numbers are always printed, regardless of
            # the verbosity level.
            if v > 0 or x[0] == 67647 or x[0] == 66893 or x[0] == 67264:
                print(" ".join(str(item) for item in (x[0], x[1], x[2], x[2][4], x[2][3])))
            # Going by the field order printed above, x[2][4] is presumably
            # min_current and x[2][3] max_current -- TODO confirm.
            if x[2][4] >= minI and x[2][3] <= maxI:
                runs_b_on.append(int(x[0]))

    except Exception as er:
        # Deliberate best effort: report the problem and continue with
        # whatever has been collected.
        print(er)

    print("### runs with good B field ###")
    print(runs_b_on)

    return runs_b_on
268 
269 
270 #########################
271 # obtaining list of good quality runs
272 
274 
def getGoodQRuns():
    """Return the runs of ``options.dqDataset`` passing ``options.dqCriteria``.

    Runs the DBS command line tool ($DBSCMD_HOME/dbsCommandLine.py) and reads
    one run number per output line.

    The tool is started directly with an argument list and its output is read
    through a pipe. This avoids the fixed, world-shared temporary file in /tmp
    (two concurrent invocations on the same machine would overwrite each
    other's results) and keeps shell metacharacters in the dataset name or DQ
    criteria from being interpreted by a shell.

    Returns:
        list of int run numbers, in the sorted order of the output lines.

    Raises:
        ValueError: if a non-empty output line is not an integer.
    """
    dbs_quiery = "find run where dataset="+options.dqDataset+" and dq="+options.dqCriteria
    print('dbs search --noheader --query="'+dbs_quiery+'" | sort')

    dbs_cmd = ['python', os.path.expandvars('$DBSCMD_HOME/dbsCommandLine.py'),
               '-c', 'search', '--noheader', '--query=' + dbs_quiery]
    proc = subprocess.Popen(dbs_cmd, stdout=subprocess.PIPE, universal_newlines=True)
    dbs_output = proc.communicate()[0]

    # Lines are sorted as text, like the "| sort" stage of the former shell
    # pipeline; blank lines are skipped.
    runs_good_dq = [int(line) for line in sorted(dbs_output.splitlines()) if line.strip()]

    print("### runs with good quality ###")
    print(runs_good_dq)

    return runs_good_dq
297 
298 #########################
299 # obtaining list of good B and quality runs from Run Registry
300 # https://twiki.cern.ch/twiki/bin/view/CMS/DqmRrApi
301 # https://twiki.cern.ch/twiki/bin/viewauth/CMS/DQMRunRegistry
302 
304 
def getRunRegistryGoodRuns():
    """Return the good runs according to the Run Registry XML-RPC service.

    The query selects runs in [options.startRun, options.endRun] with a B
    field in [options.minB, options.maxB]; ``options.dqCriteria`` is appended
    to the query when it is not empty.

    Returns:
        list with the first element of every entry under the 'events' key of
        the server response.
    """
    server = xmlrpclib.ServerProxy('http://pccmsdqm04.cern.ch/runregistry/xmlrpc')

    rr_quiery = "{runNumber}>="+str(options.startRun)+" and {runNumber}<="+str(options.endRun)+\
                " and {bfield}>="+str(options.minB)+" and {bfield}<="+str(options.maxB)
    if options.dqCriteria != "":
        rr_quiery += " and "+options.dqCriteria

    rrstr = server.RunDatasetTable.export('GLOBAL', 'chart_runs_cum_evs_vs_bfield', rr_quiery)
    # Quote the two keys so that the response can be evaluated as a Python
    # expression.
    rrstr = rrstr.replace("bfield", "'bfield'")
    rrstr = rrstr.replace("events", "'events'")
    # SECURITY NOTE(review): eval() executes whatever text the server sent
    # back, and the request goes over plain http. If the response is a pure
    # literal, ast.literal_eval would be a safe replacement -- the response
    # format needs to be confirmed before switching.
    rrdata = eval(rrstr)

    return [entry[0] for entry in rrdata['events']]
320 
321 #########################
322 # obtain a list of good runs from JSON file
323 
325 
def getJSONGoodRuns(jsonPath=None):
    """Return the sorted run numbers listed in a JSON file.

    The run numbers are the top-level keys of the JSON object; the values
    stored under them are ignored.

    Args:
        jsonPath: path of the JSON file. When omitted, ``options.json`` is
            used, which is the behaviour existing callers rely on.

    Returns:
        list of int run numbers in ascending order.

    Raises:
        IOError: if the file cannot be opened.
        ValueError: if the file is not valid JSON or a key is not an integer.
    """
    if jsonPath is None:
        jsonPath = options.json

    # The context manager closes the file even if parsing fails.
    with open(jsonPath, 'r') as jsonfile:
        jsondict = json.load(jsonfile)

    return sorted(int(run) for run in jsondict)
341 
342 ######################################################
343 # get good B field runs from RunInfo DB
344 
# Select the good runs. Nothing is selected for MC samples; for real data
# exactly one source is used: a JSON file if given, otherwise the Run
# Registry if requested, otherwise the RunInfo DB (B field) combined with
# the DBS data quality query.

runs_b_on = []
runs_good_dq = []
runs_good = []

if options.isMC == 'false':
    if options.json != '':
        # use JSON file if specified
        runs_good = getJSONGoodRuns()
        print("### good runs from JSON file ###")
        print(runs_good)

    elif options.runRegistry:
        # use run registry API if specified
        runs_good = getRunRegistryGoodRuns()
        print("### runs with good B field and quality ###")
        print(runs_good)

    else:
        # good B field runs from the RunInfo DB
        runs_b_on = getGoodBRuns()

        infotofile.append("### runs with good B field ###\n")
        infotofile.append("### %s\n" % str(runs_b_on))

        # add the requirement of good data quality
        runs_good_dq = getGoodQRuns()

        infotofile.append("### runs with good quality ###\n")
        infotofile.append("### %s\n" % str(runs_good_dq))

        # intersection of the two lists, in the order of runs_b_on
        good_dq_lookup = set(runs_good_dq)
        runs_good = [run for run in runs_b_on if run in good_dq_lookup]

        print("### runs with good B field and quality ###")
        print(runs_good)

        infotofile.append("### runs with good B field and quality ###\n")
        infotofile.append("### %s\n" % str(runs_good))
392 
393 ######################################################
394 # Find files for good runs
395 
# Ask DBS for (run, number of events, file name) of every non-empty file of
# the AlCa dataset in the requested run range.
dbs_quiery = "find run, file.numevents, file where dataset="+options.alcaDataset+" and run>="+str(options.startRun)+" and run<="+str(options.endRun)+" and file.numevents>0"

# The tool is started directly with an argument list and read through a pipe:
# no fixed temporary file in /tmp that concurrent invocations would overwrite,
# and no shell interpreting characters of the dataset name.
dbs_cmd = ['python', os.path.expandvars('$DBSCMD_HOME/dbsCommandLine.py'),
           '-c', 'search', '--noheader', '--query=' + dbs_quiery]
dbs_proc = subprocess.Popen(dbs_cmd, stdout=subprocess.PIPE, universal_newlines=True)
dbs_output = dbs_proc.communicate()[0]

list_of_files = []
list_of_runs = []
list_of_numevents = []
total_numevents = 0

# For MC samples every file is accepted; for data only files of good runs.
select_by_run = (options.isMC == 'false')
good_run_lookup = set(runs_good)

# A file can contain events from several runs and then shows up once per run;
# it must be listed and counted only once.
seen_files = set()

# Lines are processed in sorted order (as with the former "| sort"), which
# makes the order of the resulting file list deterministic.
for line in sorted(dbs_output.splitlines()):
    fields = line.split()
    if not fields:
        continue
    (run, numevents, fname) = fields
    if select_by_run and int(run) not in good_run_lookup:
        continue
    list_of_runs.append(int(run))
    if (fname, numevents) in seen_files:
        continue
    seen_files.add((fname, numevents))
    list_of_files.append(fname)
    list_of_numevents.append(numevents)  # kept as text
    total_numevents += int(numevents)

uniq_list_of_runs = sorted(set(list_of_runs))

print("### list of runs with good B field and quality in the dataset: ###")
print(uniq_list_of_runs)
infotofile.append("### list of runs with good B field and quality in the dataset: ###\n")
infotofile.append("### %s\n" % str(uniq_list_of_runs))

print("### total number of events in those "+str(len(uniq_list_of_runs))+" runs = "+str(total_numevents))

infotofile.append("### total number of events in those "+str(len(uniq_list_of_runs))+" runs = "+str(total_numevents))
436 
437 ######################################################
438 # Write out results
439 
# Write the output file: the "###" header lines first, then a Python list
# literal "fileNames" with one quoted file name per line, each followed by
# its number of events as a trailing comment.
size = len(list_of_files)

# The context manager closes the file even if one of the writes fails.
with open(options.outputFile, 'w') as ff:
    ff.write("".join(infotofile))
    ff.write("\nfileNames = [\n")
    for i, (fname, numevents) in enumerate(zip(list_of_files, list_of_numevents)):
        # no comma after the last entry
        comma = "" if i == size-1 else ","
        ff.write(" '" + fname + "'" + comma + " # " + numevents + "\n")
    ff.write(']\n')
457 
def getRunRegistryGoodRuns()
obtaining list of good B and quality runs from Run Registry https://twiki.cern.ch/twiki/bin/view/CMS/...
def getGoodQRuns()
obtaining list of good quality runs
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:20
OutputIterator zip(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp)
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def getGoodBRuns()
functions definitions
def getJSONGoodRuns()
obtain a list of good runs from JSON file
#define str(s)
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run