CMS 3D CMS Logo

findQualityFiles.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 
3 ######################################################
4 ### See documentation at
5 ### https://twiki.cern.ch/twiki/bin/view/CMS/FindQualityFilesPy
6 ### also run it with -h option
7 ######################################################
8 
9 from __future__ import print_function
10 from builtins import range
11 import os,sys, DLFCN
12 import optparse
13 
14 # for RunInfo API
15 from pluginCondDBPyInterface import *
16 from CondCore.Utilities import iovInspector as inspect
18 
19 # for RunRegistry API
20 import xmlrpclib
21 
22 # for json support
# for json support
try:
    # Python >= 2.6 ships json in the standard library
    import json
except ImportError:
    try:
        # older interpreters need the external simplejson package
        import simplejson as json
    except ImportError:
        # only a missing module should end up here; other errors
        # (e.g. KeyboardInterrupt) are no longer swallowed
        print("Please use lxplus or set an environment (for example crab) with json lib available")
        sys.exit(1)
31 
32 ######################################################
def quoteArg(arg):
    """Return *arg* as it should appear in the echoed command line.

    An empty argument becomes a pair of double quotes and an argument
    containing a space is wrapped in double quotes; anything else is
    returned unchanged.
    """
    if arg == "":
        return "\"\""
    if " " in arg:
        return "\"%s\"" % arg
    return arg


print("### command line:")
# quoted copy of the arguments, so that sys.argv itself stays untouched
copyargs = [quoteArg(arg) for arg in sys.argv]
commandline = " ".join(copyargs)

print(commandline)
# lines collected here are written as a comment header into the output file
infotofile = ["### %s\n" % commandline]
44 
45 ######################################################
46 # To parse commandline args
47 
# Command line interface. Every option has a default, so the parser can be
# built without touching sys.argv; the actual parsing happens further below.
usage = '%prog [options]\n\n' + \
    'Creates a Python configuration file with filenames for runs in specified run range, with certain min B field and data quality requirements.'

parser = optparse.OptionParser(usage)

parser.add_option("-d", "--alcaDataset",
                  help="[REQUIRED] Name of the input AlCa dataset to get filenames from.",
                  type="string",
                  default='',
                  dest="alcaDataset")

# kept as a string option: only the literal values 'true' and 'false' are accepted
parser.add_option("-m", "--isMC",
                  help="Whether sample is MC (true) or real data (false).",
                  type="string",
                  default="false",
                  dest="isMC")

parser.add_option("-s", "--startRun",
                  help="First run number in range.",
                  type="int",
                  default=0,
                  dest="startRun")

parser.add_option("-e", "--endRun",
                  help="Last run number in range.",
                  type="int",
                  default=999999999,
                  dest="endRun")

parser.add_option("-b", "--minB",
                  help="Lower limit on minimal B field for a run.",
                  type="float",
                  default=0.,
                  dest="minB")

parser.add_option("--maxB",
                  help="Upper limit on B field for a run.",
                  type="float",
                  default=999.,
                  dest="maxB")

parser.add_option("-r", "--runRegistry",
                  help="If present, use RunRegistry API for B field and data quality query",
                  action="store_true",
                  default=False,
                  dest="runRegistry")

parser.add_option("-j", "--json",
                  help="If present with JSON file as argument, use JSON file for the good runs and ignore B field and --runRegistry options. " +
                       "The latest JSON file is available at /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions10/7TeV/StreamExpress/",
                  type="string",
                  default="",
                  dest="json")

parser.add_option("-t", "--dbTag",
                  help="Runinfo DB tag to use.",
                  type="string",
                  default="runinfo_31X_hlt",
                  dest="dbTag")

parser.add_option("--printTags",
                  help="If present, the only thing script will do is printing list of tags in the DB",
                  action="store_true",
                  default=False,
                  dest="printTags")

parser.add_option("--dbName",
                  help="RunInfo DB name to use. The default one is " +
                       "'oracle://cms_orcoff_prod/CMS_COND_31X_RUN_INFO'",
                  type="string",
                  default="oracle://cms_orcoff_prod/CMS_COND_31X_RUN_INFO",
                  dest="dbName")

parser.add_option("--dqDataset",
                  help="Dataset name to query for good data quality runs. " +
                       "If this option is not used, dqDataset=alcaDataset is automatically set. " +
                       "If alcaDataset does not have DQ information use /Cosmics/Commissioning08-v1/RAW for CRAFT08 " +
                       "and use /Cosmics/CRAFT09-v1/RAW for CRAFT09",
                  type="string",
                  default="",
                  dest="dqDataset")

# the help fragments are explicitly separated so that the example and the
# NOTE do not run into each other in the rendered help text
parser.add_option("-c", "--dqCriteria",
                  help="Set of DQ criteria to use with -dq flag of dbs.\n" +
                       "An example of a really strict condition:\n" +
                       "'DT_Shift_Offline=GOOD&CSC_Shift_Offline=GOOD&SiStrip_Shift_Offline=GOOD&Pixel_Shift_Offline=GOOD'\n" +
                       "NOTE: if --runRegistry is used, DQ criteria syntax should be as Advanced query syntax for RR. E.g.: " +
                       "\"{cmpDt}='GOOD' and {cmpCsc}='GOOD' and {cmpStrip}='GOOD' and {cmpPix}='GOOD'\"",
                  type="string",
                  default="",
                  dest="dqCriteria")

parser.add_option("-o", "--outputFile",
                  help="Name for output file (please include the .py suffix)",
                  type="string",
                  default="filelist.py",
                  dest="outputFile")

parser.add_option("-v", "--verbose",
                  help="Degree of debug info verbosity",
                  type="int",
                  default=0,
                  dest="verbose")
159 
options, args = parser.parse_args()

# --alcaDataset is mandatory unless only the DB tag listing was requested
if options.alcaDataset == '' and not options.printTags:
    print("--alcaDataset /your/dataset/name is required!")
    # non-zero status so that calling scripts can detect the usage error
    sys.exit(1)

# the data quality lookup falls back to the AlCa dataset itself
if options.dqDataset == '':
    options.dqDataset = options.alcaDataset

if options.isMC not in ('true', 'false'):
    print("--isMC option can have only 'true' or 'false' arguments")
    sys.exit(1)

v = options.verbose

# B field limits rescaled by 18160/3.8; the results are compared with the
# min/max current values read from the RunInfo DB in getGoodBRuns()
# (presumably 18160 A corresponds to 3.8 T -- confirm)
minI = options.minB*18160/3.8
maxI = options.maxB*18160/3.8

# optional flags are echoed only when they were actually given
rr = ' --runRegistry' if options.runRegistry else ''
jj = ' --json ' + options.json if options.json != '' else ''

allOptions = ('### ' + copyargs[0] +
              ' --alcaDataset ' + options.alcaDataset +
              ' --isMC ' + options.isMC +
              ' --startRun ' + str(options.startRun) +
              ' --endRun ' + str(options.endRun) +
              ' --minB ' + str(options.minB) +
              ' --maxB ' + str(options.maxB) + rr + jj +
              ' --dbTag ' + options.dbTag +
              ' --dqDataset ' + options.dqDataset +
              ' --dqCriteria "' + options.dqCriteria + '"' +
              ' --outputFile ' + options.outputFile)

print("### all options, including default:")
print(allOptions)
199 
200 
201 ######################################################
202 # functions definitions
203 
204 
205 #########################
206 # get good B field runs from RunInfo DB
def getGoodBRuns():
    """Return the runs whose magnet currents satisfy the B field limits.

    Reads the RunInfo DB ``options.dbName`` with tag ``options.dbTag`` and
    keeps every run in the requested run range whose trend record has
    element 4 >= ``minI`` and element 3 <= ``maxI`` (min_current and
    max_current according to the header printed in verbose mode).

    With ``--printTags`` the list of DB tags is printed and the script
    exits. Any exception raised while inspecting the IOV is printed and
    the runs collected so far are returned.
    """
    runs_b_on = []

    sys.setdlopenflags(DLFCN.RTLD_GLOBAL+DLFCN.RTLD_LAZY)

    # the object is not used afterwards but is kept referenced for the
    # lifetime of the DB access below
    a = FWIncantation()
    rdbms = RDBMS("/afs/cern.ch/cms/DB/conddb")

    db = rdbms.getDB(options.dbName)
    tags = db.allTags()

    if options.printTags:
        print("\nOverview of all tags in "+options.dbName+" :\n")
        print(tags)
        print("\n")
        sys.exit()

    tag = options.dbTag

    # runs that are always printed, independent of the verbosity level
    always_shown = (67647, 66893, 67264)

    try:
        iov = inspect.Iov(db, tag)
        what = {}

        if v > 1:
            print("######## summries ########")
            for summary in iov.summaries():
                print(summary[0], summary[1], summary[2], summary[3])

            print("###(start_current,stop_current,avg_current,max_current,min_current,run_interval_micros) vs runnumber###")
            print(iov.trend(what))

        if v > 0:
            print("######## trends ########")

        # the range is widened by one run on each side of the requested one
        for entry in iov.trendinrange(what, options.startRun-1, options.endRun+1):
            run = entry[0]
            if v > 0 or run in always_shown:
                print(run, entry[1], entry[2], entry[2][4], entry[2][3])
            currents = entry[2]
            if currents[4] >= minI and currents[3] <= maxI:
                runs_b_on.append(int(run))

    except Exception as er:
        # best effort: report the problem and continue with what was found
        print(er)

    print("### runs with good B field ###")
    print(runs_b_on)

    return runs_b_on
270 
271 
272 #########################
273 # obtaining list of good quality runs
274 
def getGoodQRuns():
    """Return the runs of ``options.dqDataset`` passing ``options.dqCriteria``.

    The DBS command line tool is run through the shell, its sorted output
    is redirected into a private temporary file and every non-empty line of
    that file is converted to an integer run number.

    Raises ValueError if a non-empty output line is not an integer.
    """
    import tempfile

    dbs_quiery = "find run where dataset="+options.dqDataset+" and dq="+options.dqCriteria
    print('dbs search --noheader --query="'+dbs_quiery+'" | sort')

    # unique temporary file: concurrent invocations no longer overwrite
    # (or delete) each other's intermediate results
    handle, tmp_name = tempfile.mkstemp(prefix='runs_full_of_pink_bunnies_')
    os.close(handle)
    try:
        os.system('python $DBSCMD_HOME/dbsCommandLine.py -c search --noheader --query="'
                  + dbs_quiery + '" | sort > "' + tmp_name + '"')

        with open(tmp_name, "r") as ff:
            # blank lines are skipped instead of crashing in int()
            runs_good_dq = [int(line) for line in ff if line.strip()]
    finally:
        os.remove(tmp_name)

    print("### runs with good quality ###")
    print(runs_good_dq)

    return runs_good_dq
299 
300 #########################
301 # obtaining list of good B and quality runs from Run Registry
302 # https://twiki.cern.ch/twiki/bin/view/CMS/DqmRrApi
303 # https://twiki.cern.ch/twiki/bin/viewauth/CMS/DQMRunRegistry
304 
def getRunRegistryGoodRuns():
    """Return run numbers selected by the Run Registry XML-RPC service.

    The query combines the run range and B field limits from ``options``
    and, when given, ``options.dqCriteria``. The first element of every
    entry under the 'events' key of the reply is returned.
    """
    server = xmlrpclib.ServerProxy('http://pccmsdqm04.cern.ch/runregistry/xmlrpc')

    conditions = [
        "{runNumber}>="+str(options.startRun),
        "{runNumber}<="+str(options.endRun),
        "{bfield}>="+str(options.minB),
        "{bfield}<="+str(options.maxB),
    ]
    if options.dqCriteria != "":
        conditions.append(options.dqCriteria)
    rr_quiery = " and ".join(conditions)

    rrstr = server.RunDatasetTable.export('GLOBAL', 'chart_runs_cum_evs_vs_bfield', rr_quiery)
    # quote the two bare keys so that the reply can be evaluated below
    rrstr = rrstr.replace("bfield", "'bfield'").replace("events", "'events'")
    # NOTE(review): eval() executes whatever the remote server returns;
    # consider a literal-only parser once the reply format is confirmed
    rrdata = eval(rrstr)

    return [entry[0] for entry in rrdata['events']]
322 
323 #########################
324 # obtain a list of good runs from JSON file
325 
def getJSONGoodRuns():
    """Return the sorted list of run numbers found in the JSON file.

    The file named by ``options.json`` must contain a JSON object whose
    keys are run numbers; the values are not inspected.

    Raises ValueError if a key cannot be converted to an integer.
    """
    # open() instead of the Python-2-only file() builtin; the context
    # manager also guarantees that the handle is closed
    with open(options.json, 'r') as jsonfile:
        jsondict = json.load(jsonfile)

    return sorted(int(run) for run in jsondict)
343 
344 ######################################################
345 # get good B field runs from RunInfo DB
346 
runs_b_on = []

if options.isMC=='false' and not options.runRegistry and options.json=='':
    runs_b_on = getGoodBRuns()

    infotofile.append("### runs with good B field ###\n")
    infotofile.append("### %s\n" % str(runs_b_on))

######################################################
# Add requirement of good quality runs

runs_good_dq = []
runs_good = []

if options.isMC=='false' and not options.runRegistry and options.json=='':
    runs_good_dq = getGoodQRuns()

    infotofile.append("### runs with good quality ###\n")
    infotofile.append("### %s\n" % str(runs_good_dq))

    # intersection of runs_b_on and runs_good_dq, in the order of runs_b_on;
    # the set makes each membership test O(1) instead of a list scan
    good_dq_lookup = set(runs_good_dq)
    runs_good = [val for val in runs_b_on if val in good_dq_lookup]

    print("### runs with good B field and quality ###")
    print(runs_good)

    infotofile.append("### runs with good B field and quality ###\n")
    infotofile.append("### %s\n" % str(runs_good))

######################################################
# use run registry API if specified

if options.isMC=='false' and options.runRegistry and options.json=='':
    runs_good = getRunRegistryGoodRuns()
    print("### runs with good B field and quality ###")
    print(runs_good)

######################################################
# use JSON file if specified (takes precedence over --runRegistry)

if options.isMC=='false' and options.json!='':
    runs_good = getJSONGoodRuns()
    print("### good runs from JSON file ###")
    print(runs_good)
395 ######################################################
396 # Find files for good runs
397 
dbs_quiery = "find run, file.numevents, file where dataset="+options.alcaDataset+" and run>="+str(options.startRun)+" and run<="+str(options.endRun)+" and file.numevents>0"

os.system('python $DBSCMD_HOME/dbsCommandLine.py -c search --noheader --query="'+dbs_quiery+'" | sort > /tmp/runs_and_files_full_of_pink_bunnies')

list_of_files = []
list_of_runs = []
list_of_numevents = []

# set lookup instead of scanning the list of good runs for every file
good_run_lookup = set(runs_good)
# a file can hold events from several runs, so the same (file, numevents)
# pair may show up more than once; only its first occurrence is kept, which
# also keeps the output order reproducible
seen_files_events = set()

with open('/tmp/runs_and_files_full_of_pink_bunnies', 'r') as ff:
    for line in ff:
        fields = line.split()
        if not fields:
            # tolerate blank lines in the tool output
            continue
        (run, numevents, fname) = fields
        run = int(run)
        # for MC every run is accepted
        if options.isMC=='false' and run not in good_run_lookup:
            continue
        list_of_runs.append(run)
        if (fname, numevents) not in seen_files_events:
            seen_files_events.add((fname, numevents))
            list_of_files.append(fname)
            list_of_numevents.append(numevents)
# the intermediate file is deliberately left in /tmp
#os.system('rm /tmp/runs_and_files_full_of_pink_bunnies')

uniq_list_of_runs = sorted(set(list_of_runs))

print("### list of runs with good B field and quality in the dataset: ###")
print(uniq_list_of_runs)
infotofile.append("### list of runs with good B field and quality in the dataset: ###\n")
infotofile.append("### %s\n" % str(uniq_list_of_runs))

# computed on the de-duplicated list; yields 0 instead of crashing when no
# file was selected
total_numevents = sum(int(numevents) for numevents in list_of_numevents)

print("### total number of events in those "+str(len(uniq_list_of_runs))+" runs = "+str(total_numevents))

infotofile.append("### total number of events in those "+str(len(uniq_list_of_runs))+" runs = "+str(total_numevents))
438 
439 ######################################################
440 # Write out results
441 
size = len(list_of_files)

# the context manager closes the output file even if a write fails
with open(options.outputFile, 'w') as ff:
    # header: command line and run lists, all as '###' comment lines
    ff.write("".join(infotofile))
    ff.write("\nfileNames = [\n")
    for i, (fname, numevents) in enumerate(zip(list_of_files, list_of_numevents)):
        # no separator after the last list entry
        comma = "" if i == size-1 else ","
        ff.write(" '" + fname + "'" + comma + " # " + numevents + "\n")
    ff.write(']\n')
459 
def getRunRegistryGoodRuns()
obtaining list of good B and quality runs from Run Registry https://twiki.cern.ch/twiki/bin/view/CMS/...
def getGoodQRuns()
obtaining list of good quality runs
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:19
OutputIterator zip(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp)
static std::string join(char **cmd)
Definition: RemoteFile.cc:17
def getGoodBRuns()
functions definitions
def getJSONGoodRuns()
obtain a list of good runs from JSON file
#define str(s)
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run