CMS 3D CMS Logo

findQualityFiles.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 
3 ######################################################
4 ### See documentation at
5 ### https://twiki.cern.ch/twiki/bin/view/CMS/FindQualityFilesPy
6 ### also run it with -h option
7 ######################################################
8 
9 from __future__ import print_function
10 import os,sys, DLFCN
11 import optparse
12 
13 # for RunInfo API
14 from pluginCondDBPyInterface import *
15 from CondCore.Utilities import iovInspector as inspect
17 
18 # for RunRegistry API
19 import xmlrpclib
20 
# for json support
# Prefer the standard-library json module (Python >= 2.6) and fall back to the
# external simplejson package on older interpreters.  Only ImportError is
# caught so that unrelated failures are not silently hidden.
try:
    import json
except ImportError:
    try:
        import simplejson as json
    except ImportError:
        print("Please use lxplus or set an environment (for example crab) with json lib available")
        sys.exit(1)
30 
31 ######################################################
print("### command line:")

# Rebuild the invocation as a single printable string: empty arguments are
# shown as "" and arguments containing a space are wrapped in double quotes.
copyargs = sys.argv[:]
for i, arg in enumerate(copyargs):
    if arg == "":
        arg = "\"\""
    if arg.find(" ") != -1:
        arg = "\"%s\"" % arg
    copyargs[i] = arg
commandline = " ".join(copyargs)

print(commandline)

# Header lines that are written at the top of the output file later on.
infotofile = ["### %s\n" % commandline]
43 
44 ######################################################
45 # To parse commandline args
46 
usage = '%prog [options]\n\n' + \
    'Creates a Python configuration file with filenames for runs in specified run range, with certain min B field and data quality requirements.'

# Command-line interface of the script.  Every option has a default, so the
# only one the script later insists on is --alcaDataset (unless --printTags).
parser = optparse.OptionParser(usage)

parser.add_option("-d", "--alcaDataset",
    help="[REQUIRED] Name of the input AlCa dataset to get filenames from.",
    type="string",
    # example: "/Cosmics/Commissioning08-2213_Tosca090322_2pi_scaled_ReReco_FromTrackerPointing-v1/RAW-RECO"
    # example: "/Cosmics/Commissioning08_CRAFT_ALL_V11_StreamALCARECOMuAlGlobalCosmics_227_Tosca090216_ReReco_FromTrackerPointing_v5/ALCARECO"
    default='',
    dest="alcaDataset")

# Kept as a 'true'/'false' string (not a boolean flag); the script validates
# the value after parsing.
parser.add_option("-m", "--isMC",
    help="Whether sample is MC (true) or real data (false).",
    type="string",
    default="false",
    dest="isMC")

parser.add_option("-s", "--startRun",
    help="First run number in range.",
    type="int",
    default=0,
    dest="startRun")

parser.add_option("-e", "--endRun",
    help="Last run number in range.",
    type="int",
    default=999999999,
    dest="endRun")

parser.add_option("-b", "--minB",
    help="Lower limit on minimal B field for a run.",
    type="float",
    # example: 3.77
    default=0.,
    dest="minB")

parser.add_option("--maxB",
    help="Upper limit on B field for a run.",
    type="float",
    default=999.,
    dest="maxB")

parser.add_option("-r", "--runRegistry",
    help="If present, use RunRegistry API for B field and data quality query",
    action="store_true",
    default=False,
    dest="runRegistry")

parser.add_option("-j", "--json",
    help="If present with JSON file as argument, use JSON file for the good runs and ignore B field and --runRegistry options. " +
         "The latest JSON file is available at /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions10/7TeV/StreamExpress/",
    type="string",
    default="",
    dest="json")

parser.add_option("-t", "--dbTag",
    help="Runinfo DB tag to use.",
    type="string",
    default="runinfo_31X_hlt",
    dest="dbTag")

parser.add_option("--printTags",
    help="If present, the only thing script will do is printing list of tags in the DB",
    action="store_true",
    default=False,
    dest="printTags")

parser.add_option("--dbName",
    help="RunInfo DB name to use. The default one is " +
         "'oracle://cms_orcoff_prod/CMS_COND_31X_RUN_INFO'",
    type="string",
    default="oracle://cms_orcoff_prod/CMS_COND_31X_RUN_INFO",
    dest="dbName")

# An empty --dqDataset is replaced by the --alcaDataset value after parsing.
parser.add_option("--dqDataset",
    help="Dataset name to query for good data quality runs. " +
         "If this option is not used, dqDataset=alcaDataset is automatically set. " +
         "If alcaDataset does not have DQ information use /Cosmics/Commissioning08-v1/RAW for CRAFT08 " +
         "and use /Cosmics/CRAFT09-v1/RAW for CRAFT09",
    type="string",
    # example: "/Cosmics/Commissioning08-v1/RAW"
    # example: "/Cosmics/CRAFT09-v1/RAW"
    default="",
    dest="dqDataset")

parser.add_option("-c", "--dqCriteria",
    help="Set of DQ criteria to use with -dq flag of dbs.\n" +
         "An example of a really strict condition:\n"
         "'DT_Shift_Offline=GOOD&CSC_Shift_Offline=GOOD&SiStrip_Shift_Offline=GOOD&Pixel_Shift_Offline=GOOD'\n"
         "NOTE: if --runRegistry is used, DQ criteria syntax should be as Advanced query syntax for RR. E.g.: "
         "\"{cmpDt}='GOOD' and {cmpCsc}='GOOD' and {cmpStrip}='GOOD' and {cmpPix}='GOOD'\"",
    type="string",
    # example: "DT_Shift_Offline=GOOD&SiStrip_Shift_Offline=GOOD&Pixel_Shift_Offline=GOOD"
    # example: "DT_Shift_Offline=GOOD&Pixel_Shift_Offline=GOOD"
    # example: "DT_Shift_Offline=GOOD"
    default="",
    dest="dqCriteria")

parser.add_option("-o", "--outputFile",
    help="Name for output file (please include the .py suffix)",
    type="string",
    default="filelist.py",
    dest="outputFile")

parser.add_option("-v", "--verbose",
    help="Degree of debug info verbosity",
    type="int",
    default=0,
    dest="verbose")
158 
options, args = parser.parse_args()

# --- sanity checks on the parsed options -------------------------------
# Invalid invocations terminate with a non-zero status so that calling
# shells and scripts can detect the failure.
if options.alcaDataset == '' and not options.printTags:
    print("--alcaDataset /your/dataset/name is required!")
    sys.exit(1)

# Without an explicit DQ dataset, data quality is queried on the AlCa dataset.
if options.dqDataset == '':
    options.dqDataset = options.alcaDataset

if options.isMC not in ('true', 'false'):
    print("--isMC option can have only 'true' or 'false' arguments")
    sys.exit(1)

v = options.verbose

# Convert the B-field limits with a fixed factor of 18160/3.8; the results are
# compared against the currents read from the RunInfo DB.
# NOTE(review): presumably 18160 A is the magnet current at 3.8 T - confirm.
minI = options.minB*18160/3.8
maxI = options.maxB*18160/3.8

rr = ''
if options.runRegistry:
    rr = ' --runRegistry'

jj = ''
if options.json != '':
    jj = ' --json ' + options.json

# Full option summary, defaults included, for the log.
allOptions = '### ' + copyargs[0] + ' --alcaDataset ' + options.alcaDataset + ' --isMC ' + options.isMC + \
    ' --startRun ' + str(options.startRun) + ' --endRun ' + str(options.endRun) + \
    ' --minB ' + str(options.minB) + ' --maxB ' + str(options.maxB) + rr + jj + \
    ' --dbTag ' + options.dbTag + ' --dqDataset ' + options.dqDataset + ' --dqCriteria "' + options.dqCriteria + '"' + \
    ' --outputFile ' + options.outputFile

print("### all options, including default:")
print(allOptions)
198 
199 
200 ######################################################
201 # functions definitions
202 
203 
204 #########################
# get good B field runs from RunInfo DB
def getGoodBRuns():
    """Return the run numbers whose magnet currents lie within [minI, maxI].

    Reads the module-level ``options`` (dbName, dbTag, printTags, startRun,
    endRun), the verbosity ``v`` and the current limits ``minI``/``maxI``.
    With ``--printTags`` the list of DB tags is printed and the script exits.

    Any exception raised while inspecting the IOV is printed and otherwise
    ignored, so the function then returns the runs collected so far
    (possibly an empty list).
    """
    runs_b_on = []

    sys.setdlopenflags(DLFCN.RTLD_GLOBAL+DLFCN.RTLD_LAZY)

    # The object is never used afterwards; presumably it is created for its
    # initialisation side effects - TODO confirm.
    a = FWIncantation()
    #os.putenv("CORAL_AUTH_PATH","/afs/cern.ch/cms/DB/conddb")
    rdbms = RDBMS("/afs/cern.ch/cms/DB/conddb")

    db = rdbms.getDB(options.dbName)
    tags = db.allTags()

    if options.printTags:
        print("\nOverview of all tags in "+options.dbName+" :\n")
        print(tags)
        print("\n")
        sys.exit()

    # for inspecting last run after run has started use e.g. 'runinfo_31X_hlt',
    # for inspecting last run after run has stopped use e.g. 'runinfo_test'
    tag = options.dbTag

    try:
        iov = inspect.Iov(db, tag)

        if v > 1:
            print("######## summries ########")
            for x in iov.summaries():
                print(x[0], x[1], x[2], x[3])

        what = {}

        if v > 1:
            print("###(start_current,stop_current,avg_current,max_current,min_current,run_interval_micros) vs runnumber###")
            print(iov.trend(what))

        if v > 0:
            print("######## trends ########")
        for x in iov.trendinrange(what, options.startRun-1, options.endRun+1):
            # The three hard-coded runs are always printed, whatever the
            # verbosity.
            if v > 0 or x[0] == 67647 or x[0] == 66893 or x[0] == 67264:
                print(x[0], x[1], x[2], x[2][4], x[2][3])
            # Going by the column header printed above, x[2][4] is the minimum
            # and x[2][3] the maximum current of the run.
            if x[2][4] >= minI and x[2][3] <= maxI:
                runs_b_on.append(int(x[0]))

    except Exception as er:
        # Best effort: report the problem and continue with what was collected.
        print(er)

    print("### runs with good B field ###")
    print(runs_b_on)

    return runs_b_on
269 
270 
271 #########################
# obtaining list of good quality runs
def getGoodQRuns():
    """Return the run numbers that satisfy the data-quality criteria.

    Runs the DBS command-line search for ``options.dqDataset`` with
    ``options.dqCriteria`` (both module-level), redirects the sorted output
    into a temporary file and parses one integer run number per line.

    Raises:
        ValueError: if a non-blank output line is not an integer.
    """
    import tempfile  # function-scope import: the file-level imports are untouched

    dbs_query = "find run where dataset="+options.dqDataset+" and dq="+options.dqCriteria
    print('dbs search --noheader --query="'+dbs_query+'" | sort')

    # A unique temporary file avoids clashes between concurrent invocations
    # and users, which the previous fixed /tmp file name could not.
    fd, tmp_name = tempfile.mkstemp(prefix='runs_full_of_pink_bunnies_', dir='/tmp')
    os.close(fd)
    try:
        # NOTE(review): the query is interpolated into a shell command without
        # escaping; dataset names and criteria must not contain double quotes
        # or other shell metacharacters.
        os.system('python $DBSCMD_HOME/dbsCommandLine.py -c search --noheader --query="'+dbs_query+'" | sort > '+tmp_name)

        with open(tmp_name, "r") as ff:
            # Blank lines (e.g. from an empty query result) are skipped.
            runs_good_dq = [int(line) for line in ff if line.strip()]
    finally:
        os.remove(tmp_name)

    print("### runs with good quality ###")
    print(runs_good_dq)

    return runs_good_dq
298 
299 #########################
# obtaining list of good B and quality runs from Run Registry
# https://twiki.cern.ch/twiki/bin/view/CMS/DqmRrApi
# https://twiki.cern.ch/twiki/bin/viewauth/CMS/DQMRunRegistry
def getRunRegistryGoodRuns():
    """Return the runs selected by a Run Registry query.

    The query restricts the run number to [options.startRun, options.endRun]
    and the B field to [options.minB, options.maxB]; a non-empty
    ``options.dqCriteria`` is appended with ``and``.  The first element of
    every entry under the 'events' key of the response is returned.
    """
    server = xmlrpclib.ServerProxy('http://pccmsdqm04.cern.ch/runregistry/xmlrpc')

    rr_query = "{runNumber}>="+str(options.startRun)+" and {runNumber}<="+str(options.endRun) + \
        " and {bfield}>="+str(options.minB)+" and {bfield}<="+str(options.maxB)
    if options.dqCriteria != "":
        rr_query += " and "+options.dqCriteria

    rrstr = server.RunDatasetTable.export('GLOBAL', 'chart_runs_cum_evs_vs_bfield', rr_query)
    # Quote the bare keys so the reply becomes a Python dict literal.
    rrstr = rrstr.replace("bfield", "'bfield'")
    rrstr = rrstr.replace("events", "'events'")
    # NOTE(review): SECURITY - eval() executes text received over plain HTTP.
    # If the reply is always a plain literal, ast.literal_eval would be a safe
    # replacement; confirm against the Run Registry response format.
    rrdata = eval(rrstr)

    return [entry[0] for entry in rrdata['events']]
321 
322 #########################
# obtain a list of good runs from JSON file
def getJSONGoodRuns():
    """Return the sorted run numbers found as keys of the JSON file.

    The file name is taken from the module-level ``options.json``.  Each
    top-level key is converted with ``int``; the values are not inspected.

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError: if the file is not valid JSON or a key is not an integer.
    """
    # open() inside a with-block works on Python 2 and 3 and closes the file.
    with open(options.json, 'r') as jsonfile:
        jsondict = json.load(jsonfile)

    return sorted(int(run) for run in jsondict)
342 
343 ######################################################
# Select the good runs (real data only; for MC every run is accepted later).
# Exactly one source is used, in this order of precedence:
#   1. JSON file            (--json)
#   2. Run Registry API     (--runRegistry)
#   3. RunInfo DB for the B field combined with DBS for the data quality

runs_b_on = []
runs_good_dq = []
runs_good = []

if options.isMC == 'false':
    if options.json != '':
        # use JSON file if specified
        runs_good = getJSONGoodRuns()
        print("### good runs from JSON file ###")
        print(runs_good)

    elif options.runRegistry:
        # use run registry API if specified
        runs_good = getRunRegistryGoodRuns()
        print("### runs with good B field and quality ###")
        print(runs_good)

    else:
        # get good B field runs from RunInfo DB
        runs_b_on = getGoodBRuns()

        infotofile.append("### runs with good B field ###\n")
        infotofile.append("### %s\n" % str(runs_b_on))

        # add requirement of good quality runs
        runs_good_dq = getGoodQRuns()

        infotofile.append("### runs with good quality ###\n")
        infotofile.append("### %s\n" % str(runs_good_dq))

        # intersection of runs_b_on and runs_good_dq, keeping the order of
        # runs_b_on; the set makes each membership test O(1)
        good_dq_set = set(runs_good_dq)
        runs_good = [val for val in runs_b_on if val in good_dq_set]

        print("### runs with good B field and quality ###")
        print(runs_good)

        infotofile.append("### runs with good B field and quality ###\n")
        infotofile.append("### %s\n" % str(runs_good))
393 
394 ######################################################
395 # Find files for good runs
396 
dbs_quiery = "find run, file.numevents, file where dataset="+options.alcaDataset+" and run>="+str(options.startRun)+" and run<="+str(options.endRun)+" and file.numevents>0"

# NOTE(review): the query is interpolated into a shell command without
# escaping, and the result goes to a fixed file name in /tmp that is left in
# place afterwards.
os.system('python $DBSCMD_HOME/dbsCommandLine.py -c search --noheader --query="'+dbs_quiery+'" | sort > /tmp/runs_and_files_full_of_pink_bunnies')

list_of_files = []
list_of_runs = []
list_of_numevents = []

select_runs = (options.isMC == 'false')
good_run_set = set(runs_good)    # O(1) membership test per line
seen_files_events = set()

with open('/tmp/runs_and_files_full_of_pink_bunnies', 'r') as ff:
    for line in ff:
        fields = line.split()
        if not fields:
            continue    # skip blank lines
        # Each line is expected to hold: run, number of events, file name.
        (run, numevents, fname) = fields
        if select_runs and (int(run) not in good_run_set):
            continue
        list_of_runs.append(int(run))
        # A file can hold events from several runs and then shows up on
        # several lines; keep each (file, numevents) pair only once, in the
        # order of first appearance.
        if (fname, numevents) not in seen_files_events:
            seen_files_events.add((fname, numevents))
            list_of_files.append(fname)
            list_of_numevents.append(numevents)

uniq_list_of_runs = sorted(set(list_of_runs))

print("### list of runs with good B field and quality in the dataset: ###")
print(uniq_list_of_runs)
infotofile.append("### list of runs with good B field and quality in the dataset: ###\n")
infotofile.append("### %s\n" % str(uniq_list_of_runs))

# Summed over the de-duplicated files; this is 0 when nothing was selected.
total_numevents = sum(int(n) for n in list_of_numevents)

print("### total number of events in those "+str(len(uniq_list_of_runs))+" runs = "+str(total_numevents))

infotofile.append("### total number of events in those "+str(len(uniq_list_of_runs))+" runs = "+str(total_numevents))
437 
438 ######################################################
439 # Write out results
440 
# Write the collected header lines followed by a Python list literal named
# fileNames, one file per line with its number of events as trailing comment.
size = len(list_of_files)

# The with-block closes the output file even if one of the writes fails.
with open(options.outputFile, 'w') as ff:
    ff.write("".join(infotofile))
    ff.write("\nfileNames = [\n")
    for i, (fname, numevents) in enumerate(zip(list_of_files, list_of_numevents)):
        # no separator after the last entry
        comma = "" if i == size-1 else ","
        ff.write(" '" + fname + "'" + comma + " # " + numevents + "\n")
    ff.write(']\n')
458 
def getRunRegistryGoodRuns()
obtaining list of good B and quality runs from Run Registry https://twiki.cern.ch/twiki/bin/view/CMS/...
def getGoodQRuns()
obtaining list of good quality runs
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:20
OutputIterator zip(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp)
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def getGoodBRuns()
functions definitions
def getJSONGoodRuns()
obtain a list of good runs from JSON file
#define str(s)
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run