CMS 3D CMS Logo

findQualityFiles.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 
3 
8 
9 from __future__ import print_function
10 from builtins import range
11 import os,sys, DLFCN
12 import optparse
13 
14 # for RunInfo API
15 from pluginCondDBPyInterface import *
16 from CondCore.Utilities import iovInspector as inspect
18 
19 # for RunRegistry API
20 import xmlrpclib
21 
22 # for json support
23 try: # FUTURE: Python 2.6, prior to 2.6 requires simplejson
24  import json
25 except:
26  try:
27  import simplejson as json
28  except:
29  print("Please use lxplus or set an environment (for example crab) with json lib available")
30  sys.exit(1)
31 
32 
# Echo the invocation so that it can be reproduced from the log and from the
# header of the generated output file.
print("### command line:")
copyargs = sys.argv[:]
for pos, arg in enumerate(copyargs):
    # An empty argument is shown as "" so it stays visible in the echo.
    quoted = "\"\"" if arg == "" else arg
    # Arguments containing a blank are wrapped in double quotes.
    if " " in quoted:
        quoted = "\"%s\"" % quoted
    copyargs[pos] = quoted
commandline = " ".join(copyargs)

print(commandline)
# Lines collected here are written at the top of the output file.
infotofile = ["### %s\n" % commandline]
44 
45 
47 
# Command-line interface.  Help strings are assembled from adjacent literals;
# every fragment ends with a separator so the pieces do not run together.
usage = ('%prog [options]\n\n'
         'Creates a Python configuration file with filenames for runs in specified run range, '
         'with certain min B field and data quality requirements.')

parser = optparse.OptionParser(usage)

parser.add_option(
    "-d", "--alcaDataset",
    help="[REQUIRED] Name of the input AlCa dataset to get filenames from.",
    type="string",
    default='',
    dest="alcaDataset",
)

# Kept as a string option: only 'true' and 'false' are accepted, which is
# checked after parsing.
parser.add_option(
    "-m", "--isMC",
    help="Whether sample is MC (true) or real data (false).",
    type="string",
    default="false",
    dest="isMC",
)

parser.add_option(
    "-s", "--startRun",
    help="First run number in range.",
    type="int",
    default=0,
    dest="startRun",
)

parser.add_option(
    "-e", "--endRun",
    help="Last run number in range.",
    type="int",
    default=999999999,
    dest="endRun",
)

parser.add_option(
    "-b", "--minB",
    help="Lower limit on minimal B field for a run.",
    type="float",
    default=0.,
    dest="minB",
)

parser.add_option(
    "--maxB",
    help="Upper limit on B field for a run.",
    type="float",
    default=999.,
    dest="maxB",
)

parser.add_option(
    "-r", "--runRegistry",
    help="If present, use RunRegistry API for B field and data quality query",
    action="store_true",
    default=False,
    dest="runRegistry",
)

parser.add_option(
    "-j", "--json",
    help="If present with JSON file as argument, use JSON file for the good runs and ignore B field and --runRegistry options. "
         "The latest JSON file is available at /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions10/7TeV/StreamExpress/",
    type="string",
    default="",
    dest="json",
)

parser.add_option(
    "-t", "--dbTag",
    help="Runinfo DB tag to use.",
    type="string",
    default="runinfo_31X_hlt",
    dest="dbTag",
)

parser.add_option(
    "--printTags",
    help="If present, the only thing script will do is printing list of tags in the DB",
    action="store_true",
    default=False,
    dest="printTags",
)

parser.add_option(
    "--dbName",
    help="RunInfo DB name to use. The default one is "
         "'oracle://cms_orcoff_prod/CMS_COND_31X_RUN_INFO'",
    type="string",
    default="oracle://cms_orcoff_prod/CMS_COND_31X_RUN_INFO",
    dest="dbName",
)

# An empty value means "same as --alcaDataset"; that substitution happens
# after parsing.
parser.add_option(
    "--dqDataset",
    help="Dataset name to query for good data quality runs. "
         "If this option is not used, dqDataset=alcaDataset is automatically set. "
         "If alcaDataset does not have DQ information use /Cosmics/Commissioning08-v1/RAW for CRAFT08 "
         "and use /Cosmics/CRAFT09-v1/RAW for CRAFT09",
    type="string",
    default="",
    dest="dqDataset",
)

parser.add_option(
    "-c", "--dqCriteria",
    help="Set of DQ criteria to use with -dq flag of dbs.\n"
         "An example of a really strict condition:\n"
         "'DT_Shift_Offline=GOOD&CSC_Shift_Offline=GOOD&SiStrip_Shift_Offline=GOOD&Pixel_Shift_Offline=GOOD'\n"
         "NOTE: if --runRegistry is used, DQ criteria syntax should be as Advanced query syntax for RR. E.g.:\n"
         "\"{cmpDt}='GOOD' and {cmpCsc}='GOOD' and {cmpStrip}='GOOD' and {cmpPix}='GOOD'\"",
    type="string",
    default="",
    dest="dqCriteria",
)

parser.add_option(
    "-o", "--outputFile",
    help="Name for output file (please include the .py suffix)",
    type="string",
    default="filelist.py",
    dest="outputFile",
)

parser.add_option(
    "-v", "--verbose",
    help="Degree of debug info verbosity",
    type="int",
    default=0,
    dest="verbose",
)

options, args = parser.parse_args()
161 
162 #if '' in (options.infilename,
163 # options.outfilename,
164 # options.outputCommands):
165 # raise ('Incomplete list of arguments!')
166 
167 
# --alcaDataset is mandatory unless the user only asked for the DB tag list.
# Usage errors exit with a non-zero status so calling scripts can detect them.
if options.alcaDataset == '' and not options.printTags:
    print("--alcaDataset /your/dataset/name is required!")
    sys.exit(1)

# The data-quality query defaults to the AlCa dataset itself.
if options.dqDataset == '':
    options.dqDataset = options.alcaDataset

if options.isMC not in ('true', 'false'):
    print("--isMC option can have only 'true' or 'false' arguments")
    sys.exit(1)

v = options.verbose

# Scale the B-field limits by 18160/3.8; the results are compared with the
# per-run currents read from the RunInfo DB.
# NOTE(review): presumably 18160 A corresponds to 3.8 T -- confirm.
minI = options.minB*18160/3.8
maxI = options.maxB*18160/3.8

# Optional flags are echoed only when they were given.
rr = ' --runRegistry' if options.runRegistry else ''
jj = ' --json ' + options.json if options.json != '' else ''

# Full option set (defaults included) in command-line form, for the log.
allOptions = (
    '### ' + copyargs[0]
    + ' --alcaDataset ' + options.alcaDataset
    + ' --isMC ' + options.isMC
    + ' --startRun ' + str(options.startRun)
    + ' --endRun ' + str(options.endRun)
    + ' --minB ' + str(options.minB)
    + ' --maxB ' + str(options.maxB)
    + rr + jj
    + ' --dbTag ' + options.dbTag
    + ' --dqDataset ' + options.dqDataset
    + ' --dqCriteria "' + options.dqCriteria + '"'
    + ' --outputFile ' + options.outputFile
)

print("### all options, including default:")
print(allOptions)
199 
200 
201 
203 
204 
205 
def getGoodBRuns():
    """Return the runs whose magnet currents lie within [minI, maxI].

    Reads the RunInfo tag ``options.dbTag`` from the database
    ``options.dbName`` for runs between ``options.startRun`` and
    ``options.endRun``.  A run is kept when element 4 of its trend record is
    at least ``minI`` and element 3 is at most ``maxI`` (min_current and
    max_current according to the label printed in verbose mode).

    With ``--printTags`` the function prints all tags of the database and
    terminates the script.  Any exception raised while reading the tag is
    printed and the runs collected so far are returned.
    """
    runs_b_on = []

    sys.setdlopenflags(DLFCN.RTLD_GLOBAL+DLFCN.RTLD_LAZY)

    # The returned object is not used; the call itself is kept.
    # NOTE(review): presumably needed to initialise the framework -- confirm.
    a = FWIncantation()
    rdbms = RDBMS("/afs/cern.ch/cms/DB/conddb")

    db = rdbms.getDB(options.dbName)
    tags = db.allTags()

    if options.printTags:
        print("\nOverview of all tags in "+options.dbName+" :\n")
        print(tags)
        print("\n")
        sys.exit()

    tag = options.dbTag
    what = {}
    # Runs that are always printed, independent of the verbosity level.
    always_shown = (67647, 66893, 67264)

    try:
        iov = inspect.Iov(db, tag)

        if v > 1:
            print("######## summries ########")
            for summary in iov.summaries():
                print(summary[0], summary[1], summary[2], summary[3])
            print("###(start_current,stop_current,avg_current,max_current,min_current,run_interval_micros) vs runnumber###")
            print(iov.trend(what))

        if v > 0:
            print("######## trends ########")

        # The selection loop runs at every verbosity level; only the
        # printing inside it is conditional.
        for record in iov.trendinrange(what, options.startRun-1, options.endRun+1):
            run = record[0]
            currents = record[2]
            if v > 0 or run in always_shown:
                print(run, record[1], currents, currents[4], currents[3])
            if currents[4] >= minI and currents[3] <= maxI:
                runs_b_on.append(int(run))

    except Exception as er:
        # Best effort: report the problem and continue with what was found.
        print(er)

    print("### runs with good B field ###")
    print(runs_b_on)

    return runs_b_on
270 
271 
272 
274 
def getGoodQRuns():
    """Obtain the list of good quality runs.

    Runs the DBS command-line search for runs of ``options.dqDataset`` that
    satisfy ``options.dqCriteria``, with the sorted output redirected to a
    private temporary file, and parses one run number per line.

    Returns:
        list of int: run numbers in the order produced by ``sort``.

    Raises:
        ValueError: if a non-blank output line is not an integer.
    """
    # Function-scope import so the block does not depend on the file header.
    import tempfile

    dbs_quiery = "find run where dataset="+options.dqDataset+" and dq="+options.dqCriteria
    print('dbs search --noheader --query="'+dbs_quiery+'" | sort')

    # A unique temporary file avoids clashes between concurrent invocations
    # (a fixed name in /tmp is shared by every user of the host).
    handle, tmp_path = tempfile.mkstemp(prefix='runs_full_of_pink_bunnies_')
    os.close(handle)

    runs_good_dq = []
    try:
        os.system('python $DBSCMD_HOME/dbsCommandLine.py -c search --noheader --query="'
                  + dbs_quiery + '" | sort > "' + tmp_path + '"')

        with open(tmp_path, "r") as ff:
            for line in ff:
                entry = line.strip()
                # Blank lines carry no run number and would break int().
                if entry:
                    runs_good_dq.append(int(entry))
    finally:
        # Remove the temporary file even when parsing fails.
        os.remove(tmp_path)

    print("### runs with good quality ###")
    print(runs_good_dq)

    return runs_good_dq
299 
300 
304 
def getRunRegistryGoodRuns():
    """Obtain the list of good B field and quality runs from the Run Registry.

    Builds a Run Registry query from the run range, the B field limits and
    (when given) ``options.dqCriteria``, requests the
    'chart_runs_cum_evs_vs_bfield' export over XML-RPC and returns the first
    element of every entry under its 'events' key.
    """
    server = xmlrpclib.ServerProxy('http://pccmsdqm04.cern.ch/runregistry/xmlrpc')

    conditions = [
        "{runNumber}>="+str(options.startRun),
        "{runNumber}<="+str(options.endRun),
        "{bfield}>="+str(options.minB),
        "{bfield}<="+str(options.maxB),
    ]
    if options.dqCriteria != "":
        conditions.append(options.dqCriteria)
    rr_quiery = " and ".join(conditions)

    rrstr = server.RunDatasetTable.export('GLOBAL', 'chart_runs_cum_evs_vs_bfield', rr_quiery)
    # The reply uses bare bfield/events as keys; quote them so that the text
    # can be evaluated as a Python dictionary.
    rrstr = rrstr.replace("bfield", "'bfield'").replace("events", "'events'")
    # NOTE(review): eval() executes text received from a remote server.
    # Consider ast.literal_eval once the reply is confirmed to be a pure literal.
    rrdata = eval(rrstr)

    return [entry[0] for entry in rrdata['events']]
322 
323 
325 
def getJSONGoodRuns(jsonFileName=None):
    """Obtain a list of good runs from a JSON file.

    Args:
        jsonFileName: path of the JSON file whose top-level keys are run
            numbers.  When omitted, ``options.json`` is used.

    Returns:
        list of int: the run numbers in ascending order.
    """
    if jsonFileName is None:
        jsonFileName = options.json

    # open() works on Python 2 and 3 (the file() builtin is Python 2 only),
    # and the context manager closes the handle even if parsing fails.
    with open(jsonFileName, 'r') as jsonfile:
        jsondict = json.load(jsonfile)

    return sorted(int(run) for run in jsondict)
343 
344 
346 
# Select the good runs for real data.  Exactly one source is used: a JSON
# file, the Run Registry, or the RunInfo DB combined with the DBS quality
# query.  For MC all three lists stay empty.
runs_b_on = []
runs_good_dq = []
runs_good = []

isData = options.isMC == 'false'
useJSON = options.json != ''

if isData and not useJSON and not options.runRegistry:
    # B field from the RunInfo DB ...
    runs_b_on = getGoodBRuns()

    infotofile.append("### runs with good B field ###\n")
    infotofile.append("### %s\n" % str(runs_b_on))

    # ... data quality from DBS ...
    runs_good_dq = getGoodQRuns()

    infotofile.append("### runs with good quality ###\n")
    infotofile.append("### %s\n" % str(runs_good_dq))

    # ... and keep the runs present in both, in B-field list order.
    good_dq = set(runs_good_dq)
    runs_good = [run for run in runs_b_on if run in good_dq]

    print("### runs with good B field and quality ###")
    print(runs_good)

    infotofile.append("### runs with good B field and quality ###\n")
    infotofile.append("### %s\n" % str(runs_good))

elif isData and not useJSON:
    # The Run Registry provides B field and quality selection in one query.
    runs_good = getRunRegistryGoodRuns()
    print("### runs with good B field and quality ###")
    print(runs_good)

elif isData:
    # A JSON file overrides the B field and --runRegistry options.
    runs_good = getJSONGoodRuns()
    print("### good runs from JSON file ###")
    print(runs_good)
394 
395 
397 
# Query DBS for (run, number of events, file name) of every non-empty file of
# the AlCa dataset in the requested run range.
dbs_quiery = "find run, file.numevents, file where dataset="+options.alcaDataset+" and run>="+str(options.startRun)+" and run<="+str(options.endRun)+" and file.numevents>0"

# The query output is left in /tmp afterwards (the original keeps it too).
os.system('python $DBSCMD_HOME/dbsCommandLine.py -c search --noheader --query="'+dbs_quiery+'" | sort > /tmp/runs_and_files_full_of_pink_bunnies')

list_of_files = []
list_of_runs = []
list_of_numevents = []
total_numevents = 0

# For MC every run is accepted; for data only the selected good runs.
select_runs = options.isMC == 'false'
good_run_set = set(runs_good)
# A file can hold events from several runs and then appears on several
# lines; it is listed and counted once, in first-seen (sorted) order.
seen_files_events = set()

with open('/tmp/runs_and_files_full_of_pink_bunnies', 'r') as ff:
    for line in ff:
        fields = line.split()
        if not fields:
            continue  # blank line
        # A non-blank line that is not "run numevents file" still raises.
        (run, numevents, fname) = fields
        if select_runs and int(run) not in good_run_set:
            continue
        list_of_runs.append(int(run))
        if (fname, numevents) in seen_files_events:
            continue
        seen_files_events.add((fname, numevents))
        list_of_files.append(fname)
        list_of_numevents.append(numevents)
        total_numevents += int(numevents)

uniq_list_of_runs = sorted(set(list_of_runs))

print("### list of runs with good B field and quality in the dataset: ###")
print(uniq_list_of_runs)
infotofile.append("### list of runs with good B field and quality in the dataset: ###\n")
infotofile.append("### %s\n" % str(uniq_list_of_runs))

print("### total number of events in those "+str(len(uniq_list_of_runs))+" runs = "+str(total_numevents))

infotofile.append("### total number of events in those "+str(len(uniq_list_of_runs))+" runs = "+str(total_numevents))

# Write the collected header lines followed by the fileNames list.  An empty
# selection produces an empty list instead of aborting.
size = len(list_of_files)

with open(options.outputFile, 'w') as ff:
    ff.write("".join(infotofile))
    ff.write("\nfileNames = [\n")
    for i, fname in enumerate(list_of_files):
        # No comma after the last entry.
        comma = "" if i == size-1 else ","
        ff.write(" '"+ fname +"'"+comma+" # "+ list_of_numevents[i] + "\n")
    ff.write(']\n')
459 
FastTimerService_cff.range
range
Definition: FastTimerService_cff.py:34
join
static std::string join(char **cmd)
Definition: RemoteFile.cc:17
findQualityFiles.getGoodQRuns
def getGoodQRuns()
obtaining list of good quality runs
Definition: findQualityFiles.py:275
spr::find
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:19
str
#define str(s)
Definition: TestProcessor.cc:52
print
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:46
createfilelist.int
int
Definition: createfilelist.py:10
FrontierConditions_GlobalTag_cff.file
file
Definition: FrontierConditions_GlobalTag_cff.py:13
ComparisonHelper::zip
OutputIterator zip(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp)
Definition: L1TStage2CaloLayer1.h:41
findQualityFiles.getJSONGoodRuns
def getJSONGoodRuns()
obtain a list of good runs from JSON file
Definition: findQualityFiles.py:326
findQualityFiles.getGoodBRuns
def getGoodBRuns()
functions definitions
Definition: findQualityFiles.py:207
timeUnitHelper
genParticles_cff.map
map
Definition: genParticles_cff.py:11
findQualityFiles.getRunRegistryGoodRuns
def getRunRegistryGoodRuns()
obtaining list of good B and quality runs from Run Registry https://twiki.cern.ch/twiki/bin/view/CMS/...
Definition: findQualityFiles.py:305