CMS 3D CMS Logo

harvestRelVal.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 from __future__ import print_function
4 import sys
5 import os
6 
7 """
8 arguments [<list-of-processes>]
9 description:
10 creates crab.cfg, multicrab.cfg, harvest_*.py
11 if dbs is set:
12  prints number of events found in dataset
13  if no argument is provided looks for all available datasets for release
14  user can edit multicrab and confirm process list as needed
15 nuno@cern.ch 09.04
16 """
17 
def print_def():
    """Print the command-line usage line followed by three example calls."""
    print("Usage:", sys.argv[0], "[list_of_processes]")
    print("Examples:")
    examples = (
        "harvestRelVal.py",
        "harvestRelVal.py /RelValTTbar/CMSSW_3_1_0_pre4_STARTUP_30X_v1/GEN-SIM-RECO",
        "harvestRelVal.py <dataset_list.txt>",
    )
    for example in examples:
        print(example)
24 
def check_dbs():
    """Tell whether the DBS client environment is configured.

    Returns 0 when the DBSCMD_HOME environment variable is unset (or holds
    the literal value 'NOTSET'), and 1 otherwise.
    """
    dbs_home = os.getenv('DBSCMD_HOME', 'NOTSET')
    return 0 if dbs_home == 'NOTSET' else 1
29 
def check_nevts_dset(dset):
    """Return the total event count of dataset *dset* as reported by DBS.

    The count is the sum of the 'NumberOfEvents' entry of every file that
    api.listFiles(path=str(dset)) returns.  When the module-level ``is_dbs``
    flag is false no query is made and -1 is returned.
    """
    if is_dbs:
        return sum(entry['NumberOfEvents']
                   for entry in api.listFiles(path=str(dset)))
    return -1
39 
def make_dqmname(s):
    """Build the DQM output file name for dataset path *s*.

    Each '/' of the path becomes '__'; the result is placed between the
    fixed prefix 'DQM_V0001_R000000001' and the '.root' extension.
    """
    flattened = '__'.join(s.split('/'))
    return 'DQM_V0001_R000000001%s.root' % flattened
42 
def get_name_from_dsetpath(ds):
    """Return the sample name encoded in dataset path *ds*.

    The name is the second '/'-separated field of the path (the text
    between the first and second slash) with every 'RelVal' occurrence
    removed, e.g. '/RelValTTbar/<release>/GEN-SIM-RECO' gives 'TTbar'.
    """
    fs = ds.split('/')
    fa = fs[1].replace('RelVal', '')
    return fa
47 
def get_cond_from_dsetpath(ds):
    """Extract the conditions label (such as '30X' or '31X') from *ds*.

    The third '/'-separated field of the dataset path is stripped of the
    release name (module-level ``cmssw_ver`` plus '_') and of the
    'IDEAL_', 'STARTUP_' and '_FastSim' markers; what precedes the
    '_v<N>' version suffix is taken as the label.

    Returns the label string, or 0 when no usable label is found; callers
    compare the result against 0 to skip the dataset.
    """
    fields = ds.split('/')
    # A path with fewer than three fields has no conditions part at all;
    # treat it like an unparsable label instead of raising IndexError.
    ca = fields[2] if len(fields) > 2 else ''
    for token in (cmssw_ver + '_', 'IDEAL_', 'STARTUP_', '_FastSim'):
        ca = ca.replace(token, '')

    # Cut just before the '_' that precedes the first 'v'.  Without a 'v'
    # keep the whole string: find() returns -1 and slicing with it would
    # silently drop the last two characters.
    vpos = ca.find('v')
    cb = ca[:vpos - 1] if vpos != -1 else ca

    if cb and cb[0].find('3') != -1 and len(cb) <= 3:
        print("good condition for", ds, " : ", cb, '(len:', len(cb), ')')
        return cb

    # Fall back to searching for one of the known labels inside the text.
    print("problem extracting condition for", ds, " : ", cb, '(len:', len(cb), ')')
    for known in ('31X', '30X'):
        if cb.find(known) != -1:
            print("condition found:", known)
            return known
    print("skipping", cb)
    return 0
64 
65 
def make_dbs_list(dbslf):
    """Write the dataset paths selected from DBS to the file *dbslf*.

    Does nothing when the module-level ``is_dbs`` flag is false.
    Otherwise every path returned by api.listDatasetPaths() that contains
    'RelVal', the CMSSW version (``cmssw_ver``) or '/GEN-SIM' is written,
    one per line, and a confirmation message is printed.
    """
    if not is_dbs:
        return
    # NOTE(review): the three tests are OR-ed, so matching any single one
    # keeps the path; the command-line example below filters on all three.
    # Confirm which is intended before changing it.
    with open(dbslf, 'w') as flis:
        for ads in api.listDatasetPaths():
            if (ads.find('RelVal') != -1
                    or ads.find(cmssw_ver) != -1
                    or ads.find("/GEN-SIM") != -1):
                flis.write(ads + '\n')
    print('Generated dataset list', dbslf, 'from dbs.')
    # Command-line example of a similar query:
    # dbs lsd --path=/RelVal*/CMSSW_3_1_0_pre5*/GEN-SIM-RECO --url=http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet > mylist.txt
82 
def read_ds_file():
    """Append the dataset paths listed in ``dsfile`` to ``dsetpaths``.

    The file named by the module-level ``dsfile`` holds one dataset path
    per line.  Lines containing '#' are reported and skipped; blank lines
    are ignored.  Surrounding whitespace (including a trailing '\\r') is
    stripped so it cannot end up inside a dataset path.

    Exits with status 30 when the file does not exist.
    """
    if not os.path.exists(dsfile):
        print("problem reading file", dsfile)
        sys.exit(30)
    with open(dsfile, 'r') as fin:
        for line in fin:
            d = line.strip()
            if not d:
                # An empty entry is not a dataset path.
                continue
            if d.find('#') == -1:
                dsetpaths.append(d)
            else:
                print('skipping:', d)
    print('Using data set list in ', dsfile)
96 
def check_dset():
    """Prune the module-level ``dsetpaths`` list in place and report on it.

    Steps:
      1. remove every path that does not contain ``cmssw_ver``;
      2. remove every path for which get_cond_from_dsetpath() returns 0;
      3. exit with status 12 if nothing is left;
      4. print the number of events of each remaining dataset, as given
         by check_nevts_dset().
    """
    # check cmssw consistency
    # Iterate over a copy: removing from the list being iterated makes the
    # loop skip the element that follows each removed one.
    for s in dsetpaths[:]:
        if s.find(cmssw_ver) == -1:
            dsetpaths.remove(s)
            print('Inconsistency found with datset and cmssw version (', cmssw_ver, ')'
                  ': \t ', s, ' has been removed.')
    # check conditions from dsetname (again on a copy, for the same reason)
    for s in dsetpaths[:]:
        if get_cond_from_dsetpath(s) == 0:
            dsetpaths.remove(s)
    # check list size
    nSamples = len(dsetpaths)
    if nSamples == 0:
        print("Empty input list, exit.")
        sys.exit(12)
    print('Processing', nSamples, 'data sets.')
    # check event numbers
    nSampleEvts = [check_nevts_dset(s) for s in dsetpaths]
    print('number of events per dataset:', nSampleEvts)
121 
def _previous_release(version):
    """Return *version* with its trailing number lowered by one.

    Returns None when *version* does not end in a number, or ends in 0,
    since no previous release name can be derived then.
    """
    stem = version.rstrip('0123456789')
    digits = version[len(stem):]
    if not digits or int(digits) < 1:
        return None
    return stem + str(int(digits) - 1)


def find_dqmref(ds):
    """Fetch a reference DQM file for dataset *ds* from the previous release.

    Returns 'NONE' immediately when the module-level ``do_reference`` flag
    is false, or when no previous release name can be derived from
    ``cmssw_ver``.  Otherwise the matching file is copied from the castor
    reference directory with rfcp, located locally through an ``ls``
    listing written to 'ref.txt', and its name is returned; 'NONE' is
    returned when no such local file exists.
    """
    if not do_reference:
        return 'NONE'
    # Only the trailing number is decremented.  Replacing the last character
    # throughout the string would also alter identical digits elsewhere in
    # the version and would mishandle multi-digit suffixes such as 'pre10'.
    ref_ver = _previous_release(cmssw_ver)
    if ref_ver is None:
        print("Found reference file:", 'NONE')
        return 'NONE'
    ref_dir = "/castor/cern.ch/user/n/nuno/relval/harvest/" + ref_ver + "/"
    ref_dsf = make_dqmname(ds.replace(cmssw_ver, ref_ver))
    gls = " | grep root | grep "
    # to accept crab appended _1.root in file names, and skip versions/conditions
    gls += ref_dsf[:-25]
    gls += "| awk '{print $9}' "
    # NOTE(review): the dataset name is interpolated into a shell command
    # line; this relies on dataset names holding no shell metacharacters.
    command = "rfcp " + ref_dir + "`rfdir " + ref_dir + gls + "` ."
    os.system(command)
    tmpfile = "ref.txt"
    command = "ls -rtl *" + gls + " > " + tmpfile
    os.system(command)
    the_ref = 'NONE'
    if os.path.exists(tmpfile):
        with open(tmpfile, 'r') as fin:
            ref = fin.readline().replace('\n', '')
        if os.path.exists(ref):
            the_ref = ref
    print("Found reference file:", the_ref)
    return the_ref
155 
def create_harvest(ds):
    """Generate the harvesting configuration file for dataset *ds*.

    A cmsDriver.py command line is adapted to the dataset name (conditions
    label, IDEAL instead of STARTUP, FastSim / PileUp harvesting sequence)
    and run through os.system.  The file it produces is renamed to
    make_harv_name(ds), and the following is appended to it: the dqmSaver
    workflow name, the input file list from DBS (when ``is_dbs`` is true)
    and the reference-file settings (when find_dqmref() finds a file).

    Exits with status 50 when no conditions label can be extracted from
    *ds*, and with status 40 when cmsDriver.py did not leave the expected
    output file behind.
    """
    cmsdriver = "cmsDriver.py harvest -s HARVESTING:validationHarvesting --mc --conditions FrontierConditions_GlobalTag,STARTUP_30X::All --harvesting AtJobEnd --no_exec -n -1"
    cond = get_cond_from_dsetpath(ds)
    if cond == 0:
        print('unexpected problem with conditions')
        sys.exit(50)
    cmsdriver = cmsdriver.replace('30X', cond)
    fin_name = "harvest_HARVESTING_STARTUP.py"
    if ds.find('IDEAL') != -1:
        cmsdriver = cmsdriver.replace('STARTUP', 'IDEAL')
        fin_name = fin_name.replace('STARTUP', 'IDEAL')
    if ds.find('FastSim') != -1:
        cmsdriver = cmsdriver.replace('validationHarvesting', 'validationHarvestingFS')
    # NOTE(review): for a dataset name holding both 'FastSim' and 'PileUp'
    # the two replacements stack into 'validationHarvestingPUFS' -- confirm
    # that this is a valid sequence name.
    if ds.find('PileUp') != -1:
        cmsdriver = cmsdriver.replace('validationHarvesting', 'validationHarvestingPU')

    # Drop any stale output first so the existence check below really
    # reflects this cmsDriver run.
    if os.path.exists(fin_name):
        os.remove(fin_name)
    print("executing cmsdriver command:\n\t", cmsdriver)
    os.system(cmsdriver)
    if not os.path.exists(fin_name):
        print('problem with cmsdriver file name')
        sys.exit(40)

    # Rename with os.rename instead of a shell 'mv', so the dataset-derived
    # file name is never parsed by a shell.
    hf = make_harv_name(ds)
    os.rename(fin_name, hf)
    with open(hf, 'a') as out:
        out.write("\n\n##additions to cmsDriver output \n")
        out.write("process.dqmSaver.workflow = '" + ds + "'\n")
        if is_dbs:
            out.write("process.source.fileNames = cms.untracked.vstring(\n")
            for afile in api.listFiles(path=ds):
                out.write(" '%s',\n" % afile['LogicalFileName'])
            out.write(")\n")

        dqmref = find_dqmref(ds)
        if dqmref != 'NONE':
            out.write("process.DQMStore.referenceFileName = '" + dqmref + "'\n")
            out.write("process.dqmSaver.referenceHandling = 'all'\n")
199 
def create_mcrab(set, fcrab, fout):
    """Write the multicrab configuration file *fout*.

    The file starts with a [MULTICRAB] section pointing at the crab
    configuration *fcrab* and a [COMMON] section (all events, one job);
    append_sample_mcrab() then adds one section per dataset path in *set*.

    The file is closed even when writing a sample section raises or exits.
    """
    nevt = -1
    njob = 1
    with open(fout, 'w') as out:
        out.write('[MULTICRAB]')
        out.write('\ncfg=' + fcrab)
        out.write('\n\n[COMMON]')
        out.write('\nCMSSW.total_number_of_events=' + str(nevt))
        out.write('\nCMSSW.number_of_jobs=' + str(njob))
        for s in set:
            append_sample_mcrab(s, out)
212 
def make_harv_name(dset):
    """Return the harvesting config file name, 'harvest_<sample>.py', for *dset*.

    <sample> is whatever get_name_from_dsetpath() returns for the path.
    """
    sample = get_name_from_dsetpath(dset)
    return ''.join(('harvest_', sample, '.py'))
215 
def append_sample_mcrab(dsetp, fout):
    """Append the multicrab section of dataset path *dsetp* to *fout*.

    *fout* is an already open, writable file object.  The section is named
    after the sample and lists the harvesting config, the dataset path and
    the DQM output file; a reference file found by find_dqmref() is added
    as an additional input file.

    Exits with status 17 when the harvesting config file does not exist.
    """
    output_name = make_dqmname(dsetp)
    section_name = get_name_from_dsetpath(dsetp)
    harvest_cfg = make_harv_name(dsetp)
    if not os.path.exists(harvest_cfg):
        print('problem creating multicrab, file', harvest_cfg, 'does not exist')
        sys.exit(17)

    entries = (
        '\n\n[' + section_name + ']',
        '\nCMSSW.pset=' + harvest_cfg,
        '\nCMSSW.datasetpath=' + dsetp,
        '\nCMSSW.output_file=' + output_name,
    )
    for entry in entries:
        fout.write(entry)

    reference = find_dqmref(dsetp)
    if reference != 'NONE':
        fout.write('\nUSER.additional_input_files=' + reference)
231 
def create_crab(ds):
    """Write the crab configuration template for dataset *ds*.

    The file named by the module-level ``f_crab`` receives ``crab_block``
    followed by the pset, datasetpath and output_file entries, each on a
    line of its own, so that they extend the [CMSSW] section that ends
    ``crab_block``.
    """
    dqmout = make_dqmname(ds)
    hf = make_harv_name(ds)
    with open(f_crab, 'w') as out:
        out.write(crab_block)
        out.write('\npset=' + hf)
        # The leading newline keeps this entry off the 'pset=' line.
        out.write('\ndatasetpath=' + ds)
        out.write('\noutput_file=' + dqmout)


# Fixed part of crab.cfg; create_crab() appends the per-dataset entries.
crab_block = """
[CRAB]
jobtype = cmssw
scheduler = glite

[EDG]
remove_default_blacklist=1
rb = CERN

[USER]
return_data = 1
#copy_data = 1
#storage_element=srm-cms.cern.ch
#storage_path=/srm/managerv2?SFN=/castor/cern.ch
#user_remote_dir=/user/n/nuno/test
publish_data=0
thresholdLevel=70
eMail=nuno@cern.ch

[CMSSW]
total_number_of_events=-1
show_prod = 1
number_of_jobs=1
"""
266 
267 
#Check arg,settings
# input_type records how the datasets were specified:
#   'none' - no argument, the dataset list is looked up in dbs
#   'file' - the argument names an existing file listing dataset paths
#   'ds'   - the argument is itself a dataset path
input_type = ''
argin = ''
dsfile = ''
do_reference = False
if len(sys.argv) > 2:
    print_def()
    sys.exit(10)
elif len(sys.argv) == 1:
    print("Will search for available datasets.")
    input_type = 'none'
elif len(sys.argv) == 2:
    argin = sys.argv[1]
    if os.path.exists(argin):
        dsfile = argin
        input_type = 'file'
    # Both markers must be present.  A bare argin.find('RelVal') is truthy
    # when the text is absent (-1) and falsy when it sits at index 0.
    elif argin.find('CMSSW') != -1 and argin.find('RelVal') != -1:
        print('Using specified data set', argin)
        input_type = 'ds'
    else:
        print('Invalid argument: process list, dataset or file',
              argin, 'does not exist.')
        sys.exit(11)
292 
#dbs
# Set up the DBS client only when its environment is available; the rest
# of the script tests is_dbs before touching api.
is_dbs = check_dbs()
if is_dbs:
    print("dbs home:", os.getenv('DBSCMD_HOME'))
    from DBSAPI.dbsApi import DbsApi
    from DBSAPI.dbsException import *
    from DBSAPI.dbsApiException import *
    from DBSAPI.dbsOptions import DbsOptionParser
    optManager = DbsOptionParser()
    opts, args = optManager.getOpt()
    # The parsed options are not used: the client is always pointed at
    # the global production servlet.
    args = {'url': "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"}
    api = DbsApi(args)
else:
    print("dbs not set!")
309 
#cmssw
# The release name comes from the CMSSW_VERSION environment variable;
# without it the script prints setup instructions and exits with status 12.
cmssw_ver = os.getenv('CMSSW_VERSION', 'NOTSET')
if cmssw_ver != 'NOTSET':
    print("Using cmssw version:", cmssw_ver)
else:
    print("""
 cmssw not set!
 example:
 scramv1 p CMSSW CMSSW_3_1_0_pre5
 cd CMSSW_3_1_0_pre5/src
 eval `scramv1 runtime -sh`
 cd -
 """)
    sys.exit(12)
324 
325 
#read datasets
# dsetpaths is filled according to how the input was specified.
dsetpaths = []

if input_type == 'ds':
    dsetpaths.append(argin)
elif input_type == 'file':
    read_ds_file()
elif input_type == 'none':
    if not is_dbs:
        print("no dataset specified, and dbs isn't set...")
        print_def()
        sys.exit(13)
    # No argument given: build the dataset list from dbs, then read it back.
    dsfile = cmssw_ver + "_dbslist.txt"
    make_dbs_list(dsfile)
    read_ds_file()
342 
343 
#check dataset list: remove incompatible dsets
check_dset()

#print dataset list to be processed
print('data sets:', dsetpaths)
with open("dset_processed.txt", 'w') as dslproc:
    dslproc.writelines(s + '\n' for s in dsetpaths)


##Create harvest.py template
create_harvest(dsetpaths[0])

##Create crab.cfg template
f_crab = 'crab.cfg'
create_crab(dsetpaths[0])

##Create harvest_n.py for individual datasets
for s in dsetpaths:
    create_harvest(s)

##Create multicrab.cfg
f_multi_crab = 'multicrab.cfg'
create_mcrab(dsetpaths, f_crab, f_multi_crab)

##Print what has been created
harvfilelist = [make_harv_name(s) for s in dsetpaths]

created = {'pwd': os.environ["PWD"], 'cf': f_crab, 'mc': f_multi_crab}
print('\nCreated:\n\t %(pwd)s/%(cf)s \n\t %(pwd)s/%(mc)s' % created)
print("\tIndividual harvest py's:\n\t", harvfilelist)

print("Done.")
def read_ds_file()
def get_name_from_dsetpath(ds)
def replace(string, replacements)
def create_mcrab(set, fcrab, fout)
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:65
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:20
def make_dbs_list(dbslf)
def make_harv_name(dset)
def append_sample_mcrab(dsetp, fout)
def get_cond_from_dsetpath(ds)
def check_nevts_dset(dset)
def find_dqmref(ds)
def create_harvest(ds)
def create_crab(ds)
#define str(s)
def make_dqmname(s)
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run