CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
harvestRelVal.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 import sys
4 import os
5 
6 """
7 arguments [<list-of-processes>]
8 description:
9 creates crab.cfg, multicrab.cfg, harvest_*.py
10 if dbs is set:
11  prints number of events found in dataset
12  if no argument is provided looks for all available datasets for release
13  user can edit multicrab and confirm process list as needed
14 nuno@cern.ch 09.04
15 """
16 
def print_def():
    """Print the usage line for this script followed by three example invocations."""
    usage = " ".join(("Usage:", sys.argv[0], "[list_of_processes]"))
    examples = (
        "harvestRelVal.py",
        "harvestRelVal.py /RelValTTbar/CMSSW_3_1_0_pre4_STARTUP_30X_v1/GEN-SIM-RECO",
        "harvestRelVal.py <dataset_list.txt>",
    )
    # One output line per entry, in the same order as before.
    for line in (usage, "Examples:") + examples:
        print(line)
23 
def check_dbs():
    """Return 1 when the DBSCMD_HOME environment variable is set, else 0.

    A variable whose value is literally 'NOTSET' counts as not set.
    """
    dbs_home = os.environ.get('DBSCMD_HOME', 'NOTSET')
    if dbs_home != 'NOTSET':
        return 1
    return 0
28 
def check_nevts_dset(dset):
    """Return the total number of events in dataset *dset*.

    Sums the 'NumberOfEvents' entry of every file returned by
    api.listFiles(path=str(dset)).  Returns -1 when is_dbs is false,
    in which case no query is made.
    """
    if is_dbs:
        return sum(entry['NumberOfEvents']
                   for entry in api.listFiles(path=str(dset)))
    return -1
38 
def make_dqmname(s):
    """Return the DQM root-file name for dataset path *s*.

    The name is the fixed run prefix, then *s* with every '/' turned into
    '__', then the '.root' extension.
    """
    flattened = '__'.join(s.split('/'))
    return "".join(['DQM_V0001_R000000001', flattened, '.root'])
41 
def get_name_from_dsetpath(ds):
    """Return the sample name taken from dataset path *ds*.

    The name is the first path component (the text between the first and
    second '/') with every 'RelVal' substring removed, e.g.
    '/RelValTTbar/<release>/GEN-SIM-RECO' gives 'TTbar'.

    Raises IndexError when *ds* contains no '/'.
    """
    primary = ds.split('/')[1]
    return primary.replace('RelVal', '')
46 
def get_cond_from_dsetpath(ds):
    """Return the conditions tag (e.g. '30X', '31X') encoded in dataset path *ds*.

    The second path component is stripped of the release prefix
    (cmssw_ver + '_'), 'IDEAL_', 'STARTUP_' and '_FastSim'; what precedes the
    '_v<N>' suffix is taken as the tag.  If the tag does not start with '3'
    or is longer than three characters, the function falls back to looking
    for '31X' or '30X' inside it.

    Returns the tag string, or 0 when no usable tag is found.
    Raises IndexError when *ds* has fewer than two '/' separators.
    """
    ca = ds.split('/')[2]
    for token in (cmssw_ver + '_', 'IDEAL_', 'STARTUP_', '_FastSim'):
        ca = ca.replace(token, '')
    # Drop the version suffix: everything from the character before the first 'v'.
    cb = ca[:ca.find('v') - 1]
    # startswith() also copes with an empty tag, where cb[0] would raise.
    if not cb.startswith('3') or len(cb) > 3:
        print("problem extracting condition for %s  :  %s (len: %s )"
              % (ds, cb, len(cb)))
        if cb.find('31X') != -1:
            cb = '31X'
        elif cb.find('30X') != -1:
            cb = '30X'
        else:
            print("skipping %s" % cb)
            return 0
        print("condition found: %s" % cb)
    else:
        print("good condition for %s  :  %s (len: %s )" % (ds, cb, len(cb)))
    return cb
63 
64 
def make_dbs_list(dbslf):
    """Write the dataset paths reported by DBS to the text file *dbslf*.

    Does nothing when is_dbs is false.  A path returned by
    api.listDatasetPaths() is written (one per line) when it contains
    'RelVal', or the current cmssw_ver, or '/GEN-SIM'.
    """
    if not is_dbs:
        return
    flis = open(dbslf, 'w')
    try:
        for ads in api.listDatasetPaths():
            # NOTE: the three tests are OR-ed; a stricter
            # "and contains /GEN-SIM-RECO" test was left disabled in the original.
            if ('RelVal' in ads
                    or cmssw_ver in ads
                    or "/GEN-SIM" in ads):
                flis.write(ads + '\n')
    finally:
        # Close the list even if the DBS query fails part-way through.
        flis.close()
    print('Generated dataset list %s from dbs.' % dbslf)
    # example of the equivalent command-line query:
    # dbs lsd --path=/RelVal*/CMSSW_3_1_0_pre5*/GEN-SIM-RECO --url=http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet > mylist.txt
81 
def read_ds_file():
    """Append the dataset paths listed in the file *dsfile* to *dsetpaths*.

    Each line is one dataset path.  Lines containing '#' are reported and
    skipped; blank lines are ignored.  Exits with status 30 when *dsfile*
    does not exist.
    """
    if not os.path.exists(dsfile):
        print("problem reading file %s" % dsfile)
        sys.exit(30)
    fin = open(dsfile, 'r')
    try:
        for dset in fin:
            d = dset.strip()
            if not d:
                # A blank line is not a dataset path; passing it on would make
                # the later split('/')[2] lookups fail.
                continue
            if '#' in d:
                print('skipping: %s' % d)
            else:
                dsetpaths.append(d)
    finally:
        fin.close()
    print('Using data set list in  %s' % dsfile)
95 
def check_dset():
    """Filter *dsetpaths* in place and report what is left.

    Removes every dataset path that does not contain cmssw_ver, then every
    path for which get_cond_from_dsetpath() returns 0.  Exits with status 12
    when nothing is left; otherwise prints the number of datasets and the
    event count of each (from check_nevts_dset).
    """
    # check cmssw consistency
    # Iterate over a copy: removing from the list being iterated makes the
    # loop skip the element that follows each removed one.
    for s in dsetpaths[:]:
        if s.find(cmssw_ver) == -1:
            dsetpaths.remove(s)
            print('Inconsistency found with datset and cmssw version ( %s ): \t  %s  has been removed.'
                  % (cmssw_ver, s))
    # check conditions from dsetname
    for s in dsetpaths[:]:
        if get_cond_from_dsetpath(s) == 0:
            dsetpaths.remove(s)
    # check list size
    nSamples = len(dsetpaths)
    if nSamples == 0:
        print("Empty input list, exit.")
        sys.exit(12)
    print('Processing %s data sets.' % nSamples)
    # check event numbers
    nSampleEvts = [check_nevts_dset(s) for s in dsetpaths]
    print('number of events per dataset: %s' % (nSampleEvts,))
120 
def _previous_release(version):
    """Return *version* with its trailing number lowered by one, or None.

    None is returned when *version* does not end in a number or when that
    number is 0, i.e. when no previous release name can be derived.
    """
    stem = version.rstrip('0123456789')
    number = version[len(stem):]
    if not number or int(number) < 1:
        return None
    return stem + str(int(number) - 1)


def find_dqmref(ds):
    """Fetch the reference DQM file of the previous release for dataset *ds*.

    Returns 'NONE' immediately when do_reference is false.  Otherwise the
    previous release name is derived from cmssw_ver, a matching root file is
    copied from the reference directory into the current directory via shell
    commands, and the name of the local file is returned.  Returns 'NONE'
    when no previous release can be derived or no local file is found.
    """
    if not do_reference:
        return 'NONE'
    # Only the trailing number is decremented.  Replacing the last character
    # throughout the string would also rewrite earlier version fields
    # (CMSSW_3_1_1 would become CMSSW_3_0_0).
    ref_ver = _previous_release(cmssw_ver)
    if ref_ver is None:
        print("Cannot derive a previous release from %s" % cmssw_ver)
        return 'NONE'
    ref_dir = "/castor/cern.ch/user/n/nuno/relval/harvest/" + ref_ver + "/"
    ref_dsf = make_dqmname(ds.replace(cmssw_ver, ref_ver))
    # to accept crab appended _1.root in file names, and skip versions/conditions
    gls = " | grep root | grep " + ref_dsf[:-25] + "| awk '{print $9}' "
    # NOTE(review): the commands below are built from the dataset name and run
    # through the shell; dataset names are assumed to be trusted input.
    os.system("rfcp " + ref_dir + "`rfdir " + ref_dir + gls + "` .")
    tmpfile = "ref.txt"
    os.system("ls -rtl *" + gls + " > " + tmpfile)
    the_ref = 'NONE'
    if os.path.exists(tmpfile):
        fin = open(tmpfile, 'r')
        try:
            ref = fin.readline().replace('\n', '')
        finally:
            fin.close()
        if os.path.exists(ref):
            the_ref = ref
    print("Found reference file: %s" % the_ref)
    return the_ref
154 
def create_harvest(ds):
    """Generate the harvesting configuration file for dataset *ds*.

    Runs cmsDriver.py with the conditions tag taken from *ds* (and the
    IDEAL / FastSim / PileUp variants when those words occur in *ds*),
    renames its output to make_harv_name(ds) and appends the dataset
    specific settings: the dqmSaver workflow, the input file list (only when
    is_dbs is true) and the reference file (only when find_dqmref finds one).

    Exits with status 50 when no conditions tag can be extracted and with
    status 40 when cmsDriver.py did not produce the expected file.
    """
    cmsdriver = "cmsDriver.py harvest -s HARVESTING:validationHarvesting --mc --conditions FrontierConditions_GlobalTag,STARTUP_30X::All --harvesting AtJobEnd --no_exec -n -1"
    cond = get_cond_from_dsetpath(ds)
    if cond == 0:
        print('unexpected problem with conditions')
        sys.exit(50)
    cmsdriver = cmsdriver.replace('30X', cond)
    fin_name = "harvest_HARVESTING_STARTUP.py"
    if ds.find('IDEAL') != -1:
        cmsdriver = cmsdriver.replace('STARTUP', 'IDEAL')
        fin_name = fin_name.replace('STARTUP', 'IDEAL')
    if ds.find('FastSim') != -1:
        cmsdriver = cmsdriver.replace('validationHarvesting', 'validationHarvestingFS')
    if ds.find('PileUp') != -1:
        cmsdriver = cmsdriver.replace('validationHarvesting', 'validationHarvestingPU')

    # Remove a stale output first so the existence check below really tests
    # this cmsDriver run.
    if os.path.exists(fin_name):
        os.remove(fin_name)
    print("executing cmsdriver command:\n\t%s" % cmsdriver)
    os.system(cmsdriver)
    if not os.path.exists(fin_name):
        print('problem with cmsdriver file name')
        sys.exit(40)
    # File operations go through os rather than the shell, so the generated
    # names are never interpreted by a shell.
    os.utime(fin_name, None)
    hf = make_harv_name(ds)
    os.rename(fin_name, hf)

    out = open(hf, 'a')
    try:
        out.write("\n\n##additions to cmsDriver output \n")
        out.write("process.dqmSaver.workflow = '" + ds + "'\n")
        if is_dbs:
            out.write("process.source.fileNames = cms.untracked.vstring(\n")
            for afile in api.listFiles(path=ds):
                out.write(" '%s',\n" % afile['LogicalFileName'])
            out.write(")\n")

        dqmref = find_dqmref(ds)
        if dqmref != 'NONE':
            out.write("process.DQMStore.referenceFileName = '" + dqmref + "'\n")
            out.write("process.dqmSaver.referenceHandling = 'all'\n")
    finally:
        # Close the configuration even if a DBS or reference lookup fails.
        out.close()
198 
def create_mcrab(set, fcrab, fout):
    """Write the multicrab configuration file *fout*.

    set   -- iterable of dataset paths; one section is appended per entry
             by append_sample_mcrab
    fcrab -- name of the crab configuration file referenced by 'cfg='
    fout  -- path of the multicrab file to create (overwritten)
    """
    nevt = -1
    njob = 1
    out = open(fout, 'w')
    try:
        out.write('[MULTICRAB]')
        out.write('\ncfg=' + fcrab)
        out.write('\n\n[COMMON]')
        out.write('\nCMSSW.total_number_of_events=' + str(nevt))
        out.write('\nCMSSW.number_of_jobs=' + str(njob))
        for s in set:
            append_sample_mcrab(s, out)
    finally:
        # Close the file even when a sample section cannot be written.
        out.close()
211 
def make_harv_name(dset):
    """Return the harvest configuration file name for dataset path *dset*.

    The name is 'harvest_' + the sample name from get_name_from_dsetpath + '.py'.
    """
    sample = get_name_from_dsetpath(dset)
    return "".join(('harvest_', sample, '.py'))
214 
def append_sample_mcrab(dsetp, fout):
    """Append the multicrab section for dataset *dsetp* to the open file *fout*.

    The section is named after the sample and lists the harvest
    configuration, the dataset path and the DQM output file.  When
    find_dqmref returns something other than 'NONE', that file is added as
    an additional input file.  Exits with status 17 when the harvest
    configuration file does not exist.
    """
    dqm_file = make_dqmname(dsetp)
    section = get_name_from_dsetpath(dsetp)
    harvest_cfg = make_harv_name(dsetp)
    if not os.path.exists(harvest_cfg):
        print(" ".join(('problem creating multicrab, file', harvest_cfg, 'does not exist')))
        sys.exit(17)
    for piece in (
        '\n\n[' + section + ']',
        '\nCMSSW.pset=' + harvest_cfg,
        '\nCMSSW.datasetpath=' + dsetp,
        '\nCMSSW.output_file=' + dqm_file,
    ):
        fout.write(piece)

    reference = find_dqmref(dsetp)
    if reference != 'NONE':
        fout.write('\nUSER.additional_input_files=' + reference)
230 
def create_crab(ds):
    """Write the crab configuration file *f_crab* for dataset *ds*.

    The file consists of crab_block followed by the pset, datasetpath and
    output_file keys, one per line.
    """
    dqmout = make_dqmname(ds)
    hf = make_harv_name(ds)
    out = open(f_crab, 'w')
    try:
        out.write(crab_block)
        out.write('\npset=' + hf)
        # The leading newline is required: without it 'datasetpath=' is glued
        # to the end of the pset line.
        out.write('\ndatasetpath=' + ds)
        out.write('\noutput_file=' + dqmout)
    finally:
        out.close()
240 
# Fixed leading part of crab.cfg.  create_crab writes this text first and then
# appends the per-dataset keys.  The value starts and ends with a newline.
crab_block = "\n".join([
    "",
    "[CRAB]",
    "jobtype = cmssw",
    "scheduler = glite",
    "",
    "[EDG]",
    "remove_default_blacklist=1",
    "rb = CERN",
    "",
    "[USER]",
    "return_data = 1",
    "#copy_data = 1",
    "#storage_element=srm-cms.cern.ch",
    "#storage_path=/srm/managerv2?SFN=/castor/cern.ch",
    "#user_remote_dir=/user/n/nuno/test",
    "publish_data=0",
    "thresholdLevel=70",
    "eMail=nuno@cern.ch",
    "",
    "[CMSSW]",
    "total_number_of_events=-1",
    "show_prod = 1",
    "number_of_jobs=1",
    "",
])
265 
266 
# Check arguments and settings.
# input_type ends up as 'none' (no argument: query DBS), 'file' (argument is
# an existing file holding a dataset list) or 'ds' (argument is a dataset path).
input_type = ''
argin = ''
dsfile = ''
do_reference = False
if len(sys.argv) > 2:
    print_def()
    sys.exit(10)
elif len(sys.argv) == 1:
    print("Will search for available datasets.")
    input_type = 'none'
elif len(sys.argv) == 2:
    argin = sys.argv[1]
    if os.path.exists(argin):
        dsfile = argin
        input_type = 'file'
    # str.find returns -1 (which is truthy) when the text is absent, so both
    # results must be compared against -1 explicitly.
    elif argin.find('CMSSW') != -1 and argin.find('RelVal') != -1:
        print('Using specified data set %s' % argin)
        input_type = 'ds'
    else:
        print('Invalid argument: process list, dataset or file %s does not exist.'
              % argin)
        sys.exit(11)
291 
# dbs: when DBSCMD_HOME is set, import the DBS client and create the global
# 'api' handle used by the dataset queries in this file.
is_dbs = check_dbs()
if is_dbs:
    print(" ".join(("dbs home:", str(os.getenv('DBSCMD_HOME')))))
    from DBSAPI.dbsApi import DbsApi
    from DBSAPI.dbsException import *
    from DBSAPI.dbsApiException import *
    from DBSAPI.dbsOptions import DbsOptionParser
    optManager = DbsOptionParser()
    (opts, args) = optManager.getOpt()
    # The parsed options are not used (api = DbsApi(opts.__dict__) was
    # disabled); the server url is set explicitly instead.
    args = {'url': "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"}
    api = DbsApi(args)
else:
    print("dbs not set!")
308 
# cmssw: the release name comes from the environment; without it the script
# prints setup instructions and stops with status 12.
cmssw_ver = os.environ.get('CMSSW_VERSION', 'NOTSET')
if cmssw_ver != 'NOTSET':
    print(" ".join(("Using cmssw version:", cmssw_ver)))
else:
    print("""
 cmssw not set!
 example:
 scramv1 p CMSSW CMSSW_3_1_0_pre5
 cd CMSSW_3_1_0_pre5/src
 eval `scramv1 runtime -sh`
 cd -
 """)
    sys.exit(12)
323 
324 
# read datasets: fill dsetpaths from the source selected by input_type
dsetpaths = []

if input_type == 'ds':
    # a single dataset path given on the command line
    dsetpaths.append(argin)
elif input_type == 'file':
    read_ds_file()
elif input_type == 'none':
    if is_dbs:
        # no argument: build the list from DBS, then read it back
        dsfile = cmssw_ver + "_dbslist.txt"
        make_dbs_list(dsfile)
        read_ds_file()
    else:
        print("no dataset specified, and dbs isn't set...")
        print_def()
        sys.exit(13)
341 
342 
# check dataset list: remove incompatible dsets
check_dset()

# print the dataset list to be processed and record it, one path per line
print(" ".join(('data sets:', str(dsetpaths))))
dslproc = open("dset_processed.txt", 'w')
for s in dsetpaths:
    dslproc.write("".join((s, '\n')))
dslproc.close()
352 
353 
## Names of the generated crab configuration files
f_crab = 'crab.cfg'
f_multi_crab = 'multicrab.cfg'

## Create the harvest.py and crab.cfg templates from the first dataset
first_dset = dsetpaths[0]
create_harvest(first_dset)
create_crab(first_dset)

## Create harvest_n.py for individual datasets
for s in dsetpaths:
    create_harvest(s)

## Create multicrab.cfg
create_mcrab(dsetpaths, f_crab, f_multi_crab)
368 
## Print what has been created

harvfilelist = []
for s in dsetpaths:
    harvfilelist.append(make_harv_name(s))

created = {'pwd': os.environ["PWD"], 'cf': f_crab, 'mc': f_multi_crab}
print('\nCreated:\n\t %(pwd)s/%(cf)s \n\t %(pwd)s/%(mc)s' % created)
# The list follows the tab directly, with no separating space.
print("\tIndividual harvest py's:\n\t" + str(harvfilelist))

print("Done.")
def get_name_from_dsetpath
def replace
Definition: linker.py:10
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:7
def get_cond_from_dsetpath
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run