3 from __future__
import print_function
8 arguments [<list-of-processes>]
10 creates crab.cfg, multicrab.cfg, harvest_*.py
12 prints number of events found in dataset
13 if no argument is provided looks for all available datsets for release
14 user can edit multicrab and confirm process list as needed
19 print(
"Usage:", sys.argv[0],
"[list_of_processes]")
21 print(
"harvestRelVal.py")
22 print(
"harvestRelVal.py /RelValTTbar/CMSSW_3_1_0_pre4_STARTUP_30X_v1/GEN-SIM-RECO")
23 print(
"harvestRelVal.py <dataset_list.txt>")
26 if os.getenv(
'DBSCMD_HOME',
'NOTSET') ==
'NOTSET' :
34 for afile
in api.listFiles(path=
str(dset)):
35 nevts = afile[
'NumberOfEvents']
41 return 'DQM_V0001_R000000001' + s.replace(
'/',
'__') +
'.root'
50 cb = ca[:ca.find(
'v')-1]
51 if cb[0].
find(
'3') == -1
or len(cb) > 3:
52 print(
"problem extracting condition for", ds,
" : ", cb,
'(len:',len(cb),
')')
53 if cb.find(
'31X') != -1:
55 elif cb.find(
'30X') != -1:
60 print(
"condition found:", cb)
62 print(
"good condition for", ds,
" : ", cb,
'(len:',len(cb),
')')
69 flis = open(dbslf,
'w')
70 for ads
in api.listDatasetPaths() :
71 if ads.find(
'RelVal') != -1 \
72 or ads.find(cmssw_ver) != -1 \
73 or ads.find(
"/GEN-SIM") != -1 :
75 flis.write(ads +
'\n')
77 print(
'Generated dataset list', dbslf,
'from dbs.')
84 if not os.path.exists(dsfile) :
85 print(
"problem reading file", dsfile)
87 fin = open(dsfile,
'r')
88 for dset
in fin.readlines():
89 d = dset.replace(
'\n',
'')
90 if d.find(
'#') == -1 :
95 print(
'Using data set list in ', dsfile)
100 if s.find(cmssw_ver) == -1 :
102 print(
'Inconsistency found with datset and cmssw version (', cmssw_ver,
')' \
103 ': \t ', s,
' has been removed.')
105 for s
in dsetpaths[:]:
110 nSamples = len(dsetpaths)
112 print(
"Empty input list, exit.")
115 print(
'Processing', nSamples,
'data sets.')
120 print(
'number of events per dataset:', nSampleEvts)
123 if not do_reference :
127 ref_ver = cmssw_ver.replace(cp,
str(ip))
129 ref_dir =
"/castor/cern.ch/user/n/nuno/relval/harvest/" + ref_ver +
"/"
131 gls =
" | grep root | grep "
134 gls +=
"| awk '{print $9}' "
136 command =
"rfcp " + ref_dir +
"`rfdir " + ref_dir + gls +
"` ."
140 command =
"ls -rtl *" + gls +
" > " + tmpfile
144 if os.path.exists(tmpfile) :
145 fin = open(tmpfile,
'r')
146 ref = fin.readline().replace('\n',
'')
149 if os.path.exists(ref) :
153 print(
"Found reference file:", the_ref)
157 raw_cmsdriver =
"cmsDriver.py harvest -s HARVESTING:validationHarvesting --mc --conditions FrontierConditions_GlobalTag,STARTUP_30X::All --harvesting AtJobEnd --no_exec -n -1"
158 cmsdriver = raw_cmsdriver
161 print(
'unexpected problem with conditions')
163 cmsdriver = cmsdriver.replace(
'30X',cond)
164 fin_name=
"harvest_HARVESTING_STARTUP.py"
165 if ds.find(
'IDEAL') != -1 :
166 cmsdriver = cmsdriver.replace(
'STARTUP',
'IDEAL')
167 fin_name = fin_name.replace(
'STARTUP',
'IDEAL')
168 if ds.find(
'FastSim') != -1:
169 cmsdriver = cmsdriver.replace(
'validationHarvesting',
'validationHarvestingFS')
170 if ds.find(
'PileUp') != -1:
171 cmsdriver = cmsdriver.replace(
'validationHarvesting',
'validationHarvestingPU')
174 if os.path.exists(fin_name) :
175 os.system(
"rm " + fin_name)
176 print(
"executing cmsdriver command:\n\t", cmsdriver)
178 if not os.path.exists(fin_name) :
179 print(
'problem with cmsdriver file name')
181 os.system(
"touch " + fin_name)
183 os.system(
'mv ' + fin_name +
" " + hf)
185 out.write(
"\n\n##additions to cmsDriver output \n")
186 out.write(
"process.dqmSaver.workflow = '" + ds +
"'\n")
188 out.write(
"process.source.fileNames = cms.untracked.vstring(\n")
189 for afile
in api.listFiles(path=ds):
190 out.write(
" '%s',\n" % afile[
'LogicalFileName'])
194 if not dqmref ==
'NONE' :
195 out.write(
"process.dqmSaver.referenceHandling = 'all'\n")
200 out = open(fout,
'w')
201 out.write(
'[MULTICRAB]')
202 out.write(
'\ncfg=' + fcrab)
203 out.write(
'\n\n[COMMON]')
206 out.write(
'\nCMSSW.total_number_of_events=' + (str)(nevt) )
207 out.write(
'\nCMSSW.number_of_jobs=' + (str)(njob) )
219 if not os.path.exists(hf) :
220 print(
'problem creating multicrab, file', hf,
'does not exist')
222 fout.write(
'\n\n[' + sample +
']')
223 fout.write(
'\nCMSSW.pset=' + hf)
224 fout.write(
'\nCMSSW.datasetpath=' + dsetp)
225 fout.write(
'\nCMSSW.output_file=' + dqm)
228 if not dqmref ==
'NONE' :
229 fout.write(
'\nUSER.additional_input_files=' + dqmref)
234 out = open(f_crab,
'w')
235 out.write(crab_block)
236 out.write(
'\npset=' + hf)
237 out.write(
'datasetpath=' + ds)
238 out.write(
'\noutput_file=' + dqmout)
247 remove_default_blacklist=1
253 #storage_element=srm-cms.cern.ch
254 #storage_path=/srm/managerv2?SFN=/castor/cern.ch
255 #user_remote_dir=/user/n/nuno/test
261 total_number_of_events=-1
272 if len(sys.argv) > 2 :
275 elif len(sys.argv) == 1 :
276 print(
"Will search for available datasets.")
278 elif len(sys.argv) == 2 :
280 if os.path.exists(argin) :
284 elif argin.find(
'CMSSW') != -1
and argin.find(
'RelVal'):
285 print(
'Using specified data set', argin)
288 print(
'Invalid argument: process list, dataset or file', \
289 argin,
'does not exist.')
295 print(
"dbs not set!")
297 print(
"dbs home:", os.getenv(
'DBSCMD_HOME'))
298 from DBSAPI.dbsApi
import DbsApi
301 from DBSAPI.dbsOptions
import DbsOptionParser
302 optManager = DbsOptionParser()
303 (opts,args) = optManager.getOpt()
306 args[
'url']=
"http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
310 cmssw_ver = os.getenv(
'CMSSW_VERSION',
'NOTSET')
311 if cmssw_ver ==
'NOTSET' :
315 scramv1 p CMSSW CMSSW_3_1_0_pre5
316 cd CMSSW_3_1_0_pre5/src
317 eval `scramv1 runtime -sh`
322 print(
"Using cmssw version:", cmssw_ver)
328 if input_type ==
'none' :
330 print(
"no dataset specified, and dbs isn't set...")
334 dsfile = cmssw_ver +
"_dbslist.txt"
337 elif input_type ==
'file' :
339 elif input_type ==
'ds' :
340 dsetpaths.append(argin)
347 print(
'data sets:', dsetpaths)
348 dslproc = open(
"dset_processed.txt",
'w')
350 dslproc.write(s+
'\n')
366 f_multi_crab =
'multicrab.cfg'
371 harvfilelist = list()
375 print(
'\nCreated:\n\t %(pwd)s/%(cf)s \n\t %(pwd)s/%(mc)s' \
376 % {
'pwd' : os.environ[
"PWD"],
'cf' : f_crab,
'mc' : f_multi_crab})
377 print(
"\tIndividual harvest py's:\n\t", harvfilelist)
def get_name_from_dsetpath
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
def get_cond_from_dsetpath
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)