CMS 3D CMS Logo

doHarvest.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 from __future__ import print_function
4 import sys
5 import os
6 
# Validate the command line: exactly one argument (the dataset path) must
# follow the script name. Otherwise print the usage text and exit with
# status 10.
arg_count = len(sys.argv) - 1
if arg_count != 1:
    usage_text = """
 Usage: create_harvesting_py.py <dataset>
 example:
 create_harvesting_py.py \
 /RelValTTbar/CMSSW_3_1_0_pre4_STARTUP_30X_v1/GEN-SIM-RECO
 """
    print(usage_text)
    sys.exit(10)
17 
# Check the required environment before doing any work.
# DBSCMD_HOME must be set, otherwise exit with status 11.
dbs_home = os.environ.get('DBSCMD_HOME', 'NOTSET')
if dbs_home == 'NOTSET':
    print("dbs not set!")
    sys.exit(11)

# CMSSW_VERSION must be set as well, otherwise print setup instructions
# and exit with status 12.
cmssw_version = os.environ.get('CMSSW_VERSION', 'NOTSET')
if cmssw_version == 'NOTSET':
    print("""
 cmssw not set!
 example:
 cmsrel CMSSW_3_1_0_pre4
 cd CMSSW_3_1_0_pre4/src
 eval `scramv1 runtime -sh`
 cd -
 """)
    sys.exit(12)
33 
# The dataset path is the single command-line argument checked above.
dsetpath = sys.argv[1]

# These imports are placed after the environment checks in the original
# script, so they only run once DBSCMD_HOME is known to be set.
from DBSAPI.dbsApi import DbsApi
from DBSAPI.dbsException import *
from DBSAPI.dbsApiException import *
from DBSAPI.dbsOptions import DbsOptionParser

# Build the DBS API client from the attributes of the parsed options object.
optManager = DbsOptionParser()
opts, args = optManager.getOpt()
api = DbsApi(vars(opts))
44 
# List the dataset's files and derive how many jobs to request.
print("dataset: ", dsetpath)
print("data files: ")

# Query the file list once and reuse it for both the listing and the event
# count, instead of issuing the same listFiles request twice.
dataset_files = list(api.listFiles(path=dsetpath))
for afile in dataset_files:
    print(" %s" % afile['LogicalFileName'])

#Determine number of events/processes
totnevts = sum(afile['NumberOfEvents'] for afile in dataset_files)

# Reference number of events per job. The job count is the integer quotient
# of the total by this reference, with a minimum of one job. Floor division
# keeps the result an integer under both Python 2 and Python 3, and the
# divisor is the named reference rather than a second hard-coded 9000.
nevtref = 9000
njobs = max(1, int(totnevts // nevtref))
print("Total # events: ", totnevts,
      " to be executed in ", njobs, "processes")
59 
60 
#Run cmsDriver command
# The command is built with --no_exec, so it is asked only to generate the
# harvesting configuration, not to execute it.
raw_cmsdriver = "cmsDriver.py harvest -s HARVESTING:validationHarvesting --mc --conditions FrontierConditions_GlobalTag,STARTUP_30X::All --harvesting AtJobEnd --no_exec -n -1"

print("executing cmsdriver command:\n\t", raw_cmsdriver)

# Run the command directly. Wrapping it in backticks (command substitution)
# made the shell run cmsDriver.py and then try to execute whatever it printed
# as a second command.
cmsdriver_status = os.system(raw_cmsdriver)
if cmsdriver_status != 0:
    # Only warn: the steps below tolerate a missing cmsDriver output file.
    print("warning: cmsDriver.py returned non-zero status",
          cmsdriver_status, file=sys.stderr)
67 
68 
#Open output py
fin_name = "harvest_HARVESTING_STARTUP.py"
pyout_name = "harvest.py"

# Ensure the cmsDriver output file exists (creating it empty if it does not),
# then rename it to the final configuration name. This uses os calls instead
# of shelling out to `touch` and `mv`.
open(fin_name, 'a').close()
os.rename(fin_name, pyout_name)

#Added to py config: input, output file name, dqm settings
# The context manager closes the file even if a write or the DBS query fails.
with open(pyout_name, 'a') as pyout:
    pyout.write("\n\n##additions to cmsDriver output \n")
    pyout.write("#DQMStore.referenceFileName = ''\n")
    pyout.write("process.dqmSaver.workflow = '" + dsetpath + "'\n")
    pyout.write("process.source.fileNames = cms.untracked.vstring(\n")

    # One quoted logical file name per line inside the vstring(...) call.
    for afile in api.listFiles(path=dsetpath):
        pyout.write(" '%s',\n" % afile['LogicalFileName'])

    pyout.write(")")
87 
88 
#Create crab conf

# Static part of the CRAB configuration. The storage location, remote
# directory and e-mail address are hard-coded here.
crab_block = """
[CRAB]
jobtype = cmssw
scheduler = glite
#server_name =

[EDG]
remove_default_blacklist=1
rb = CERN

[USER]
return_data = 0
copy_data = 1
storage_element=srm-cms.cern.ch
storage_path=/srm/managerv2?SFN=/castor/cern.ch/
user_remote_dir=/user/n/nuno/relval/harvest/
publish_data=0
thresholdLevel=70
eMail=nuno@cern.ch

[CMSSW]
total_number_of_events=-1
show_prod = 1
"""

crab_name = "crab.cfg"

# Keep the previous configuration as crab.cfg_old. Only rotate when a
# configuration actually exists, so an existing backup is not replaced by an
# empty file. This uses os calls instead of shelling out to `touch` and `mv`.
if os.path.exists(crab_name):
    os.rename(crab_name, crab_name + "_old")

# Output ROOT file name: fixed prefix plus the dataset path with every '/'
# replaced by '__'.
rootfile = "DQM_V0001_R000000001" \
    + dsetpath.replace('/', '__') \
    + ".root"

# The context manager closes the file even if a write fails.
with open(crab_name, 'w') as crab_cfg:
    crab_cfg.write(crab_block)
    # These lines follow crab_block, whose last section header is [CMSSW].
    crab_cfg.write("number_of_jobs=" + str(njobs) + "\n")
    crab_cfg.write("pset=" + pyout_name + "\n")
    crab_cfg.write("output_file=" + rootfile + "\n")
    crab_cfg.write("datasetpath=" + dsetpath + "\n")
134 
135 #os.system("cat " + pyout_name)
136 #print "Created crab conf:\t", crab_name,"\n"
137 
# Report where the two generated files are, relative to $PWD.
summary_fields = {
    'pwd': os.environ["PWD"],
    'pf': pyout_name,
    'cf': crab_name,
}
summary_template = '\n\nCreated:\n\t %(pwd)s/%(pf)s \n\t %(pwd)s/%(cf)s'
print(summary_template % summary_fields)

print("Done.")
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
#define str(s)