CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
doHarvest.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 import sys
4 import os
5 
#Check arg,settings
# Validate the command line and the runtime environment before doing any
# work.  Each failure exits with its own status code (10, 11 or 12).

if len(sys.argv) != 2:
    # Show the name the script was actually invoked with instead of a
    # hard-coded file name that no longer matches this file.
    script_name = os.path.basename(sys.argv[0])
    # NOTE: the backslash-newline inside the literal is a Python line
    # continuation, so the example command is printed on a single line.
    print("""
 Usage: %(prog)s <dataset>
 example:
 %(prog)s \
 /RelValTTbar/CMSSW_3_1_0_pre4_STARTUP_30X_v1/GEN-SIM-RECO
 """ % {'prog': script_name})
    sys.exit(10)

#Get data files of dataset to be processed
# DBSCMD_HOME must be present in the environment (unset is treated the same
# as the literal value 'NOTSET').
if os.getenv('DBSCMD_HOME', 'NOTSET') == 'NOTSET':
    print("dbs not set!")
    sys.exit(11)

# Same check for CMSSW_VERSION; the message shows how to set up a release.
if os.getenv('CMSSW_VERSION', 'NOTSET') == 'NOTSET':
    print("""
 cmssw not set!
 example:
 cmsrel CMSSW_3_1_0_pre4
 cd CMSSW_3_1_0_pre4/src
 eval `scramv1 runtime -sh`
 cd -
 """)
    sys.exit(12)

# The single positional argument is the dataset path used by every later step.
dsetpath = sys.argv[1]
34 
35 from DBSAPI.dbsApi import DbsApi
36 from DBSAPI.dbsException import *
37 from DBSAPI.dbsApiException import *
38 from DBSAPI.dbsOptions import DbsOptionParser
39 
# Create the DBS client.  getOpt() yields an (options, arguments) pair; the
# option attributes are handed to DbsApi as a plain dictionary.
optManager = DbsOptionParser()
opts, args = optManager.getOpt()
api = DbsApi(vars(opts))
43 
# Query DBS once and reuse the result for both the listing and the event
# count.  list() guards against listFiles() handing back a one-shot iterator,
# since the result is walked twice below.
dataset_files = list(api.listFiles(path=dsetpath))

print("dataset:  %s" % dsetpath)
print("data files: ")
for afile in dataset_files:
    print(" %s" % afile['LogicalFileName'])

#Determine number of events/processes
totnevts = sum([afile['NumberOfEvents'] for afile in dataset_files])

# nevtref is the number of events per job: one job per full batch of nevtref
# events, and never fewer than one job.  The divisor is nevtref itself so the
# threshold and the split cannot drift apart.
nevtref = 9000
njobs = 1
if totnevts > nevtref:
    njobs = int(totnevts // nevtref)
print("Total # events:  %s  to be executed in  %s processes"
      % (totnevts, njobs))
58 
59 
#Run cmsDriver command
# The later steps expect this command to leave a file named
# harvest_HARVESTING_STARTUP.py in the current directory.
# NOTE(review): --no_exec presumably makes cmsDriver only write the
# configuration without running it -- confirm against the cmsDriver docs.
raw_cmsdriver = "cmsDriver.py harvest -s HARVESTING:validationHarvesting --mc --conditions FrontierConditions_GlobalTag,STARTUP_30X::All --harvesting AtJobEnd --no_exec -n -1"

print("executing cmsdriver command:\n\t" + raw_cmsdriver)

# Run the command itself.  Wrapping it in backticks would make the shell
# substitute cmsDriver's standard output and then try to execute that text
# as a second command.
cmsdriver_status = os.system(raw_cmsdriver)
if cmsdriver_status != 0:
    # Best effort, as before: report the problem but keep going.
    print("warning: cmsDriver command exited with status %s"
          % cmsdriver_status)
66 
67 
#Open output py
# Rename the configuration produced by cmsDriver to harvest.py and append the
# dataset-specific settings to it.
fin_name = "harvest_HARVESTING_STARTUP.py"
pyout_name = "harvest.py"

if not os.path.exists(fin_name):
    # Keep the best-effort behaviour (start from an empty file) but say so:
    # the appended lines refer to `process` and `cms`, which only exist when
    # the cmsDriver output is present.
    print("warning: %s not found; %s will only contain the additions"
          % (fin_name, pyout_name))
    open(fin_name, 'a').close()
os.rename(fin_name, pyout_name)

#Added to py config: input, output file name, dqm settings
pyout = open(pyout_name, 'a')
try:
    pyout.write("\n\n##additions to cmsDriver output \n")
    pyout.write("#DQMStore.referenceFileName = ''\n")
    pyout.write("process.dqmSaver.workflow = '" + dsetpath + "'\n")
    pyout.write("process.source.fileNames = cms.untracked.vstring(\n")

    # One quoted logical file name per line, each followed by a comma.
    for afile in api.listFiles(path=dsetpath):
        pyout.write(" '%s',\n" % afile['LogicalFileName'])

    # Close the vstring(...) call and end the file with a newline.
    pyout.write(")\n")
finally:
    # Release the handle even if the DBS query or a write fails.
    pyout.close()
86 
87 
#Create crab conf
# Static part of the CRAB configuration.  It ends inside the [CMSSW] section,
# so the per-dataset keys written after it land in that section.

crab_block = """
[CRAB]
jobtype = cmssw
scheduler = glite
#server_name =

[EDG]
remove_default_blacklist=1
rb = CERN

[USER]
return_data = 0
copy_data = 1
storage_element=srm-cms.cern.ch
storage_path=/srm/managerv2?SFN=/castor/cern.ch/
user_remote_dir=/user/n/nuno/relval/harvest/
publish_data=0
thresholdLevel=70
eMail=nuno@cern.ch

[CMSSW]
total_number_of_events=-1
show_prod = 1
"""

crab_name = "crab.cfg"

# Keep an existing configuration as crab.cfg_old.  When there is nothing to
# back up, leave any earlier backup alone instead of replacing it with an
# empty file.
if os.path.exists(crab_name):
    os.rename(crab_name, crab_name + "_old")

# Output file name: fixed DQM prefix plus the dataset path with every '/'
# replaced by '__'.
rootfile = "DQM_V0001_R000000001" + dsetpath.replace('/', '__') + ".root"

crab_cfg = open(crab_name, 'w')
try:
    crab_cfg.write(crab_block)
    crab_cfg.write("number_of_jobs=" + str(njobs) + "\n")
    crab_cfg.write("pset=" + pyout_name + "\n")
    crab_cfg.write("output_file=" + rootfile + "\n")
    crab_cfg.write("datasetpath=" + dsetpath + "\n")
finally:
    # Release the handle even if a write fails.
    crab_cfg.close()
133 
# Report where the two generated files were written.  $PWD is used when it is
# set, which keeps the original output; otherwise fall back to os.getcwd()
# so a missing variable cannot crash the script after all the work is done.
workdir = os.environ.get("PWD") or os.getcwd()

print('\n\nCreated:\n\t %(pwd)s/%(pf)s \n\t %(pwd)s/%(cf)s'
      % {'pwd': workdir, 'pf': pyout_name, 'cf': crab_name})

print("Done.")