CMS 3D CMS Logo

Functions | Variables

harvestRelVal Namespace Reference

Functions

def append_sample_mcrab
def check_dbs
def check_dset
def check_nevts_dset
def create_crab
def create_harvest
def create_mcrab
def find_dqmref
def get_cond_from_dsetpath
def get_name_from_dsetpath
def make_dbs_list
def make_dqmname
def make_harv_name
def print_def
def read_ds_file

Variables

tuple api = DbsApi(args)
string argin = ''
dictionary args = {}
tuple cmssw_ver = os.getenv('CMSSW_VERSION','NOTSET')
string crab_block
 do_reference = False
tuple dsetpaths = list()
string dsfile = ''
tuple dslproc = open("dset_processed.txt", 'w')
string f_crab = 'crab.cfg'
 Create harvest.py template.
string f_multi_crab = 'multicrab.cfg'
 Create harvest_n.py for individual datasets.
tuple harvfilelist = list()
 Print what has been created.
string input_type = ''
tuple is_dbs = check_dbs()
tuple optManager = DbsOptionParser()

Function Documentation

def harvestRelVal::append_sample_mcrab (   dsetp,
  fout 
)

Definition at line 215 of file harvestRelVal.py.

00216                                     :
00217     dqm = make_dqmname(dsetp)
00218     sample = get_name_from_dsetpath(dsetp)
00219     hf = make_harv_name(dsetp)
00220     if not os.path.exists(hf) :
00221         print 'problem creating multicrab, file', hf, 'does not exist'
00222         sys.exit(17)
00223     fout.write('\n\n[' + sample + ']')
00224     fout.write('\nCMSSW.pset=' + hf)
00225     fout.write('\nCMSSW.datasetpath=' + dsetp)
00226     fout.write('\nCMSSW.output_file=' + dqm)
00227 
00228     dqmref = find_dqmref(dsetp);
00229     if not dqmref == 'NONE' : 
00230         fout.write('\nUSER.additional_input_files=' + dqmref)

def harvestRelVal::check_dbs ( )

Definition at line 24 of file harvestRelVal.py.

00025                :
00026     if os.getenv('DBSCMD_HOME','NOTSET') == 'NOTSET' :
00027         return 0
00028     return 1

def harvestRelVal::check_dset ( )

Definition at line 96 of file harvestRelVal.py.

00097                  :
00098    #check cmssw consistency
00099    for s in dsetpaths:
00100        if s.find(cmssw_ver) == -1 :
00101            dsetpaths.remove(s)        
00102            print 'Inconsistency found with datset and cmssw version (', cmssw_ver, ')' \
00103                  ': \t ', s, ' has been removed.'
00104    #check conditions from dsetname
00105    for s in dsetpaths[:]: #nb:need to make a copy here!
00106        cond = get_cond_from_dsetpath(s)
00107        if cond  == 0 : 
00108            dsetpaths.remove(s)        
00109    #check list size
00110    nSamples = len(dsetpaths)
00111    if nSamples == 0 :
00112        print "Empty input list, exit."
00113        sys.exit(12)
00114    else :
00115        print 'Processing', nSamples, 'data sets.'
00116    #check event numbers
00117    nSampleEvts = list()
00118    for s in dsetpaths:
00119        nSampleEvts.append(check_nevts_dset(s))
00120    print 'number of events per dataset:', nSampleEvts

def harvestRelVal::check_nevts_dset (   dset)

Definition at line 29 of file harvestRelVal.py.

00030                           :
00031     if not is_dbs :
00032         return -1
00033     ntot=0
00034     for afile in api.listFiles(path=str(dset)):
00035         nevts = afile['NumberOfEvents']
00036         ntot += nevts
00037         #print "  %s" % afile['LogicalFileName']
00038     return ntot  

def harvestRelVal::create_crab (   ds)

Definition at line 231 of file harvestRelVal.py.

00232                     :
00233     dqmout = make_dqmname(ds)
00234     hf = make_harv_name(ds)
00235     out = open(f_crab, 'w')
00236     out.write(crab_block)
00237     out.write('\npset=' + hf)
00238     out.write('datasetpath=' + ds)
00239     out.write('\noutput_file=' + dqmout)
00240     out.close()

def harvestRelVal::create_harvest (   ds)

Definition at line 155 of file harvestRelVal.py.

00156                        :
00157     raw_cmsdriver = "cmsDriver.py harvest -s HARVESTING:validationHarvesting --mc  --conditions FrontierConditions_GlobalTag,STARTUP_30X::All --harvesting AtJobEnd --no_exec -n -1"
00158     cmsdriver = raw_cmsdriver
00159     cond = get_cond_from_dsetpath(ds)
00160     if cond == 0 :
00161         print 'unexpected problem with conditions'
00162         sys.exit(50)
00163     cmsdriver = cmsdriver.replace('30X',cond)
00164     fin_name="harvest_HARVESTING_STARTUP.py"
00165     if ds.find('IDEAL') != -1 :
00166         cmsdriver = cmsdriver.replace('STARTUP','IDEAL')
00167         fin_name = fin_name.replace('STARTUP','IDEAL')
00168     if ds.find('FastSim') != -1:
00169         cmsdriver = cmsdriver.replace('validationHarvesting','validationHarvestingFS')
00170     if ds.find('PileUp') != -1:
00171         cmsdriver = cmsdriver.replace('validationHarvesting','validationHarvestingPU')
00172 
00173     #print "=>", cmsdriver, " fs?", ds.find('FastSim')
00174     if os.path.exists(fin_name) : 
00175         os.system("rm " + fin_name)
00176     print "executing cmsdriver command:\n\t", cmsdriver
00177     os.system(cmsdriver)
00178     if not os.path.exists(fin_name) : 
00179         print 'problem with cmsdriver file name'
00180         sys.exit(40)
00181     os.system("touch " + fin_name)
00182     hf = make_harv_name(ds)
00183     os.system('mv ' + fin_name + " " + hf)
00184     out = open(hf, 'a')
00185     out.write("\n\n##additions to cmsDriver output \n")
00186     out.write("process.dqmSaver.workflow = '" + ds + "'\n")
00187     if is_dbs :
00188         out.write("process.source.fileNames = cms.untracked.vstring(\n")
00189         for afile in api.listFiles(path=ds):
00190             out.write("  '%s',\n" % afile['LogicalFileName'])
00191         out.write(")\n")
00192 
00193     dqmref = find_dqmref(ds);
00194     if not dqmref == 'NONE' : 
00195         out.write("process.DQMStore.referenceFileName = '" + dqmref + "'\n")
00196         out.write("process.dqmSaver.referenceHandling = 'all'\n")
00197 
00198     out.close()

def harvestRelVal::create_mcrab (   set,
  fcrab,
  fout 
)

Definition at line 199 of file harvestRelVal.py.

00200                                   :
00201     out = open(fout, 'w')
00202     out.write('[MULTICRAB]')
00203     out.write('\ncfg=' + fcrab)
00204     out.write('\n\n[COMMON]')
00205     nevt = -1
00206     njob = 1
00207     out.write('\nCMSSW.total_number_of_events=' + (str)(nevt) )
00208     out.write('\nCMSSW.number_of_jobs=' + (str)(njob) )
00209     for s in set:
00210         append_sample_mcrab(s, out)
00211     out.close()    

def harvestRelVal::find_dqmref (   ds)

Definition at line 121 of file harvestRelVal.py.

00122                     :
00123     if not do_reference :
00124         return 'NONE'
00125     cp = cmssw_ver[-1:]
00126     ip = (int)(cp) - 1
00127     ref_ver = cmssw_ver.replace(cp,str(ip))
00128     #print "cms:", cmssw_ver, " cp:", cp, " ip:", ip, " new_ver:", ref_ver  
00129     ref_dir = "/castor/cern.ch/user/n/nuno/relval/harvest/" + ref_ver + "/"
00130     ref_dsf = make_dqmname(ds.replace(cmssw_ver, ref_ver))
00131     gls = " | grep root | grep "
00132     #to accept crab appended _1.root in file names, and skip versions/conditions
00133     gls += ref_dsf[:-25] 
00134     gls += "| awk '{print $9}' "
00135     #print "refds:", ref_dsf, " command: rfdir", ref_dir+gls
00136     command = "rfcp " + ref_dir  + "`rfdir " + ref_dir + gls + "` ."
00137     #print "command:", command
00138     os.system(command)
00139     tmpfile = "ref.txt"
00140     command = "ls -rtl *" + gls + " > " + tmpfile
00141     #print "command:", command
00142     os.system(command)
00143     the_ref = 'NONE'
00144     if os.path.exists(tmpfile) :
00145         fin = open(tmpfile,'r')
00146         ref = fin.readline().replace('\n','')
00147         #print "read ref:", ref, "exists?", os.path.exists(ref)
00148         fin.close()
00149         if os.path.exists(ref) :
00150             the_ref = ref
00151     else :
00152         the_ref = 'NONE'
00153     print "Found reference file:", the_ref
00154     return the_ref

def harvestRelVal::get_cond_from_dsetpath (   ds)

Definition at line 47 of file harvestRelVal.py.

00048                                :
00049     ca = ds.split('/')[2].replace(cmssw_ver+'_','').replace('IDEAL_','').replace('STARTUP_','').replace('_FastSim','')
00050     cb = ca[:ca.find('v')-1]
00051     if cb[0].find('3') == -1 or len(cb) > 3:
00052         print "problem extracting condition for", ds, " : ", cb, '(len:',len(cb),')'  
00053         if cb.find('31X') != -1:
00054             cb = '31X'
00055         elif cb.find('30X') != -1:
00056             cb = '30X'
00057         else:
00058             print "skipping", cb
00059             return 0
00060         print "condition found:", cb
00061     else :
00062         print "good condition for", ds, " : ", cb, '(len:',len(cb),')'      
00063     return cb
00064 

def harvestRelVal::get_name_from_dsetpath (   ds)

Definition at line 42 of file harvestRelVal.py.

00043                               :
00044     fs = ds.split('/')
00045     fa = fs[1].replace('RelVal','')
00046     return fa

def harvestRelVal::make_dbs_list (   dbslf)

Definition at line 65 of file harvestRelVal.py.

00066                          :
00067     if not is_dbs :
00068         return
00069     flis = open(dbslf,'w')
00070     for ads in api.listDatasetPaths() :
00071         if ads.find('RelVal') != -1 \
00072                or ads.find(cmssw_ver) != -1 \
00073                or ads.find("/GEN-SIM") != -1 : 
00074 #               and ads.find("/GEN-SIM-RECO") != -1 : 
00075             flis.write(ads + '\n')
00076     flis.close()
00077     print 'Generated dataset list', dbslf, 'from dbs.' 
00078     #example:
00079     #dbs lsd --path=/RelVal*/CMSSW_3_1_0_pre5*/GEN-SIM-RECO --url=https://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet > mylist.txt
00080     #dbslsd = "dbs lsd --path=/RelVal*/" + cmssw_ver + "*/GEN-SIM-RECO --url=https://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
00081     #os.system( '`' + dbslsd + ' > ' + dbslf + '`')

def harvestRelVal::make_dqmname (   s)

Definition at line 39 of file harvestRelVal.py.

00040                    :
00041     return  'DQM_V0001_R000000001' + s.replace('/','__') + '.root' 

def harvestRelVal::make_harv_name (   dset)

Definition at line 212 of file harvestRelVal.py.

00213                          :
00214     return 'harvest_' + get_name_from_dsetpath(dset) + '.py' 

def harvestRelVal::print_def ( )

Definition at line 17 of file harvestRelVal.py.

00018                :
00019     print "Usage:", sys.argv[0], "[list_of_processes]"
00020     print "Examples:"
00021     print "harvestRelVal.py"
00022     print "harvestRelVal.py /RelValTTbar/CMSSW_3_1_0_pre4_STARTUP_30X_v1/GEN-SIM-RECO"
00023     print "harvestRelVal.py <dataset_list.txt>"

def harvestRelVal::read_ds_file ( )

Definition at line 82 of file harvestRelVal.py.

00083                    :
00084     if not os.path.exists(dsfile) :
00085         print "problem reading file", dsfile
00086         sys.exit(30)
00087     fin = open(dsfile,'r')
00088     for dset in fin.readlines(): 
00089         d = dset.replace('\n','')
00090         if d.find('#') == -1 :
00091             dsetpaths.append(d)
00092         else :
00093             print 'skipping:', d
00094     fin.close()
00095     print 'Using data set list in ', dsfile


Variable Documentation

tuple harvestRelVal::api = DbsApi(args)

Definition at line 307 of file harvestRelVal.py.

string harvestRelVal::argin = ''

Definition at line 269 of file harvestRelVal.py.

dictionary harvestRelVal::args = {}
tuple harvestRelVal::cmssw_ver = os.getenv('CMSSW_VERSION','NOTSET')

Definition at line 310 of file harvestRelVal.py.

string harvestRelVal::crab_block

Initial value:
00001 """
00002 [CRAB]
00003 jobtype = cmssw
00004 scheduler = glite
00005 
00006 [EDG]
00007 remove_default_blacklist=1
00008 rb = CERN
00009 
00010 [USER]
00011 return_data = 1
00012 #copy_data = 1
00013 #storage_element=srm-cms.cern.ch
00014 #storage_path=/srm/managerv2?SFN=/castor/cern.ch
00015 #user_remote_dir=/user/n/nuno/test
00016 publish_data=0
00017 thresholdLevel=70
00018 eMail=nuno@cern.ch
00019 
00020 [CMSSW]
00021 total_number_of_events=-1
00022 show_prod = 1
00023 number_of_jobs=1
00024 """

Definition at line 241 of file harvestRelVal.py.

harvestRelVal::do_reference = False

Definition at line 271 of file harvestRelVal.py.

tuple harvestRelVal::dsetpaths = list()

Definition at line 326 of file harvestRelVal.py.

string harvestRelVal::dsfile = ''

Definition at line 270 of file harvestRelVal.py.

tuple harvestRelVal::dslproc = open("dset_processed.txt", 'w')

Definition at line 348 of file harvestRelVal.py.

string harvestRelVal::f_crab = 'crab.cfg'

Create harvest.py template.

Create crab.cfg template

Definition at line 358 of file harvestRelVal.py.

string harvestRelVal::f_multi_crab = 'multicrab.cfg'

Create harvest_n.py for individual datasets.

Create multicrab.cfg

Definition at line 366 of file harvestRelVal.py.

tuple harvestRelVal::harvfilelist = list()

Print what has been created.

Definition at line 371 of file harvestRelVal.py.

string harvestRelVal::input_type = ''

Definition at line 268 of file harvestRelVal.py.

tuple harvestRelVal::is_dbs = check_dbs()

Definition at line 293 of file harvestRelVal.py.

tuple harvestRelVal::optManager = DbsOptionParser()

Definition at line 302 of file harvestRelVal.py.