CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_0/src/DQMOffline/EGamma/python/electronDataDiscovery.py

Go to the documentation of this file.
00001 
00002 #===================================================================
00003 # To get the list of input files, one must call:
00004 #   search(), to get the list of primary files
00005 #   search2(), to get the list of eventual secondary files
00006 # 
00007 # The selection of files is configured thanks to shell
00008 # environment variables: 
00009 # 
00010 #   DD_RELEASE, for example CMSSW_2_2_0_pre1
00011 #   DD_SAMPLE, for example RelValSingleElectronPt35
00012 #   DD_RUN, for example ''
00013 #   DD_COND , for example MC_31X_V2-v1
00014 #   DD_TIER , for example RECO
00015 #   DD_TIER_SECONDARY, for eventual secondary files
00016 #   
00017 #   DD_SOURCE:
00018 #     das: use das
00019 #     dbs: use dbs search
00020 #     lsf: use dbs lsf
00021 #     /castor/cern.ch/cms/...: assumed to be the path of a castor directory containing the input data files
00022 #       for relvals: '/castor/cern.ch/cms/store/relval/${DD_RELEASE}/${DD_SAMPLE}/${DD_TIER}/${DD_COND}/'
00023 #       for harvested dqm: '/castor/cern.ch/cms/store/unmerged/dqm/${DD_SAMPLE}-${DD_RELEASE}-${DD_COND}-DQM-DQMHarvest-OfflineDQM'
00024 #     /eos/cms/...: assumed to be the path of an eos directory containing the input data files
00025 #       for relvals: '/eos/cms/store/relval/${DD_RELEASE}/${DD_SAMPLE}/${DD_TIER}/${DD_COND}/'
00026 #       for harvested dqm: '/eos/cms/store/unmerged/dqm/${DD_SAMPLE}-${DD_RELEASE}-${DD_COND}-DQM-DQMHarvest-OfflineDQM'
00027 #     /...: assumed to be the path of a text file containing the list of input data files
00028 #
00029 # All except DD_SOURCE can use wildcard *.
00030 #===================================================================
00031 
00032 #import httpslib, urllib, urllib2, types, string, os, sys
00033 import os, sys, re, das_client
00034 
# Give every selection variable a value so that later os.environ[...] lookups
# never raise KeyError. DAS is the default source; an empty string means
# "no constraint" for the other variables.
# setdefault() only writes when the variable is not already set, which is
# what the former has_key() tests did (has_key() no longer exists in Python 3).
os.environ.setdefault('DD_SOURCE', 'das')
os.environ.setdefault('DD_RELEASE', '')
os.environ.setdefault('DD_SAMPLE', '')
os.environ.setdefault('DD_COND', '')
os.environ.setdefault('DD_TIER', '')
os.environ.setdefault('DD_TIER_SECONDARY', '')
os.environ.setdefault('DD_RUN', '')

# Turn the shell-like wildcard '*' into the regular expression '.*'.
# The rest of each value is used as a regular expression as is (nothing is
# escaped), and the patterns are compiled once, when the module is imported.
dd_release_re = re.compile(os.environ['DD_RELEASE'].replace('*','.*'))
dd_sample_re = re.compile(os.environ['DD_SAMPLE'].replace('*','.*'))
dd_cond_re = re.compile(os.environ['DD_COND'].replace('*','.*'))
dd_run_re = re.compile(os.environ['DD_RUN'].replace('*','.*'))
00054 
def _dd_command_output(command):
  """Run a shell command through os.popen and return its output lines."""
  pipe = os.popen(command)
  try:
    return pipe.readlines()
  finally:
    # Close the pipe even if reading fails.
    pipe.close()


def _dd_absolute_paths(lines):
  """Return the right-stripped lines that start with '/'."""
  stripped = [line.rstrip() for line in lines]
  return [line for line in stripped if line.startswith('/')]


def _dd_matches_filters(path, dd_tier_re):
  """Tell whether path matches the sample, cond, tier and run patterns."""
  patterns = (dd_sample_re, dd_cond_re, dd_tier_re, dd_run_re)
  return all(pattern.search(path) is not None for pattern in patterns)


def _dd_dbs_query(dd_tier):
  """Build the 'find file' query shared by the "dbs" and "https" sources."""
  clauses = []
  if os.environ['DD_RELEASE'] != "":
    clauses.append("release = " + os.environ['DD_RELEASE'])
  if os.environ['DD_SAMPLE'] != "":
    clauses.append("primds = " + os.environ['DD_SAMPLE'])
  if os.environ['DD_RUN'] != "":
    clauses.append("run = " + os.environ['DD_RUN'])
  # The dataset clause is always present, so there is always a "where".
  clauses.append("dataset like *" + os.environ['DD_COND'] + "*" + dd_tier + "*")
  return "find file where " + " and ".join(clauses)


def _dd_search_das(dd_tier):
  """Ask DAS for the single matching dataset, then for the files it holds."""
  query = "dataset instance=cms_dbs_prod_global"
  if os.environ['DD_RELEASE'] != "":
    query = query + " release=" + os.environ['DD_RELEASE']
  if os.environ['DD_SAMPLE'] != "":
    query = query + " primary_dataset=" + os.environ['DD_SAMPLE']
  if dd_tier != "":
    query = query + " tier=" + dd_tier
  if os.environ['DD_COND'] != "":
    query = query + " dataset=*" + os.environ['DD_COND'] + "*"
  if os.environ['DD_RUN'] != "":
    query = query + " run=" + os.environ['DD_RUN']

  data = das_client.json.loads(das_client.get_data('https://cmsweb.cern.ch',query,0,0,0))

  # Exactly one dataset must match; anything else yields an empty list.
  if data['nresults']==0:
    print('[electronDataDiscovery.py] No DAS dataset for query: ' + query)
    return []
  if data['nresults']>1:
    print('[electronDataDiscovery.py] Several DAS datasets for query: ' + query)
    return []

  dataset = data['data'][0]['dataset'][0]['name']
  query = "file instance=cms_dbs_prod_global dataset="+dataset
  data = das_client.json.loads(das_client.get_data('https://cmsweb.cern.ch',query,0,0,0))

  if data['nresults']==0:
    print('[electronDataDiscovery.py] No DAS file in dataset: ' + dataset)
    return []

  return [str(record['file'][0]['name']) for record in data['data'][:data['nresults']]]


def _dd_search_dbs(dd_tier):
  """Run the query with the 'dbs search' command line tool."""
  command = 'dbs search --url="https://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet" --query "'+_dd_dbs_query(dd_tier)+'"'
  return _dd_absolute_paths(_dd_command_output(command))


def _dd_search_https(dd_tier):
  """Post the query to the DBS discovery web page and parse the text reply."""
  # The module does not import these names at file level, so import them
  # here, under either their Python 2 or their Python 3 location.
  try:
    from urllib import quote, urlencode
    from urllib2 import HTTPError, Request, urlopen
  except ImportError:
    from urllib.parse import quote, urlencode
    from urllib.request import Request, urlopen
    from urllib.error import HTTPError

  url = "https://cmsweb.cern.ch:443/dbs_discovery/aSearch"
  final_input = quote(_dd_dbs_query(dd_tier))

  agent   = "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"
  ctypes  = "text/plain"
  headers = { 'User-Agent':agent, 'Accept':ctypes}
  params  = {'dbsInst':'cms_dbs_prod_global',
             'html':0,'caseSensitive':'on','_idx':0,'pagerStep':-1,
             'userInput':final_input,
             'xml':0,'details':0,'cff':0,'method':'dbsapi'}
  payload = urlencode(params,doseq=True)
  if not isinstance(payload, bytes):
    # Python 3 requires the request body to be bytes.
    payload = payload.encode('ascii')
  req = Request(url, payload, headers)

  body = ""
  try:
    response = urlopen(req)
    body = response.read()
  except HTTPError as e:
    # Status 201 is reported and tolerated (leaving an empty reply);
    # any other HTTP error is propagated to the caller.
    if e.code==201:
      print(e.headers)
      print(e.msg)
    else:
      raise

  if not isinstance(body, str):
    body = body.decode('utf-8')
  # The reply is a string, not a file object: split it into lines.
  return _dd_absolute_paths(body.splitlines())


def _dd_search_lsf(dd_tier):
  """List the files of one dataset path with the 'dbs lsf' command."""
  # The tier comes from the dd_tier argument, so that search2() really
  # looks at the secondary tier.
  dbs_path = '/'+os.environ['DD_SAMPLE']+'/'+os.environ['DD_RELEASE']+'-'+os.environ['DD_COND']+'/'+dd_tier
  if __name__ == "__main__":
    print('dbs path: ' + dbs_path)
  return _dd_absolute_paths(_dd_command_output('dbs lsf --path="'+dbs_path+'"'))


def _dd_search_castor(source):
  """List the files found one directory level below a castor directory."""
  castor_dir = source.replace('/castor/cern.ch/cms/','/',1)
  result = []
  for line in _dd_command_output('rfdir /castor/cern.ch/cms'+castor_dir):
    # The entry name is read from the ninth whitespace-separated field of
    # the listing; lines too short to have one are skipped.
    fields = line.split()
    if len(fields) < 9: continue
    subdir = fields[8]
    for entry in _dd_command_output('rfdir /castor/cern.ch/cms'+castor_dir+'/'+subdir):
      entry_fields = entry.split()
      if len(entry_fields) < 9: continue
      result.append(castor_dir+'/'+subdir+'/'+entry_fields[8])
  return result


def _dd_search_eos(source, dd_tier_re):
  """Find the files below an eos directory and keep those matching the filters."""
  lines = _dd_command_output('/afs/cern.ch/project/eos/installation/pro/bin/eos.select find -f '+source)
  result = []
  for line in lines:
    line = line.strip().replace('/eos/cms/','/',1)
    if line == "": continue
    if not _dd_matches_filters(line, dd_tier_re): continue
    result.append(line)
  return result


def _dd_search_file_list(source, dd_tier, dd_tier_re):
  """Read file names from a text file and keep those matching the filters."""
  result = []
  with open(source) as listing:
    for line in listing:
      # Environment variables written in the list are expanded.
      line = os.path.expandvars(line.strip())
      if line == "": continue
      if not _dd_matches_filters(line, dd_tier_re): continue
      result.append(line)

  if len(result)==0:
    diag = '[electronDataDiscovery.py] No more files after filtering with :'
    if os.environ['DD_SAMPLE']!='': diag += ' ' + os.environ['DD_SAMPLE']
    if os.environ['DD_COND']!='': diag += ' ' + os.environ['DD_COND']
    if dd_tier!='': diag += ' ' + dd_tier
    if os.environ['DD_RUN']!='': diag += ' ' + os.environ['DD_RUN']
    print(diag)

  return result


def common_search(dd_tier):
  """Return the list of input files selected by the DD_* environment variables.

  dd_tier is the data tier to look for (it may contain the wildcard '*').

  The value of DD_SOURCE chooses where the files are looked up:
    "das"    : DAS queries through das_client
    "dbs"    : the 'dbs search' command
    "https"  : an HTTP request to the DBS discovery page
    "lsf"    : the 'dbs lsf' command
    /castor/cern.ch/cms/... : 'rfdir' listing of a castor directory
    /eos/cms/...            : 'eos.select find' below an eos directory
    anything else           : path of a text file listing the input files

  Only the eos and text-file sources filter the file names with the
  sample, cond, tier and run patterns. The DAS source returns an empty list
  when the dataset query does not match exactly one dataset.
  """
  # Compiled first, so that an invalid tier pattern fails whatever the source.
  dd_tier_re = re.compile(dd_tier.replace('*','.*'))
  source = os.environ['DD_SOURCE']

  if source == "das":
    return _dd_search_das(dd_tier)
  if source == "dbs":
    return _dd_search_dbs(dd_tier)
  if source == "https":
    return _dd_search_https(dd_tier)
  if source == "lsf":
    return _dd_search_lsf(dd_tier)
  if source.startswith('/castor/cern.ch/cms/'):
    return _dd_search_castor(source)
  if source.startswith('/eos/cms/'):
    return _dd_search_eos(source, dd_tier_re)
  return _dd_search_file_list(source, dd_tier, dd_tier_re)
00257 
def search():
  """Return the primary input files, i.e. common_search() for the DD_TIER tier."""
  primary_tier = os.environ['DD_TIER']
  return common_search(primary_tier)
00260 
def search2():
  """Return the secondary input files, i.e. common_search() for the DD_TIER_SECONDARY tier."""
  secondary_tier = os.environ['DD_TIER_SECONDARY']
  return common_search(secondary_tier)
00263 
00264         
00265         
00266