1 from __future__
import print_function
34 import httplib, urllib, urllib2, types, string
35 import Utilities.General.cmssw_das_client
as das_client
37 from json
import loads, dumps
# Give every DD_* selection variable a value when the caller left it unset,
# so the lookups below can index os.environ directly.  DD_SOURCE falls back
# to 'das'; every other selector falls back to the empty string.
os.environ.setdefault('DD_SOURCE', 'das')
os.environ.setdefault('DD_RELEASE', '')
os.environ.setdefault('DD_SAMPLE', '')
os.environ.setdefault('DD_COND', '')
os.environ.setdefault('DD_TIER', '')
os.environ.setdefault('DD_TIER_SECONDARY', '')
os.environ.setdefault('DD_RUN', '')


def _dd_wildcard_regex(variable):
    """Compile the value of environment variable *variable* as a regex.

    Each '*' in the value is rewritten to '.*' before compiling; every other
    character is handed to ``re.compile`` unchanged.
    """
    wildcard = os.environ[variable]
    return re.compile(wildcard.replace('*', '.*'))


# Compiled once at import time from the (possibly defaulted) selectors above.
dd_release_re = _dd_wildcard_regex('DD_RELEASE')
dd_sample_re = _dd_wildcard_regex('DD_SAMPLE')
dd_cond_re = _dd_wildcard_regex('DD_COND')
dd_run_re = _dd_wildcard_regex('DD_RUN')
61 dd_tier_re = re.compile(dd_tier.replace(
'*',
'.*'));
63 if os.environ[
'DD_SOURCE'] ==
"das":
65 query =
"dataset instance=cms_dbs_prod_global"
66 if os.environ[
'DD_RELEASE'] !=
"":
67 query = query +
" release=" + os.environ[
'DD_RELEASE']
68 if os.environ[
'DD_SAMPLE'] !=
"":
69 query = query +
" primary_dataset=" + os.environ[
'DD_SAMPLE']
71 query = query +
" tier=" + dd_tier
72 if os.environ[
'DD_COND'] !=
"":
73 query = query +
" dataset=*" + os.environ[
'DD_COND'] +
"*"
74 if os.environ[
'DD_RUN'] !=
"":
75 query = query +
" run=" + os.environ[
'DD_RUN']
90 if data[
'nresults'] == 0:
91 print(
'[electronDataDiscovery.py] No DAS dataset for query:', query)
93 while data[
'nresults'] > 1:
94 if data[
'data'][0][
'dataset'][0][
'name'] == data[
'data'][1][
'dataset'][0][
'name']:
98 print(
'[electronDataDiscovery.py] Several DAS datasets for query:', query)
99 for i
in range(data[
'nresults']):
101 '[electronDataDiscovery.py] dataset[' +
str(i) +
']: ' + data[
'data'][i][
'dataset'][0][
'name'])
104 dataset = data[
'data'][0][
'dataset'][0][
'name']
106 query =
"file instance=cms_dbs_prod_global dataset=" + dataset
117 data = das_client.json.loads(
das_client.get_data(
'https://cmsweb.cern.ch', query, 0, 0, 0))
119 if data[
'nresults'] == 0:
120 print(
'[electronDataDiscovery.py] No DAS file in dataset:', dataset)
123 print(
'there is %d results' % nresults)
126 for i
in range(0, data[
'nresults']):
127 result.append(
str(data[
'data'][i][
'file'][0][
'name']))
129 elif os.environ[
'DD_SOURCE'] ==
"dbs":
132 separator =
" where "
133 if os.environ[
'DD_RELEASE'] !=
"":
134 input = input + separator +
"release = " + os.environ[
'DD_RELEASE']
136 if os.environ[
'DD_SAMPLE'] !=
"":
137 input = input + separator +
"primds = " + os.environ[
'DD_SAMPLE']
139 if os.environ[
'DD_RUN'] !=
"":
140 input = input + separator +
"run = " + os.environ[
'DD_RUN']
142 input = input + separator +
"dataset like *" + os.environ[
'DD_COND'] +
"*" + dd_tier +
"*"
145 'dbs search --url="http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet" --query "' + input +
'"')
146 datalines = data.readlines()
149 for line
in datalines:
151 if line !=
"" and line[0] ==
"/":
154 elif os.environ[
'DD_SOURCE'] ==
"http":
157 separator =
" where "
158 if os.environ[
'DD_RELEASE'] !=
"":
159 input = input + separator +
"release = " + os.environ[
'DD_RELEASE']
161 if os.environ[
'DD_SAMPLE'] !=
"":
162 input = input + separator +
"primds = " + os.environ[
'DD_SAMPLE']
164 if os.environ[
'DD_RUN'] !=
"":
165 input = input + separator +
"run = " + os.environ[
'DD_RUN']
167 input = input + separator +
"dataset like *" + os.environ[
'DD_COND'] +
"*" + dd_tier +
"*"
169 url =
"https://cmsweb.cern.ch:443/dbs_discovery/aSearch"
170 final_input = urllib.quote(input);
172 agent =
"Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"
173 ctypes =
"text/plain"
174 headers = {
'User-Agent': agent,
'Accept': ctypes}
175 params = {
'dbsInst':
'cms_dbs_prod_global',
176 'html': 0,
'caseSensitive':
'on',
'_idx': 0,
'pagerStep': -1,
177 'userInput': final_input,
178 'xml': 0,
'details': 0,
'cff': 0,
'method':
'dbsapi'}
179 data = urllib.urlencode(params, doseq=
True)
180 req = urllib2.Request(url, data, headers)
184 response = urllib2.urlopen(req)
185 data = response.read()
186 except urllib2.HTTPError
as e:
194 datalines = data.readlines()
197 for line
in datalines:
199 if line !=
"" and line[0] ==
"/":
202 elif os.environ[
'DD_SOURCE'] ==
"lsf":
204 dbs_path =
'/' + os.environ[
'DD_SAMPLE'] +
'/' + os.environ[
'DD_RELEASE'] +
'-' + os.environ[
'DD_COND'] +
'/' + \
205 os.environ[
'DD_TIER'] +
'"'
206 if __name__ ==
"__main__":
207 print(
'dbs path:', dbs_path)
208 data = os.popen(
'dbs lsf --path="' + dbs_path +
'"')
209 datalines = data.readlines()
212 for line
in datalines:
214 if line !=
"" and line[0] ==
"/":
217 elif os.environ[
'DD_SOURCE'].startswith(
'/castor/cern.ch/cms/'):
219 castor_dir = os.environ[
'DD_SOURCE'].
replace(
'/castor/cern.ch/cms/',
'/', 1)
221 data = os.popen(
'rfdir /castor/cern.ch/cms' + castor_dir)
222 subdirs = data.readlines()
227 subdir = line.split()[8]
228 data = os.popen(
'rfdir /castor/cern.ch/cms' + castor_dir +
'/' + subdir)
229 datalines = data.readlines()
230 for line
in datalines:
232 file = line.split()[8]
234 result.append(castor_dir +
'/' + subdir +
'/' + file)
237 elif os.environ[
'DD_SOURCE'].startswith(
'/eos/cms/'):
239 data = os.popen(
'eos find -f ' + os.environ[
'DD_SOURCE'])
240 lines = data.readlines()
244 line = line.strip().
replace(
'/eos/cms/',
'/', 1)
245 if line ==
"":
continue
246 if dd_sample_re.search(line) ==
None:
continue
247 if dd_cond_re.search(line) ==
None:
continue
248 if dd_tier_re.search(line) ==
None:
continue
249 if dd_run_re.search(line) ==
None:
continue
255 for line
in open(os.environ[
'DD_SOURCE']).readlines():
256 line = os.path.expandvars(line.strip())
257 if line ==
"":
continue
258 if dd_sample_re.search(line) ==
None:
continue
259 if dd_cond_re.search(line) ==
None:
continue
260 if dd_tier_re.search(line) ==
None:
continue
261 if dd_run_re.search(line) ==
None:
continue
265 diag =
'[electronDataDiscovery.py] No more files after filtering with :'
266 if os.environ[
'DD_SAMPLE'] !=
'': diag +=
' ' + os.environ[
'DD_SAMPLE']
267 if os.environ[
'DD_COND'] !=
'': diag +=
' ' + os.environ[
'DD_COND']
268 if dd_tier !=
'': diag +=
' ' + dd_tier
269 if os.environ[
'DD_RUN'] !=
'': diag +=
' ' + os.environ[
'DD_RUN']
276 print(
'search in %s' %
'DD_TIER')
286 cmd =
'dasgoclient --query="file dataset=DATA instance=DBS" | sort'
287 cmd2 = cmd.replace(
'DATA', data).
replace(
'DBS', dbs)
288 files = os.popen(cmd2).
read()
290 flist = files.split(
'\n')