'''
Script fetches files matching specified RegExps from DQM GUI.

Author: Albertas Gimbutas, Vilnius University (LT)
e-mail: albertasgim@gmail.com
'''
from __future__ import print_function
27 from multiprocessing
import Pool, Queue, Process
28 from Queue
import Empty
29 from os.path
import basename, isfile
30 from optparse
import OptionParser
31 from urllib2
import build_opener, Request
34 from Utilities.RelMon.authentication
import X509CertOpen
36 from authentication
import X509CertOpen
40 """Returns the content of specified URL, which requires authentication. 41 If the content is bigger than 1MB, then save it to file. 44 url_file = opener.open(Request(url))
45 size =
int(url_file.headers[
"Content-Length"])
49 readed = url_file.read()
51 outfile = open(filename,
'wb')
56 file_id = selected_files.index(filename)
58 if isfile(
"./%s" % filename):
59 print(
'%d. Exsits on disk. Skipping.' % (file_id +1))
62 print(
'%d. Downloading...' % (file_id +1))
63 file = open(filename,
'wb')
65 chunk = url_file.read(chunk_size)
69 chunk = url_file.read(chunk_size)
70 print(
'%d. Done.' % (file_id +1))
# Command-line interface of the script.
#
# Destinations and their values when the option is not given:
#   is_from_data -> None  (True with -d/--data, False with -m/--mc)
#   release      -> None
#   regexp       -> ''
#   mthreads     -> '3'   (kept as a string; no type= conversion is requested)
#   dry_run      -> False
parser = OptionParser(usage='usage: %prog [options]')

# --data and --mc write to the same destination, which deliberately has no
# default: None therefore means "neither flag was passed".
parser.add_option('-d', '--data', action='store_true', dest='is_from_data',
                  help='Fetch data relvals.')
parser.add_option('-m', '--mc', action='store_false', dest='is_from_data',
                  help='Fetch Monte Carlo relvals.')

parser.add_option('-r', '--release', action='store', dest='release',
                  help='Release to fetch from. RELEASE format "CMSSW_x_x_x", e.g. CMSSW_5_3_2.')

# Several regular expressions may be passed in one comma separated value.
parser.add_option('-e', '--re', '--regexp', action='store', dest='regexp', default='',
                  help='Comma separated regular expresions for file names. e.g. to fetch ' +
                       'files, which names contain "cos" or "jet" and does not contain "2010", use: ' +
                       '"cos,jet,^((?!2010).)*$".')

# The default is the string '3', not the integer 3: optparse stores the raw
# command-line text here because no type= is set.
parser.add_option('--mthreads', action='store', default='3', dest='mthreads',
                  help='Number of threads for file download. Default is 3.')

parser.add_option('--dry', action='store_true', default=False, dest='dry_run',
                  help='Show files matched by regular expresion, but do not download them.')
# Parse the command line (sys.argv) with the option parser defined earlier.
(options, args) = parser.parse_args()

# Remove any double quote, single quote or '=' characters from both ends of
# the values.
# --release has no default, so it is None when the option is omitted; calling
# .strip() on None would raise AttributeError before the script's own
# "options.release is None" check could report the missing option.
if options.release is not None:
    options.release = options.release.strip('"\'=')
# --regexp defaults to '', so it is always a string here.
options.regexp = options.regexp.strip('"\'=')
96 if options.is_from_data
is None:
97 parser.error(
'You have to specify the directory, use --mc for "RelVal" or ' +
98 '--data for "RelValData"')
99 elif options.release
is None:
100 parser.error(
'You have to specify the CMSSW release, use --release option. ' +
101 'E.g. --release CMSSW_5_3_2')
102 elif not options.mthreads.isdigit():
103 parser.error(
'Bad --mthreads argument format. It has to be integer. E.g. ' +
108 if options.is_from_data:
109 relvaldir =
"RelValData" 111 release = re.findall(
'(CMSSW_\d*_\d*_)\d*(?:_[\w\d]*)?', options.release)
113 parser.error(
'No such CMSSW release found. Please check the ``--release`` commandline option value.')
114 releasedir = release[0] +
"x" 116 base_url =
'https://cmsweb.cern.ch/dqm/relval/data/browse/ROOT/' 117 filedir_url = base_url + relvaldir +
'/' + releasedir +
'/' 123 file_list_re = re.compile(
r"<a href='[-./\w]*'>([-./\w]*)<")
124 all_files = file_list_re.findall(filedir_html)[1:]
126 options.mthreads =
int(options.mthreads)
127 if options.mthreads > 3
or options.mthreads < 1:
# Build the list of filters: each comma separated piece of --regexp plus the
# release string itself is compiled as a regular expression. With the default
# empty --regexp the split yields [''], and an empty pattern matches any name.
file_res = [re.compile(r) for r in options.regexp.split(',') + [options.release]]

# Keep a file only when every filter matches somewhere in its name.
selected_files = [f for f in all_files if all(r.search(f) for r in file_res)]

# List the selection, numbered from 1.
print('Downloading files:')
for number, name in enumerate(selected_files, 1):
    print('%d. %s' % (number, name))
138 if not options.dry_run:
140 pool = Pool(options.mthreads)
141 pool.map(auth_wget, [filedir_url + name
for name
in selected_files])
S & print(S &os, JobReport::InputFile const &f)
def auth_wget(url, chunk_size=1048576)