3 Script fetches files matching specified RegExps from DQM GUI.
5 Author: Albertas Gimbutas, Vilnius University (LT)
6 e-mail: albertasgim@gmail.com
26 from multiprocessing
import Pool, Queue, Process
27 from Queue
import Empty
28 from os.path
import basename, isfile
29 from optparse
import OptionParser
30 from urllib2
import build_opener, Request
33 from Utilities.RelMon.authentication
import X509CertOpen
35 from authentication
import X509CertOpen
39 """Returns the content of specified URL, which requires authentication.
40 If the content is bigger than 1MB, then save it to file.
43 url_file = opener.open(Request(url))
44 size = int(url_file.headers[
"Content-Length"])
47 filename = basename(url)
48 readed = url_file.read()
50 outfile = open(filename,
'wb')
54 filename = basename(url)
55 file_id = selected_files.index(filename)
57 if isfile(
"./%s" % filename):
58 print '%d. Exsits on disk. Skipping.' % (file_id +1)
61 print '%d. Downloading...' % (file_id +1)
62 file = open(filename,
'wb')
64 chunk = url_file.read(chunk_size)
68 chunk = url_file.read(chunk_size)
69 print '%d. Done.' % (file_id +1)
# Command-line interface. optparse stores each parsed value on ``options``
# under the ``dest`` name given to the corresponding option.
parser = OptionParser(usage='usage: %prog [options]')

# --data and --mc write to the same destination: True for data relvals,
# False for Monte Carlo relvals.  No default is given, so the value stays
# None when neither flag is passed.
parser.add_option('-d', '--data', action='store_true', dest='is_from_data',
                  help='Fetch data relvals.')
parser.add_option('-m', '--mc', action='store_false', dest='is_from_data',
                  help='Fetch Monte Carlo relvals.')

# No default here either: ``options.release`` is None when --release is omitted.
parser.add_option('-r', '--release', action='store', dest='release',
                  help='Release to fetch from. RELEASE format "CMSSW_x_x_x", e.g. CMSSW_5_3_2.')
parser.add_option('-e', '--re', '--regexp', action='store', dest='regexp', default='',
                  help='Comma separated regular expresions for file names. e.g. to fetch '+
                  'files, which names contain "cos" or "jet" and does not contain "2010", use: '+
                  '"cos,jet,^((?!2010).)*$".')

# The thread count is kept as a string at this point (default '3').
parser.add_option('--mthreads', action='store', default='3', dest='mthreads',
                  help='Number of threads for file download. Default is 3.')
parser.add_option('--dry', action='store_true', default=False, dest='dry_run',
                  help='Show files matched by regular expresion, but do not download them.')

(options, args) = parser.parse_args()

# Trim double quotes, single quotes and '=' characters from both ends of the
# textual option values.
# --release may be None (option omitted); calling .strip() on None would raise
# AttributeError, so the value is left untouched in that case and a missing
# release can be reported by the script's own option validation instead of
# crashing here.
if options.release is not None:
    options.release = options.release.strip('"\'=')
options.regexp = options.regexp.strip('"\'=')
95 if options.is_from_data
is None:
96 parser.error(
'You have to specify the directory, use --mc for "RelVal" or ' +
97 '--data for "RelValData"')
98 elif options.release
is None:
99 parser.error(
'You have to specify the CMSSW release, use --release option. ' +
100 'E.g. --release CMSSW_5_3_2')
101 elif not options.mthreads.isdigit():
102 parser.error(
'Bad --mthreads argument format. It has to be integer. E.g. ' +
107 if options.is_from_data:
108 relvaldir =
"RelValData"
110 release = re.findall(
'(CMSSW_\d*_\d*_)\d*(?:_[\w\d]*)?', options.release)
112 parser.error(
'No such CMSSW release found. Please check the ``--release`` commandline option value.')
113 releasedir = release[0] +
"x"
# Root of the DQM GUI file browser for RelVal ROOT files.
base_url = 'https://cmsweb.cern.ch/dqm/relval/data/browse/ROOT/'
# Directory listing URL: <base_url><relvaldir>/<releasedir>/
filedir_url = ''.join([base_url, relvaldir, '/', releasedir, '/'])
# Matches anchors of the form <a href='PATH'>NAME< and captures NAME, where
# both parts may contain only word characters, '-', '.' and '/'.
file_list_re = re.compile(r"<a href='[-./\w]*'>([-./\w]*)<")
listed_names = file_list_re.findall(filedir_html)
# The first captured name is discarded.
# NOTE(review): presumably the parent-directory link -- confirm against the
# actual listing page.
all_files = listed_names[1:]
125 options.mthreads = int(options.mthreads)
126 if options.mthreads > 3
or options.mthreads < 1:
# Build the list of filters: every comma-separated user pattern plus the
# release string itself, each compiled as a regular expression.
patterns = options.regexp.split(',')
patterns.append(options.release)
file_res = [re.compile(pattern) for pattern in patterns]

# Keep a file only when every filter is found somewhere in its name.
selected_files = []
for candidate in all_files:
    if all(regex.search(candidate) for regex in file_res):
        selected_files.append(candidate)

# Show a 1-based numbered list of the selected files.
print('Downloading files:')
for number, name in enumerate(selected_files, 1):
    print('%d. %s' % (number, name))
137 if not options.dry_run:
139 pool = Pool(options.mthreads)
140 pool.map(auth_wget, [filedir_url + name
for name
in selected_files])