CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
upload_popcon.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 '''Script that uploads to the new dropBox.
3 '''
4 
5 __author__ = 'Miguel Ojeda'
6 __copyright__ = 'Copyright 2012, CERN CMS'
7 __credits__ = ['Giacomo Govi', 'Salvatore Di Guida', 'Miguel Ojeda', 'Andreas Pfeiffer']
8 __license__ = 'Unknown'
9 __maintainer__ = 'Miguel Ojeda'
10 __email__ = 'mojedasa@cern.ch'
11 __version__ = 6
12 
13 
14 import os
15 import sys
16 import logging
17 import optparse
18 import hashlib
19 import tarfile
20 import netrc
21 import getpass
22 import errno
23 import sqlite3
24 import json
25 import tempfile
26 
27 
28 defaultBackend = 'online'
29 defaultHostname = 'cms-conddb-prod.cern.ch'
30 defaultUrlTemplate = 'https://%s/dropBox/'
31 defaultTemporaryFile = 'upload.tar.bz2'
32 defaultNetrcHost = 'DropBox'
33 defaultWorkflow = 'offline'
34 
35 
36 # common/http.py start (plus the "# Try to extract..." section bit)
37 import re
38 import time
39 import logging
40 import cStringIO
41 import HTMLParser
42 import urllib
43 
44 import pycurl
45 import copy
46 
47 
# NOTE(review): the ``class`` header line is missing from this listing. The
# name comes from the ``raise CERNSSOError(...)`` calls in this file; the
# ``Exception`` base class is assumed -- confirm against the upstream source.
class CERNSSOError(Exception):
    '''A CERN SSO exception.
    '''
51 
52 
def _getCERNSSOCookies(url, secureTarget = True, secure = True):
    '''Returns the required CERN SSO cookies for a URL using Kerberos.

    They can be used with any HTTP client (libcurl, wget, urllib...).

    If you wish to make secure SSL connections to the CERN SSO
    (i.e. verify peers/hosts), you may need to install the CERN-CA-certs package.
    Use secure == False to skip this (i.e. this is the same as curl
    -k/--insecure). Not recommended: tell users to install them or use lxplus6.

    The same way, if you have a self-signed certificate in your target URL
    you can use secureTarget == False as well. Note that this option
    is provided in order to be able to use a secure SSL connection to CERN SSO,
    even if the connection to your target URL is not secure. Note that
    you will probably need the CERN-CA-certs package after you get a certificate
    signed by the CERN CA (https://cern.ch/ca), even if you did not need it
    for the CERN SSO.

    Note that this method *does* a query to the given URL if successful.

    This was implemented outside the HTTP class for two main reasons:

        * The only thing needed to use CERN SSO is the cookies, therefore
          this function is useful alone as well (e.g. as a simple replacement
          of the cern-get-sso-cookie script or as a Python port of
          the WWW::CERNSSO::Auth Perl package -- this one does not write
          any file and can be used in-memory, by the way).

        * We do not need to use the curl handler of the HTTP class.
          This way we do not overwrite any options in that one and we use
          only a temporary one here for getting the cookie.

    Returns the cookie list reported by curl (INFO_COOKIELIST).

    Raises CERNSSOError if the URL does not redirect to the SSO, if the
    authentication fails, if the redirection form is not found or if we
    are still on the authentication page at the end. pycurl errors are
    not caught here.

    TODO: Support also Certificate/Key authentication.
    TODO: Support also Username/Password authentication.
    TODO: Review the error paths.
    TODO: Why PERLSESSID was used in the original code?
    TODO: Retry if timeouts are really common (?)
    '''

    # These constants and the original code came from the official CERN
    # cern-get-sso-cookie script and WWW::CERNSSO::Auth Perl package.
    VERSION = '0.4.2'
    CERN_SSO_CURL_USER_AGENT_KRB = 'curl-sso-kerberos/%s' % VERSION
    CERN_SSO_CURL_AUTHERR = 'HTTP Error 401.2 - Unauthorized'
    CERN_SSO_CURL_ADFS_EP = '/adfs/ls/auth'
    CERN_SSO_CURL_ADFS_SIGNIN = 'wa=wsignin1.0'
    CERN_SSO_CURL_CAPATH = '/etc/pki/tls/certs'

    def perform():
        '''Runs the request currently configured in the curl handle.

        Returns a (HTTP code, response body, effective URL) tuple.
        '''
        response = cStringIO.StringIO()
        curl.setopt(curl.WRITEFUNCTION, response.write)
        curl.perform()
        code = curl.getinfo(curl.RESPONSE_CODE)
        effectiveUrl = curl.getinfo(curl.EFFECTIVE_URL)
        return (code, response.getvalue(), effectiveUrl)

    def setVerification(verify):
        '''Enables (using the CERN CA path) or disables SSL peer/host
        verification for the next requests.
        '''
        if verify:
            curl.setopt(curl.SSL_VERIFYPEER, 1)
            curl.setopt(curl.SSL_VERIFYHOST, 2)
            curl.setopt(curl.CAPATH, CERN_SSO_CURL_CAPATH)
        else:
            curl.setopt(curl.SSL_VERIFYPEER, 0)
            curl.setopt(curl.SSL_VERIFYHOST, 0)

    logging.debug('secureTarget = %s', secureTarget)
    logging.debug('secure = %s', secure)

    curl = pycurl.Curl()

    # The handle is only needed inside this function: always release it,
    # including on the error paths below.
    try:
        # Store the cookies in memory, which we will retrieve later on
        curl.setopt(curl.COOKIEFILE, '')

        # This should not be needed, but sometimes requests hang 'forever'
        curl.setopt(curl.TIMEOUT, 10)
        curl.setopt(curl.CONNECTTIMEOUT, 10)

        # Ask curl to use Kerberos5 authentication
        curl.setopt(curl.USERAGENT, CERN_SSO_CURL_USER_AGENT_KRB)
        curl.setopt(curl.HTTPAUTH, curl.HTTPAUTH_GSSNEGOTIATE)
        curl.setopt(curl.USERPWD, ':')

        # Follow location (and send the password along to other hosts,
        # although we do not really send any password)
        curl.setopt(curl.FOLLOWLOCATION, 1)
        curl.setopt(curl.UNRESTRICTED_AUTH, 1)

        # We do not need the headers
        curl.setopt(curl.HEADER, 0)

        # First request goes to the target server
        setVerification(secureTarget)

        # Fetch the url
        logging.debug('Connecting to %s', url)
        curl.setopt(curl.URL, url)
        (_, response, effectiveUrl) = perform()

        if CERN_SSO_CURL_ADFS_EP not in effectiveUrl:
            raise CERNSSOError('Not behind SSO or we already have the cookie.')

        # Do the manual redirection to the IDP
        logging.debug('Redirected to IDP %s', effectiveUrl)

        # Second request goes to the CERN SSO servers
        setVerification(secure)

        curl.setopt(curl.URL, effectiveUrl)
        (_, response, effectiveUrl) = perform()

        if CERN_SSO_CURL_AUTHERR in response:
            raise CERNSSOError('Authentication error: Redirected to IDP Authentication error %s' % effectiveUrl)

        match = re.search(r'form .+?action="([^"]+)"', response)
        if not match:
            raise CERNSSOError('Something went wrong: could not find the expected redirection form (do you have a valid Kerberos ticket? -- see klist and kinit).')

        # Do the JavaScript redirection via the form to the SP
        spUrl = match.group(1)
        logging.debug('Redirected (via form) to SP %s', spUrl)

        formPairs = re.findall(r'input type="hidden" name="([^"]+)" value="([^"]+)"', response)

        # Microsoft ADFS produces broken encoding in auth forms:
        # '<' and '"' are encoded as '&lt;' and '&quot;' however
        # '>' is *not* encoded. Does not matter here though, we just decode.
        htmlParser = HTMLParser.HTMLParser()
        formPairs = [(name, htmlParser.unescape(value)) for (name, value) in formPairs]

        # Third request goes back to the target server
        setVerification(secureTarget)

        curl.setopt(curl.URL, spUrl)
        curl.setopt(curl.POSTFIELDS, urllib.urlencode(formPairs))
        curl.setopt(curl.POST, 1)
        (_, response, effectiveUrl) = perform()

        if CERN_SSO_CURL_ADFS_SIGNIN in effectiveUrl:
            raise CERNSSOError('Something went wrong: still on the auth page.')

        # Return the cookies (read before the handle is closed)
        return curl.getinfo(curl.INFO_COOKIELIST)

    finally:
        curl.close()
205 
206 
# NOTE(review): the ``class`` header line is missing from this listing. The
# name comes from the ``raise HTTPError(code, ...)`` call in HTTP.query(); the
# ``Exception`` base class is assumed -- confirm against the upstream source.
class HTTPError(Exception):
    '''A common HTTP exception.

    self.code is the response HTTP code as an integer.
    self.response is the response body (i.e. page).
    self.args holds the extracted error message (see __init__).
    '''

    def __init__(self, code, response):
        '''Stores the code and the body and builds the exception message.

        The message is the text of the first <p>...</p> element of
        the response; if it cannot be extracted, the whole response is used.
        '''

        self.code = code
        self.response = response

        # Try to extract the error message if possible (i.e. known error page format).
        # This is deliberately best-effort: whatever goes wrong (no <p> tag,
        # response is not a string...), fall back to the raw response.
        try:
            self.args = (response.split('<p>')[1].split('</p>')[0], )
        except Exception:
            self.args = (self.response, )
223 
224 
class HTTP(object):
    '''Class used for querying URLs using the HTTP protocol.

    A single pycurl handle (self.curl) is kept for the lifetime of
    the object, with an in-memory cookie store.
    '''

    # HTTP codes that trigger a retry in query() (see setRetries())
    retryCodes = frozenset([502, 503])


    def __init__(self):
        self.setBaseUrl()
        self.setRetries()

        self.curl = pycurl.Curl()

        # Empty file name: enable the cookie engine, keeping cookies in memory
        self.curl.setopt(self.curl.COOKIEFILE, '')

        # NOTE(review): SSL peer/host verification is disabled for every
        # query made through this handle. Left as is since callers may
        # depend on it -- confirm whether this is still required.
        self.curl.setopt(self.curl.SSL_VERIFYPEER, 0)
        self.curl.setopt(self.curl.SSL_VERIFYHOST, 0)


    def getCookies(self):
        '''Returns the list of cookies.
        '''

        return self.curl.getinfo(self.curl.INFO_COOKIELIST)


    def discardCookies(self):
        '''Discards cookies.
        '''

        self.curl.setopt(self.curl.COOKIELIST, 'ALL')


    def setBaseUrl(self, baseUrl = ''):
        '''Allows to set a base URL which will be prefixed to all the URLs
        that will be queried later.
        '''

        self.baseUrl = baseUrl


    def setProxy(self, proxy = ''):
        '''Allows to set a proxy.
        '''

        self.curl.setopt(self.curl.PROXY, proxy)


    def setTimeout(self, timeout = 0):
        '''Allows to set a timeout.
        '''

        self.curl.setopt(self.curl.TIMEOUT, timeout)


    def setRetries(self, retries = ()):
        '''Allows to set retries.

        The retries are a sequence of the seconds to wait per retry.

        The retries are done on:
            * PyCurl errors (includes network problems, e.g. not being able
              to connect to the host).
            * 502 Bad Gateway (for the moment, to avoid temporary
              Apache-CherryPy issues).
            * 503 Service Temporarily Unavailable (for when we update
              the frontends).
        '''

        self.retries = retries


    def query(self, url, data = None, files = None, keepCookies = True):
        '''Queries a URL, optionally with some data (dictionary).

        If no data is specified, a GET request will be used.
        If some data is specified, a POST request will be used.

        If files is specified, it must be a dictionary like data but
        the values are filenames.

        By default, cookies are kept in-between requests.

        Returns the response body.

        A HTTPError exception is raised if the response's HTTP code is not 200.
        A pycurl.error is raised if the request itself keeps failing after
        all the retries have been used.
        '''

        if not keepCookies:
            self.discardCookies()

        url = self.baseUrl + url

        # make sure the logs are safe ... at least somewhat :)
        data4log = copy.copy(data)
        if data4log and 'password' in data4log:
            data4log['password'] = '*'

        # The first attempt is done without waiting
        retries = [0] + list(self.retries)

        while True:
            logging.debug('Querying %s with data %s and files %s (retries left: %s, current sleep: %s)...', url, data4log, files, len(retries), retries[0])

            time.sleep(retries.pop(0))

            try:
                self.curl.setopt(self.curl.URL, url)

                # Reset to GET: the handle is reused and a previous query
                # may have switched it to POST
                self.curl.setopt(self.curl.HTTPGET, 1)

                if data is not None or files is not None:
                    # If there is data or files to send, use a POST request

                    finalData = {}

                    if data is not None:
                        finalData.update(data)

                    if files is not None:
                        for (key, fileName) in files.items():
                            finalData[key] = (self.curl.FORM_FILE, fileName)

                    # pycurl wants a real list of pairs, not a dictionary view
                    self.curl.setopt(self.curl.HTTPPOST, list(finalData.items()))

                response = cStringIO.StringIO()
                self.curl.setopt(self.curl.WRITEFUNCTION, response.write)
                self.curl.perform()

                code = self.curl.getinfo(self.curl.RESPONSE_CODE)

                if code in self.retryCodes and retries:
                    logging.debug('Retrying since we got the %s error code...', code)
                    continue

                if code != 200:
                    raise HTTPError(code, response.getvalue())

                return response.getvalue()

            except pycurl.error as e:
                if not retries:
                    # Bare raise: keep the original traceback
                    raise

                logging.debug('Retrying since we got the %s pycurl exception...', str(e))


    def addCERNSSOCookies(self, url, secureTarget = True, secure = True):
        '''Adds the required CERN SSO cookies for a URL using Kerberos.

        After calling this, you can use query() for your SSO-protected URLs.

        This method will use your Kerberos ticket to sign in automatically
        in CERN SSO (i.e. no password required).

        If you do not have a ticket yet, use kinit.

        If you wish to make secure SSL connections to the CERN SSO
        (i.e. verify peers/hosts), you may need to install the CERN-CA-certs package.
        Use secure == False to skip this (i.e. this is the same as curl
        -k/--insecure). Not recommended: tell users to install them or use lxplus6.

        The same way, if you have a self-signed certificate in your target URL
        you can use secureTarget == False as well. Note that this option
        is provided in order to be able to use a secure SSL connection to CERN SSO,
        even if the connection to your target URL is not secure. Note that
        you will probably need the CERN-CA-certs package after you get a certificate
        signed by the CERN CA (https://cern.ch/ca), even if you did not need it
        for the CERN SSO.

        Note that this method *does* a query to the given URL if successful.

        Note that you may need different cookies for different URLs/applications.

        Note that this method may raise also CERNSSOError exceptions.
        '''

        for cookie in _getCERNSSOCookies(self.baseUrl + url, secureTarget, secure):
            self.curl.setopt(self.curl.COOKIELIST, cookie)
399 
400 # common/http.py end
401 
402 
def addToTarFile(tarFile, fileobj, arcname):
    '''Adds an open file to a tar file under the given name, with fixed metadata.

    tarFile is an open tarfile.TarFile, fileobj an open file object
    positioned at the start of the data and arcname the name of
    the member inside the archive.

    The member is stored read-only for the owner (mode 0400), owned by
    root (uid/gid 0) and with mtime 0, whatever the real file says --
    presumably so that the archive does not depend on who built it
    or when (TODO confirm).
    '''

    tarInfo = tarFile.gettarinfo(fileobj = fileobj, arcname = arcname)

    # 0o400 is the octal spelling accepted by both Python 2.6+ and Python 3
    tarInfo.mode = 0o400
    tarInfo.uid = tarInfo.gid = tarInfo.mtime = 0
    tarInfo.uname = tarInfo.gname = 'root'
    tarFile.addfile(tarInfo, fileobj)
409 
410 
412  '''A dropBox API class.
413  '''
414 
def __init__(self, hostname = defaultHostname, urlTemplate = defaultUrlTemplate):
    '''Prepares the HTTP client used to talk to the given host.

    urlTemplate must contain a single %s, which is replaced by hostname
    to build the base URL of all the queries.
    '''

    self.hostname = hostname
    self.http = HTTP()
    self.http.setBaseUrl(urlTemplate % hostname)
419 
420 
def signInSSO(self, secure = True):
    '''Signs in the server via CERN SSO.

    secure tells whether the SSL connection to the CERN SSO is verified.
    Whether the connection to the target server is verified is derived
    from it and from the hostname (see below).
    '''

    if secure:
        logging.info('%s: Signing in via CERN SSO...', self.hostname)
    else:
        logging.info('%s: Signing in via CERN SSO (insecure)...', self.hostname)

    # FIXME: Insecure connection to -prod until the certificates are fixed.
    #        The connection to the CERN SSO is still secure by default.
    #        On -dev and -int the certificates are installed properly.
    #
    # We also use the CERN CA certificate to verify the targets,
    # so if we are not connecting securely to CERN SSO is because
    # we do not have the CERN-CA-certs package, so we need to skip
    # this as well.
    #
    # i.e. right now we have these options:
    #     secure == True,  secureTarget == True   with CERN CA cert, -dev and -int
    #     secure == True,  secureTarget == False  with CERN CA cert, -prod
    #     secure == False, secureTarget == False  without CERN CA cert
    secureTarget = bool(secure) and 'cms-conddb-prod' not in self.hostname

    self.http.addCERNSSOCookies('signInSSO', secureTarget, secure)
450 
451 
def signIn(self, username, password):
    '''Signs in the server.

    The username and the password are sent in a POST query to 'signIn'.
    '''

    logging.info('%s: Signing in...', self.hostname)
    self.http.query('signIn', {
        'username': username,
        'password': password,
    })
461 
462 
def signOut(self):
    '''Signs out the server.
    '''

    logging.info('%s: Signing out...', self.hostname)
    self.http.query('signOut')
469 
470 
def _checkForUpdates(self):
    '''Updates this script, if a new version is found.

    The version published by the server is compared with __version__.
    If the server has a newer one, the new script is downloaded,
    written over the running script (sys.argv[0]) and executed with
    the same arguments, replacing the current process: in that case
    this method does not return.
    '''

    logging.info('%s: Checking for updates...', self.hostname)
    version = int(self.http.query('getUploadScriptVersion'))

    if version <= __version__:
        logging.info('%s: Up to date.', self.hostname)
        return

    logging.info('%s: There is a newer version (%s) than the current one (%s): Updating...', self.hostname, version, __version__)

    logging.info('%s: Downloading new version...', self.hostname)
    uploadScript = self.http.query('getUploadScript')

    self.signOut()

    # NOTE(review): whatever the server returned is written over this script
    # and then executed, so this is only as trustworthy as the connection
    # to the server. The file is overwritten in place: a failure in
    # the middle of the write leaves a truncated script behind.
    logging.info('%s: Saving new version...', self.hostname)
    with open(sys.argv[0], 'wb') as f:
        f.write(uploadScript)

    logging.info('%s: Executing new version...', self.hostname)
    os.execl(sys.executable, *([sys.executable] + sys.argv))
495 
496 
def uploadFile(self, filename, backend = defaultBackend, temporaryFile = defaultTemporaryFile):
    '''Uploads a file to the dropBox.

    The filename can be without extension, with .db or with .txt extension.
    It will be stripped and then both .db and .txt files are used.

    The .db file and the (re-serialized) .txt metadata are packed into
    a bz2 tar file, created at temporaryFile. The tar file is then renamed
    to its SHA1 hash, uploaded for the given backend and removed.
    '''

    # Strip only a *trailing* extension: splitting on '.db' or '.txt'
    # would also cut the path at any earlier occurrence of them
    # (e.g. a directory called 'my.dbfiles').
    basepath = filename
    for extension in ('.db', '.txt'):
        if basepath.endswith(extension):
            basepath = basepath[:-len(extension)]
    basename = os.path.basename(basepath)

    logging.info('%s: %s: Creating tar file...', self.hostname, basename)

    tarFile = tarfile.open(temporaryFile, 'w:bz2')
    try:
        with open('%s.db' % basepath, 'rb') as data:
            addToTarFile(tarFile, data, 'data.db')

        # The metadata is loaded and dumped again (sorted keys, fixed indent)
        # instead of adding the original file as it is
        with tempfile.NamedTemporaryFile() as metadata:
            with open('%s.txt' % basepath, 'rb') as originalMetadata:
                json.dump(json.load(originalMetadata), metadata, sort_keys = True, indent = 4)

            metadata.seek(0)
            addToTarFile(tarFile, metadata, 'metadata.txt')
    finally:
        # Do not leak the handle if one of the input files cannot be read
        tarFile.close()

    logging.info('%s: %s: Calculating hash...', self.hostname, basename)

    # Read in chunks (4 MB) to avoid loading the whole file in memory
    fileHash = hashlib.sha1()
    with open(temporaryFile, 'rb') as f:
        for chunk in iter(lambda: f.read(4 * 1024 * 1024), b''):
            fileHash.update(chunk)

    fileHash = fileHash.hexdigest()

    logging.info('%s: %s: Hash: %s', self.hostname, basename, fileHash)

    logging.info('%s: %s: Uploading file for the %s backend...', self.hostname, basename, backend)

    # The file is uploaded under the name of its hash
    os.rename(temporaryFile, fileHash)
    try:
        self.http.query('uploadFile', {
            'backend': backend,
            'fileName': basename,
        }, files = {
            'uploadedFile': fileHash,
        })
    finally:
        # Remove the file even if the upload failed
        os.unlink(fileHash)
548 
549 
def getInput(default, prompt = ''):
    '''Like raw_input() but with a default and automatic strip().

    The default (stripped as well) is returned if the answer is empty
    or contains only whitespace.
    '''

    # raw_input() is called input() in Python 3
    try:
        readLine = raw_input
    except NameError:
        readLine = input

    # Strip before checking: an answer with only spaces is not an answer
    answer = readLine(prompt).strip()
    if answer:
        return answer

    return default.strip()
559 
560 
def getInputWorkflow(prompt = ''):
    '''Like getInput() but tailored to get target workflows (synchronization options).

    Asks again until one of the allowed workflows is given. An empty
    answer selects defaultWorkflow.
    '''

    # Built once instead of on every iteration
    allowedWorkflows = frozenset(['offline', 'hlt', 'express', 'prompt', 'pcl'])

    while True:
        workflow = getInput(defaultWorkflow, prompt)

        if workflow in allowedWorkflows:
            return workflow

        logging.error('Please specify one of the allowed workflows. See above for the explanation on each of them.')
572 
573 
def getInputChoose(optionsList, default, prompt = ''):
    '''Makes the user choose from a list of options.

    The user types the index of the option; default is the index used
    (as a string) if the answer is empty. Asks again until a valid index
    is given and returns the chosen element of optionsList.
    '''

    while True:
        index = getInput(default, prompt)

        # NOTE(review): negative indexes are accepted and count from
        # the end of the list -- confirm whether that is intended.
        try:
            return optionsList[int(index)]
        except ValueError:
            logging.error('Please specify an index of the list (i.e. integer).')
        except IndexError:
            logging.error('The index you provided is not in the given list.')
587 
588 
def getInputRepeat(prompt = ''):
    '''Like raw_input() but repeats if nothing is provided and automatic strip().

    An answer with only whitespace counts as nothing provided.
    '''

    # raw_input() is called input() in Python 3
    try:
        readLine = raw_input
    except NameError:
        readLine = input

    while True:
        # Strip before checking, so that the returned value is never empty
        answer = readLine(prompt).strip()
        if answer:
            return answer

        logging.error('You need to provide a value.')
599 
600 
601 
602 
def _getCERNSSOCookies
list object
Definition: dbtoconf.py:77
double split
Definition: MVATrainer.cc:139
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION [A criterion that matches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list ("!*", "!HLTx*" if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL. It will reject the event if any of the triggers are PASS or EXCEPTION (this matches the behavior of "!*" before the partial wildcard feature was incorporated). Triggers which are in the READY state are completely ignored. (READY should never be returned since the trigger paths have been run