CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
upload_popcon.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 '''Script that uploads to the new dropBox.
3 '''
4 
5 __author__ = 'Miguel Ojeda'
6 __copyright__ = 'Copyright 2012, CERN CMS'
7 __credits__ = ['Giacomo Govi', 'Salvatore Di Guida', 'Miguel Ojeda', 'Andreas Pfeiffer']
8 __license__ = 'Unknown'
9 __maintainer__ = 'Miguel Ojeda'
10 __email__ = 'mojedasa@cern.ch'
11 __version__ = 6
12 
13 
14 import os
15 import sys
16 import logging
17 import optparse
18 import hashlib
19 import tarfile
20 import netrc
21 import getpass
22 import errno
23 import sqlite3
24 import json
25 import tempfile
26 
27 
# Module-wide defaults.

# Backend used by DropBox.uploadFile() when none is given.
defaultBackend = 'online'

# Server contacted by DropBox when no hostname is given.
defaultHostname = 'cms-conddb-prod.cern.ch'

# Base URL of the service; '%s' is replaced by the hostname.
defaultUrlTemplate = 'https://%s/dropBox/'

# Name of the bzip2-compressed tar archive built before uploading.
defaultTemporaryFile = 'upload.tar.bz2'

# NOTE(review): presumably the host entry looked up in ~/.netrc for
# credentials -- not used in this part of the file, confirm against the caller.
defaultNetrcHost = 'DropBox'

# Workflow returned by getInputWorkflow() when the user just presses Enter.
defaultWorkflow = 'offline'
34 
35 
36 # common/http.py start (plus the "# Try to extract..." section bit)
37 import re
38 import time
39 import logging
40 import cStringIO
41 import HTMLParser
42 import urllib
43 
44 import pycurl
45 import copy
46 
47 
49  '''A CERN SSO exception.
50  '''
51 
52 
def _getCERNSSOCookies(url, secureTarget = True, secure = True):
    '''Returns the required CERN SSO cookies for a URL using Kerberos.

    They can be used with any HTTP client (libcurl, wget, urllib...).

    If you wish to make secure SSL connections to the CERN SSO
    (i.e. verify peers/hosts), you may need to install the CERN-CA-certs package.
    Use secure == False to skip this (i.e. this is the same as curl
    -k/--insecure). Not recommended: tell users to install them or use lxplus6.

    The same way, if you have a self-signed certificate in your target URL
    you can use secureTarget == False as well. Note that this option
    is provided in order to be able to use a secure SSL connection to CERN SSO,
    even if the connection to your target URL is not secure. Note that
    you will probably need the CERN-CA-certs package after you get a certificate
    signed by the CERN CA (https://cern.ch/ca), even if you did not need it
    for the CERN SSO.

    Note that this method *does* a query to the given URL if successful.

    This was implemented outside the HTTP class for two main reasons:

        * The only thing needed to use CERN SSO is the cookies, therefore
          this function is useful alone as well (e.g. as a simple replacement
          of the cern-get-sso-cookie script or as a Python port of
          the WWW::CERNSSO::Auth Perl package -- this one does not write
          any file and can be used in-memory, by the way).

        * We do not need to use the curl handler of the HTTP class.
          This way we do not overwrite any options in that one and we use
          only a temporary one here for getting the cookie.

    Parameters:
        url: the SSO-protected target URL.
        secureTarget: verify the SSL peer/host of the target server.
        secure: verify the SSL peer/host of the CERN SSO servers.

    Returns the cookie list reported by curl (INFO_COOKIELIST).

    Raises CERNSSOError if the URL is not redirected to the SSO, if the SSO
    reports an authentication error, if the redirection form is not found
    or if we are still on the sign-in page after posting the form.
    Errors raised by curl itself while performing a request are not caught here.

    The temporary curl handle is always closed before returning or raising.

    TODO: Support also Certificate/Key authentication.
    TODO: Support also Username/Password authentication.
    TODO: Review the error paths.
    TODO: Why PERLSESSID was used in the original code?
    TODO: Retry if timeouts are really common (?)
    '''

    # These constants and the original code came from the official CERN
    # cern-get-sso-cookie script and WWW::CERNSSO::Auth Perl package.
    VERSION = '0.4.2'
    CERN_SSO_CURL_USER_AGENT_KRB = 'curl-sso-kerberos/%s' % VERSION
    CERN_SSO_CURL_AUTHERR = 'HTTP Error 401.2 - Unauthorized'
    CERN_SSO_CURL_ADFS_EP = '/adfs/ls/auth'
    CERN_SSO_CURL_ADFS_SIGNIN = 'wa=wsignin1.0'
    CERN_SSO_CURL_CAPATH = '/etc/pki/tls/certs'

    logging.debug('secureTarget = %s', secureTarget)
    logging.debug('secure = %s', secure)

    curl = pycurl.Curl()

    def perform():
        '''Runs the request currently configured in the curl handle and
        returns (HTTP code, response body, effective URL).
        '''

        response = cStringIO.StringIO()
        curl.setopt(curl.WRITEFUNCTION, response.write)
        curl.perform()
        code = curl.getinfo(curl.RESPONSE_CODE)
        effectiveUrl = curl.getinfo(curl.EFFECTIVE_URL)
        return (code, response.getvalue(), effectiveUrl)

    def setVerification(verify):
        '''Enables or disables SSL peer/host verification for the next requests.
        '''

        if verify:
            curl.setopt(curl.SSL_VERIFYPEER, 1)
            curl.setopt(curl.SSL_VERIFYHOST, 2)
            curl.setopt(curl.CAPATH, CERN_SSO_CURL_CAPATH)
        else:
            curl.setopt(curl.SSL_VERIFYPEER, 0)
            curl.setopt(curl.SSL_VERIFYHOST, 0)

    try:
        # Store the cookies in memory, which we will retrieve later on
        curl.setopt(curl.COOKIEFILE, '')

        # This should not be needed, but sometimes requests hang 'forever'
        curl.setopt(curl.TIMEOUT, 10)
        curl.setopt(curl.CONNECTTIMEOUT, 10)

        # Ask curl to use Kerberos5 authentication
        curl.setopt(curl.USERAGENT, CERN_SSO_CURL_USER_AGENT_KRB)
        curl.setopt(curl.HTTPAUTH, curl.HTTPAUTH_GSSNEGOTIATE)
        curl.setopt(curl.USERPWD, ':')

        # Follow location (and send the password along to other hosts,
        # although we do not really send any password)
        curl.setopt(curl.FOLLOWLOCATION, 1)
        curl.setopt(curl.UNRESTRICTED_AUTH, 1)

        # We do not need the headers
        curl.setopt(curl.HEADER, 0)

        # The target server has a valid certificate (unless told otherwise)
        setVerification(secureTarget)

        # Fetch the url
        logging.debug('Connecting to %s', url)
        curl.setopt(curl.URL, url)
        (_code, response, effectiveUrl) = perform()

        if CERN_SSO_CURL_ADFS_EP not in effectiveUrl:
            raise CERNSSOError('Not behind SSO or we already have the cookie.')

        # Do the manual redirection to the IDP
        logging.debug('Redirected to IDP %s', effectiveUrl)

        # The CERN SSO servers have a valid certificate (unless told otherwise)
        setVerification(secure)

        curl.setopt(curl.URL, effectiveUrl)
        (_code, response, effectiveUrl) = perform()

        if CERN_SSO_CURL_AUTHERR in response:
            raise CERNSSOError('Authentication error: Redirected to IDP Authentication error %s' % effectiveUrl)

        match = re.search(r'form .+?action="([^"]+)"', response)
        if not match:
            raise CERNSSOError('Something went wrong: could not find the expected redirection form (do you have a valid Kerberos ticket? -- see klist and kinit).')

        # Do the JavaScript redirection via the form to the SP
        spUrl = match.group(1)
        logging.debug('Redirected (via form) to SP %s', spUrl)

        formPairs = re.findall(r'input type="hidden" name="([^"]+)" value="([^"]+)"', response)

        # Microsoft ADFS produces broken encoding in auth forms:
        # '<' and '"' are encoded as '&lt;' and '&quot;' however
        # '>' is *not* encoded. Does not matter here though, we just decode.
        htmlParser = HTMLParser.HTMLParser()
        formPairs = [(name, htmlParser.unescape(value)) for (name, value) in formPairs]

        # Back to the target server: restore its verification settings
        setVerification(secureTarget)

        curl.setopt(curl.URL, spUrl)
        curl.setopt(curl.POSTFIELDS, urllib.urlencode(formPairs))
        curl.setopt(curl.POST, 1)
        (_code, response, effectiveUrl) = perform()

        if CERN_SSO_CURL_ADFS_SIGNIN in effectiveUrl:
            raise CERNSSOError('Something went wrong: still on the auth page.')

        # Return the cookies (read them before the handle is closed below)
        return curl.getinfo(curl.INFO_COOKIELIST)

    finally:
        # The handle is only used here, so release it on every path.
        curl.close()
205 
206 
208  '''A common HTTP exception.
209 
210  self.code is the response HTTP code as an integer.
211  self.response is the response body (i.e. page).
212  '''
213 
214  def __init__(self, code, response):
215  self.code = code
216  self.response = response
217 
218  # Try to extract the error message if possible (i.e. known error page format)
219  try:
220  self.args = (response.split('<p>')[1].split('</p>')[0], )
221  except Exception:
222  self.args = (self.response, )
223 
224 
class HTTP(object):
    '''Class used for querying URLs using the HTTP protocol.

    A single curl handle is kept per instance, so cookies and options
    set through the methods below persist across calls to query().
    '''

    # HTTP codes on which query() retries (see setRetries()).
    retryCodes = frozenset([502, 503])


    def __init__(self):
        self.setBaseUrl()
        self.setRetries()

        self.curl = pycurl.Curl()

        # Keep the cookies in memory (enables curl's cookie engine).
        self.curl.setopt(self.curl.COOKIEFILE, '')

        # NOTE(review): peer and host verification are disabled for all
        # the queries done through this handle (same as curl -k/--insecure).
        self.curl.setopt(self.curl.SSL_VERIFYPEER, 0)
        self.curl.setopt(self.curl.SSL_VERIFYHOST, 0)


    def getCookies(self):
        '''Returns the list of cookies.
        '''

        return self.curl.getinfo(self.curl.INFO_COOKIELIST)


    def discardCookies(self):
        '''Discards cookies.
        '''

        self.curl.setopt(self.curl.COOKIELIST, 'ALL')


    def setBaseUrl(self, baseUrl = ''):
        '''Allows to set a base URL which will be prefixed to all the URLs
        that will be queried later.
        '''

        self.baseUrl = baseUrl


    def setProxy(self, proxy = ''):
        '''Allows to set a proxy.
        '''

        self.curl.setopt(self.curl.PROXY, proxy)


    def setTimeout(self, timeout = 0):
        '''Allows to set a timeout.
        '''

        self.curl.setopt(self.curl.TIMEOUT, timeout)


    def setRetries(self, retries = ()):
        '''Allows to set retries.

        The retries are a sequence of the seconds to wait per retry.

        The retries are done on:
            * PyCurl errors (includes network problems, e.g. not being able
              to connect to the host).
            * 502 Bad Gateway (for the moment, to avoid temporary
              Apache-CherryPy issues).
            * 503 Service Temporarily Unavailable (for when we update
              the frontends).
        '''

        self.retries = retries


    @staticmethod
    def _maskPassword(data):
        '''Returns a shallow copy of data that is safe to log, i.e. with
        the value of the 'password' key (if any) replaced by '*'.

        The original data is not modified. Empty data (e.g. None) is
        returned as a plain copy.
        '''

        masked = copy.copy(data)
        if masked and 'password' in masked:
            masked['password'] = '*'

        return masked


    def query(self, url, data = None, files = None, keepCookies = True):
        '''Queries a URL, optionally with some data (dictionary).

        If no data is specified, a GET request will be used.
        If some data is specified, a POST request will be used.

        If files is specified, it must be a dictionary like data but
        the values are filenames.

        By default, cookies are kept in-between requests.

        Returns the response body.

        A HTTPError exception is raised if the response's HTTP code is not 200.
        A pycurl.error is re-raised once there are no retries left.
        '''

        if not keepCookies:
            self.discardCookies()

        url = self.baseUrl + url

        # make sure the logs are safe ... at least somewhat :)
        data4log = self._maskPassword(data)

        # The first attempt is done without waiting.
        retries = [0] + list(self.retries)

        while True:
            logging.debug('Querying %s with data %s and files %s (retries left: %s, current sleep: %s)...', url, data4log, files, len(retries), retries[0])

            time.sleep(retries.pop(0))

            try:
                self.curl.setopt(self.curl.URL, url)

                # Reset to GET, since a previous query may have left the handle in POST mode
                self.curl.setopt(self.curl.HTTPGET, 1)

                if data is not None or files is not None:
                    # If there is data or files to send, use a POST request

                    finalData = {}

                    if data is not None:
                        finalData.update(data)

                    if files is not None:
                        for (key, fileName) in files.items():
                            finalData[key] = (self.curl.FORM_FILE, fileName)

                    # pycurl expects a real list of (name, value) pairs
                    self.curl.setopt(self.curl.HTTPPOST, list(finalData.items()))

                response = cStringIO.StringIO()
                self.curl.setopt(self.curl.WRITEFUNCTION, response.write)
                self.curl.perform()

                code = self.curl.getinfo(self.curl.RESPONSE_CODE)

                if code in self.retryCodes and retries:
                    logging.debug('Retrying since we got the %s error code...', code)
                    continue

                if code != 200:
                    raise HTTPError(code, response.getvalue())

                return response.getvalue()

            except pycurl.error as e:
                if not retries:
                    # Bare raise: keep the original traceback
                    raise

                logging.debug('Retrying since we got the %s pycurl exception...', str(e))


    def addCERNSSOCookies(self, url, secureTarget = True, secure = True):
        '''Adds the required CERN SSO cookies for a URL using Kerberos.

        After calling this, you can use query() for your SSO-protected URLs.

        This method will use your Kerberos ticket to sign in automatically
        in CERN SSO (i.e. no password required).

        If you do not have a ticket yet, use kinit.

        If you wish to make secure SSL connections to the CERN SSO
        (i.e. verify peers/hosts), you may need to install the CERN-CA-certs package.
        Use secure == False to skip this (i.e. this is the same as curl
        -k/--insecure). Not recommended: tell users to install them or use lxplus6.

        The same way, if you have a self-signed certificate in your target URL
        you can use secureTarget == False as well. Note that this option
        is provided in order to be able to use a secure SSL connection to CERN SSO,
        even if the connection to your target URL is not secure. Note that
        you will probably need the CERN-CA-certs package after you get a certificate
        signed by the CERN CA (https://cern.ch/ca), even if you did not need it
        for the CERN SSO.

        Note that this method *does* a query to the given URL if successful.

        Note that you may need different cookies for different URLs/applications.

        Note that this method may raise also CERNSSOError exceptions.
        '''

        for cookie in _getCERNSSOCookies(self.baseUrl + url, secureTarget, secure):
            self.curl.setopt(self.curl.COOKIELIST, cookie)
399 
400 # common/http.py end
401 
402 
def addToTarFile(tarFile, fileobj, arcname):
    '''Adds the contents of fileobj to the open tarFile under the name arcname.

    The entry is stored read-only for the owner (mode 0400), owned by
    root/root (uid and gid 0) and with a modification time of 0, instead of
    the values of the file on disk.
    NOTE(review): presumably so that the archive does not depend on who or
    when it was created -- confirm.
    '''

    entry = tarFile.gettarinfo(arcname = arcname, fileobj = fileobj)

    entry.mode = 0o400

    for attribute in ('uid', 'gid', 'mtime'):
        setattr(entry, attribute, 0)

    entry.uname = 'root'
    entry.gname = 'root'

    tarFile.addfile(entry, fileobj)
409 
410 
class DropBox(object):
    '''A dropBox API class.

    Wraps an HTTP object whose base URL points to the dropBox of the
    given hostname.
    '''

    def __init__(self, hostname = defaultHostname, urlTemplate = defaultUrlTemplate):
        self.hostname = hostname
        self.http = HTTP()
        self.http.setBaseUrl(urlTemplate % hostname)

        # NOTE: this changes the environment of the whole process.
        os.environ['http_proxy'] = 'http://cmsproxy.cms:3128/'
        os.environ['https_proxy'] = 'https://cmsproxy.cms:3128/'


    def signInSSO(self, secure = True):
        '''Signs in the server via CERN SSO.

        With secure == False the SSL peers/hosts are not verified, neither
        for the CERN SSO nor for the target server.
        '''

        if secure:
            logging.info('%s: Signing in via CERN SSO...', self.hostname)
        else:
            logging.info('%s: Signing in via CERN SSO (insecure)...', self.hostname)

        # FIXME: Insecure connection to -prod until the certificates are fixed.
        #        The connection to the CERN SSO is still secure by default.
        #        On -dev and -int the certificates are installed properly.
        secureTarget = True
        if 'cms-conddb-prod' in self.hostname:
            secureTarget = False

        # We also use the CERN CA certificate to verify the targets,
        # so if we are not connecting securely to CERN SSO is because
        # we do not have the CERN-CA-certs package, so we need to skip
        # this as well.
        #
        # i.e. right now we have these options:
        #  secure == True,  secureTarget == True   with CERN CA cert, -dev and -int
        #  secure == True,  secureTarget == False  with CERN CA cert, -prod
        #  secure == False, secureTarget == False  without CERN CA cert
        if not secure:
            secureTarget = False

        self.http.addCERNSSOCookies('signInSSO', secureTarget, secure)


    def signIn(self, username, password):
        '''Signs in the server.
        '''

        logging.info('%s: Signing in...', self.hostname)
        self.http.query('signIn', {
            'username': username,
            'password': password,
        })


    def signOut(self):
        '''Signs out the server.
        '''

        logging.info('%s: Signing out...', self.hostname)
        self.http.query('signOut')


    def _checkForUpdates(self):
        '''Updates this script, if a new version is found.

        If the server reports a newer version, this method signs out,
        overwrites the running script and does not return: the process
        is replaced by the new version, run with the same arguments.
        '''

        logging.info('%s: Checking for updates...', self.hostname)
        version = int(self.http.query('getUploadScriptVersion'))

        if version <= __version__:
            logging.info('%s: Up to date.', self.hostname)
            return

        logging.info('%s: There is a newer version (%s) than the current one (%s): Updating...', self.hostname, version, __version__)

        logging.info('%s: Downloading new version...', self.hostname)
        uploadScript = self.http.query('getUploadScript')

        self.signOut()

        # NOTE(review): the downloaded code is written and executed as-is,
        # so it is only as trustworthy as the connection to the server.
        logging.info('%s: Saving new version...', self.hostname)
        with open(sys.argv[0], 'wb') as f:
            f.write(uploadScript)

        logging.info('%s: Executing new version...', self.hostname)
        os.execl(sys.executable, *([sys.executable] + sys.argv))


    @staticmethod
    def _stripKnownExtensions(filename):
        '''Returns filename without a trailing .db and/or .txt extension.

        Only the end of the name is considered, so a '.db' or '.txt'
        appearing elsewhere in the path (e.g. in a directory name) is kept.
        '''

        for extension in ('.db', '.txt'):
            if filename.endswith(extension):
                filename = filename[:-len(extension)]

        return filename


    def uploadFile(self, filename, backend = defaultBackend, temporaryFile = defaultTemporaryFile):
        '''Uploads a file to the dropBox.

        The filename can be without extension, with .db or with .txt extension.
        It will be stripped and then both .db and .txt files are used.

        The .db file and the (re-serialized) JSON of the .txt file are packed
        into temporaryFile, which is then renamed to its SHA1 hash, uploaded
        for the given backend and finally removed, even if the upload fails.
        '''

        basepath = self._stripKnownExtensions(filename)
        basename = os.path.basename(basepath)

        logging.info('%s: %s: Creating tar file...', self.hostname, basename)

        tarFile = tarfile.open(temporaryFile, 'w:bz2')
        try:
            with open('%s.db' % basepath, 'rb') as data:
                addToTarFile(tarFile, data, 'data.db')

            with tempfile.NamedTemporaryFile() as metadata:
                # Re-serialize the metadata with sorted keys and a fixed indentation
                with open('%s.txt' % basepath, 'rb') as originalMetadata:
                    json.dump(json.load(originalMetadata), metadata, sort_keys = True, indent = 4)

                metadata.seek(0)
                addToTarFile(tarFile, metadata, 'metadata.txt')
        finally:
            # Do not leak the open archive if any of the inputs could not be read
            tarFile.close()

        logging.info('%s: %s: Calculating hash...', self.hostname, basename)

        fileHash = hashlib.sha1()
        with open(temporaryFile, 'rb') as f:
            # Read in chunks of 4 MB to avoid loading the whole file in memory
            for chunk in iter(lambda: f.read(4 * 1024 * 1024), b''):
                fileHash.update(chunk)

        fileHash = fileHash.hexdigest()

        logging.info('%s: %s: Hash: %s', self.hostname, basename, fileHash)

        logging.info('%s: %s: Uploading file for the %s backend...', self.hostname, basename, backend)

        # The file is uploaded under the name of its hash
        os.rename(temporaryFile, fileHash)
        try:
            self.http.query('uploadPopcon', {
                'backend': backend,
                'fileName': basename,
            }, files = {
                'uploadedFile': fileHash,
            })
        finally:
            os.unlink(fileHash)
548  os.unlink(fileHash)
549 
550 
def getInput(default, prompt = ''):
    '''Like raw_input() but with a default and automatic strip().

    Returns the stripped answer of the user or, if the answer is empty
    or only whitespace, the stripped default.
    '''

    try:
        readLine = raw_input
    except NameError:
        # Python 3: input() is what raw_input() was in Python 2
        readLine = input

    # Strip before testing, so that an all-whitespace answer counts as empty
    answer = readLine(prompt).strip()
    if answer:
        return answer

    return default.strip()
560 
561 
def getInputWorkflow(prompt = ''):
    '''Like getInput() but tailored to get target workflows (synchronization options).

    Asks (with defaultWorkflow as the default) until the answer is one of
    the allowed workflows, logging an error after each invalid answer.
    '''

    allowedWorkflows = frozenset(['offline', 'hlt', 'express', 'prompt', 'pcl'])

    answer = getInput(defaultWorkflow, prompt)

    while answer not in allowedWorkflows:
        logging.error('Please specify one of the allowed workflows. See above for the explanation on each of them.')
        answer = getInput(defaultWorkflow, prompt)

    return answer
573 
574 
def getInputChoose(optionsList, default, prompt = ''):
    '''Makes the user choose from a list of options.

    The answer (or the default, if empty) is taken as an index into
    optionsList and the selected element is returned. Asks again, after
    logging an error, if the answer is not an integer or it is not
    a valid index of the list.
    '''

    while True:
        answer = getInput(default, prompt)

        try:
            position = int(answer)
            chosen = optionsList[position]
        except ValueError:
            logging.error('Please specify an index of the list (i.e. integer).')
        except IndexError:
            logging.error('The index you provided is not in the given list.')
        else:
            return chosen
588 
589 
def getInputRepeat(prompt = ''):
    '''Like raw_input() but repeats if nothing is provided and automatic strip().

    Asks until the answer contains something else than whitespace,
    logging an error after each empty answer, and returns it stripped.
    '''

    try:
        readLine = raw_input
    except NameError:
        # Python 3: input() is what raw_input() was in Python 2
        readLine = input

    while True:
        # Strip before testing, so that an all-whitespace answer counts as empty
        answer = readLine(prompt).strip()
        if answer:
            return answer

        logging.error('You need to provide a value.')
600 
601 
602 
603 
def _getCERNSSOCookies
double split
Definition: MVATrainer.cc:139
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run