upload_popcon.py
#!/usr/bin/env python3
'''Script that uploads to the new CMS conditions uploader.
Adapted to the new infrastructure from v6 of Miguel Ojeda's upload.py script for the DropBox.
'''

__author__ = 'Andreas Pfeiffer'
__copyright__ = 'Copyright 2015, CERN CMS'
__credits__ = ['Giacomo Govi', 'Salvatore Di Guida', 'Miguel Ojeda', 'Andreas Pfeiffer']
__license__ = 'Unknown'
__maintainer__ = 'Andreas Pfeiffer'
__email__ = 'andreas.pfeiffer@cern.ch'

import os
import sys
import optparse
import hashlib
import tarfile
import netrc
import getpass
import errno
import sqlite3
import json
import tempfile

defaultBackend = 'online'
defaultHostname = 'cms-conddb-prod.cern.ch'
defaultDevHostname = 'cms-conddb-dev.cern.ch'
defaultUrlTemplate = 'https://%s/cmsDbUpload/'
defaultTemporaryFile = 'upload.tar.bz2'
defaultNetrcHost = 'ConditionUploader'
defaultWorkflow = 'offline'
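
# Credentials are typically kept in a ~/.netrc entry named after defaultNetrcHost
# (presumably read with the netrc module imported above). An illustrative sketch
# of such an entry, with placeholder login and password:
#
#   machine ConditionUploader
#     login myCernUsername
#     password myCernPassword
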
# common/http.py start (plus the "# Try to extract..." section bit)
import time
import logging
from io import BytesIO  # Python 3 replacement for the Python 2 only cStringIO module

import pycurl
import socket
import copy


class HTTPError(Exception):
    '''A common HTTP exception.

    self.code is the response HTTP code as an integer.
    self.response is the response body (i.e. page).
    '''

    def __init__(self, code, response):
        self.code = code
        self.response = response

        # Try to extract the error message if possible (i.e. known error page format)
        try:
            self.args = (response.split('<p>')[1].split('</p>')[0], )
        except Exception:
            self.args = (self.response, )


CERN_SSO_CURL_CAPATH = '/etc/pki/tls/certs'

class HTTP(object):
    '''Class used for querying URLs using the HTTP protocol.
    '''

    retryCodes = frozenset([502, 503])

    def __init__(self):
        self.setBaseUrl()
        self.setRetries()

        self.curl = pycurl.Curl()
        self.curl.setopt(self.curl.COOKIEFILE, '')  # in memory

        #-toDo: make sure we have the right options set here to use ssl
        #-review(2015-09-25): check and see - action: AP
        # self.curl.setopt(self.curl.SSL_VERIFYPEER, 1)
        self.curl.setopt(self.curl.SSL_VERIFYPEER, 0)
        self.curl.setopt(self.curl.SSL_VERIFYHOST, 2)

        self.baseUrl = None

        self.token = None

    def getCookies(self):
        '''Returns the list of cookies.
        '''
        return self.curl.getinfo(self.curl.INFO_COOKIELIST)

    def discardCookies(self):
        '''Discards cookies.
        '''
        self.curl.setopt(self.curl.COOKIELIST, 'ALL')


    def setBaseUrl(self, baseUrl = ''):
        '''Sets a base URL which will be prefixed to all the URLs
        that will be queried later.
        '''
        self.baseUrl = baseUrl


    def setProxy(self, proxy = ''):
        '''Sets a proxy.
        '''
        self.curl.setopt(self.curl.PROXY, proxy)


    def setTimeout(self, timeout = 0):
        '''Sets a timeout.
        '''
        self.curl.setopt(self.curl.TIMEOUT, timeout)


    def setRetries(self, retries = ()):
        '''Sets the retries.

        The retries are a sequence of the seconds to wait per retry.

        The retries are done on:
        * PyCurl errors (includes network problems, e.g. not being able
          to connect to the host).
        * 502 Bad Gateway (for the moment, to avoid temporary
          Apache-CherryPy issues).
        * 503 Service Temporarily Unavailable (for when we update
          the frontends).
        '''
        self.retries = retries

    def getToken(self, username, password):

        url = self.baseUrl + 'token'

        self.curl.setopt(pycurl.URL, url)
        self.curl.setopt(pycurl.VERBOSE, 0)

        #-toDo: check if/why these are needed ...
        #-ap: hmm ...
        # self.curl.setopt(pycurl.DNS_CACHE_TIMEOUT, 0)
        # self.curl.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
        #-end hmmm ...
        #-review(2015-09-25): check and see - action: AP

        self.curl.setopt(pycurl.HTTPHEADER, ['Accept: application/json'])
        # self.curl.setopt( self.curl.POST, {})
        self.curl.setopt(self.curl.HTTPGET, 0)

        response = BytesIO()
        self.curl.setopt(pycurl.WRITEFUNCTION, response.write)
        self.curl.setopt(pycurl.USERPWD, '%s:%s' % (username, password))

        logging.debug('going to connect to server at: %s' % url)

        self.curl.perform()
        code = self.curl.getinfo(pycurl.RESPONSE_CODE)
        logging.debug('got: %s ', str(code))

        try:
            self.token = json.loads(response.getvalue())['token']
        except Exception as e:
            logging.error('http::getToken> got error from server: %s ', str(e))
            if 'No JSON object could be decoded' in str(e):
                return None
            logging.error("error getting token: %s", str(e))
            return None

        logging.debug('token: %s', self.token)
        logging.debug('returning: %s', response.getvalue())

        return response.getvalue()

    def query(self, url, data = None, files = None, keepCookies = True):
        '''Queries a URL, optionally with some data (dictionary).

        If no data is specified, a GET request will be used.
        If some data is specified, a POST request will be used.

        If files is specified, it must be a dictionary like data but
        the values are filenames.

        By default, cookies are kept in-between requests.

        An HTTPError exception is raised if the response's HTTP code is not 200.
        '''

        if not keepCookies:
            self.discardCookies()

        url = self.baseUrl + url

        # make sure the logs are safe ... at least somewhat :)
        data4log = copy.copy(data)
        if data4log:
            if 'password' in data4log.keys():
                data4log['password'] = '*'

        retries = [0] + list(self.retries)

        while True:
            logging.debug('Querying %s with data %s and files %s (retries left: %s, current sleep: %s)...', url, data4log, files, len(retries), retries[0])

            time.sleep(retries.pop(0))

            try:
                self.curl.setopt(self.curl.URL, url)
                self.curl.setopt(self.curl.HTTPGET, 1)

                # from now on we use the token we got from the login
                self.curl.setopt(pycurl.USERPWD, '%s:""' % (str(self.token), ))
                self.curl.setopt(pycurl.HTTPHEADER, ['Accept: application/json'])

                if data is not None or files is not None:
                    # If there is data or files to send, use a POST request

                    finalData = {}

                    if data is not None:
                        finalData.update(data)

                    if files is not None:
                        for (key, fileName) in files.items():
                            finalData[key] = (self.curl.FORM_FILE, fileName)
                    # pycurl expects a list of (name, value) tuples here
                    self.curl.setopt(self.curl.HTTPPOST, list(finalData.items()))

                self.curl.setopt(pycurl.VERBOSE, 0)

                response = BytesIO()
                self.curl.setopt(self.curl.WRITEFUNCTION, response.write)
                self.curl.perform()

                code = self.curl.getinfo(self.curl.RESPONSE_CODE)

                if code in self.retryCodes and len(retries) > 0:
                    logging.debug('Retrying since we got the %s error code...', code)
                    continue

                if code != 200:
                    raise HTTPError(code, response.getvalue().decode('utf-8', errors='replace'))

                return response.getvalue()

            except pycurl.error as e:
                if len(retries) == 0:
                    raise e
                logging.debug('Retrying since we got the %s pycurl exception...', str(e))

# common/http.py end
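
# Illustrative sketch only (not executed by this script): the HTTP helper above
# is typically driven like this, where the user name and password are
# placeholders and the endpoints are the ones used further below:
#
#   http = HTTP()
#   http.setBaseUrl(defaultUrlTemplate % defaultDevHostname)
#   http.setRetries((5, 15, 30))          # wait 5 s, 15 s and 30 s between retries
#   if http.getToken('myCernUsername', 'myCernPassword'):
#       answer = http.query('uploadFile', data = {'backend': defaultBackend})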

def addToTarFile(tarFile, fileobj, arcname):
    tarInfo = tarFile.gettarinfo(fileobj = fileobj, arcname = arcname)
    tarInfo.mode = 0o400
    tarInfo.uid = tarInfo.gid = tarInfo.mtime = 0
    tarInfo.uname = tarInfo.gname = 'root'
    tarFile.addfile(tarInfo, fileobj)
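
# The tar entries are normalized in addToTarFile (fixed mode, zeroed owner and
# timestamp), presumably so that identical payloads always produce an identical
# archive and therefore an identical hash below. A standalone sketch of the same
# call pattern (file names are placeholders):
#
#   with tarfile.open('upload.tar.bz2', 'w:bz2') as tarFile:
#       with open('payload.db', 'rb') as data:
#           addToTarFile(tarFile, data, 'data.db')
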
class ConditionsUploader(object):
    '''Upload conditions to the CMS conditions uploader service.
    '''

    def __init__(self, hostname = defaultHostname, urlTemplate = defaultUrlTemplate):
        self.hostname = hostname
        self.urlTemplate = urlTemplate
        self.userName = None
        self.http = None
        self.password = None

    def setHost(self, hostname):
        self.hostname = hostname

    def signIn(self, username, password):
        '''Initializes the HTTP client and signs in to the server.
        '''
        self.http = HTTP()
        if socket.getfqdn().strip().endswith('.cms'):
            self.http.setProxy('https://cmsproxy.cms:3128/')
        self.http.setBaseUrl(self.urlTemplate % self.hostname)

        logging.info('%s: Signing in user %s ...', self.hostname, username)
        try:
            self.token = self.http.getToken(username, password)
        except Exception as e:
            logging.error("Caught exception when trying to get token for user %s from %s: %s" % (username, self.hostname, str(e)))
            return False

        if not self.token:
            logging.error("could not get token for user %s from %s" % (username, self.hostname))
            return False

        logging.debug("got: '%s'", str(self.token))
        self.userName = username
        self.password = password
        return True

    def signInAgain(self):
        return self.signIn(self.userName, self.password)

    def signOut(self):
        '''Signs out of the server.
        '''

        logging.info('%s: Signing out...', self.hostname)
        # self.http.query('logout')
        self.token = None

    def uploadFile(self, filename, backend = defaultBackend, temporaryFile = defaultTemporaryFile):
        '''Uploads a file to the dropBox.

        The filename can be without extension, with .db or with .txt extension.
        It will be stripped and then both .db and .txt files are used.
        '''

        basepath = filename.rsplit('.db', 1)[0].rsplit('.txt', 1)[0]
        metadataFilename = '%s.txt' % basepath
        with open(metadataFilename, 'rb') as metadataFile:
            metadata = json.load(metadataFile)
        # When the destination database is prep, the hostname has to be set to the dev server.
        forceHost = False
        destDb = metadata['destinationDatabase']
        ret = False
        if destDb.startswith('oracle://cms_orcon_prod') or destDb.startswith('oracle://cms_orcoff_prep'):
            if destDb.startswith('oracle://cms_orcoff_prep'):
                self.setHost(defaultDevHostname)
                self.signInAgain()
                forceHost = True
            ret = self._uploadFile(filename, backend, temporaryFile)
            if forceHost:
                # set back the hostname to the original global setting
                self.setHost(defaultHostname)
                self.signInAgain()
        else:
            logging.error("DestinationDatabase %s is not valid. Skipping the upload." % destDb)
        return ret
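
    # A minimal sketch of the accompanying <basepath>.txt metadata file, showing
    # only the key this method actually reads ('destinationDatabase'); real
    # metadata files carry additional fields, and '<account>' is a placeholder:
    #
    #   {
    #       "destinationDatabase": "oracle://cms_orcoff_prep/<account>"
    #   }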

    def _uploadFile(self, filename, backend = defaultBackend, temporaryFile = defaultTemporaryFile):

        basepath = filename.rsplit('.db', 1)[0].rsplit('.txt', 1)[0]
        basename = os.path.basename(basepath)

        logging.debug('%s: %s: Creating tar file for upload ...', self.hostname, basename)

        try:
            tarFile = tarfile.open(temporaryFile, 'w:bz2')

            with open('%s.db' % basepath, 'rb') as data:
                addToTarFile(tarFile, data, 'data.db')
        except Exception as e:
            msg = 'Error when creating tar file. \n'
            msg += 'Please check that you have write access to the directory you are running in,\n'
            msg += 'and that you have enough space on this disk (df -h .)\n'
            logging.error(msg)
            raise Exception(msg)

        with tempfile.NamedTemporaryFile() as metadata:
            with open('%s.txt' % basepath, 'rb') as originalMetadata:
                # NamedTemporaryFile is opened in binary mode, so encode the
                # normalized JSON before writing it
                metadata.write(json.dumps(json.load(originalMetadata), sort_keys = True, indent = 4).encode('utf-8'))

            metadata.seek(0)
            addToTarFile(tarFile, metadata, 'metadata.txt')

        tarFile.close()

        logging.debug('%s: %s: Calculating hash...', self.hostname, basename)

        fileHash = hashlib.sha1()
        with open(temporaryFile, 'rb') as f:
            while True:
                data = f.read(4 * 1024 * 1024)
                if not data:
                    break
                fileHash.update(data)

        fileHash = fileHash.hexdigest()
        fileInfo = os.stat(temporaryFile)
        fileSize = fileInfo.st_size

        logging.debug('%s: %s: Hash: %s', self.hostname, basename, fileHash)

        logging.info('%s: %s: Uploading file (%s, size %s) to the %s backend...', self.hostname, basename, fileHash, fileSize, backend)
        os.rename(temporaryFile, fileHash)
        try:
            ret = self.http.query('uploadFile',
                                  {
                                      'backend': backend,
                                      'fileName': basename,
                                      'userName': self.userName,
                                  },
                                  files = {
                                      'uploadedFile': fileHash,
                                  }
                                  )
        except Exception as e:
            logging.error('Error from uploading: %s' % str(e))
            ret = json.dumps({"status": -1, "upload": {'itemStatus': {basename: {'status': 'failed', 'info': str(e)}}}, "error": str(e)})

        os.unlink(fileHash)

        statusInfo = json.loads(ret)['upload']
        logging.debug('upload returned: %s', statusInfo)

        okTags = []
        skippedTags = []
        failedTags = []
        for tag, info in statusInfo['itemStatus'].items():
            logging.debug('checking tag %s, info %s', tag, str(json.dumps(info, indent = 4, sort_keys = True)))
            if 'ok' in info['status'].lower():
                okTags.append(tag)
                logging.info('tag %s successfully uploaded', tag)
            if 'skip' in info['status'].lower():
                skippedTags.append(tag)
                logging.warning('found tag %s to be skipped. reason: \n ... \t%s ', tag, info['info'])
            if 'fail' in info['status'].lower():
                failedTags.append(tag)
                logging.error('found tag %s failed to upload. reason: \n ... \t%s ', tag, info['info'])

        if len(okTags) > 0:      logging.info   ("tags successfully uploaded: %s ", str(okTags))
        if len(skippedTags) > 0: logging.warning("tags SKIPPED during upload: %s ", str(skippedTags))
        if len(failedTags) > 0:  logging.error  ("tags FAILED to upload: %s ", str(failedTags))

        fileLogURL = 'https://%s/logs/dropBox/getFileLog?fileHash=%s'
        logging.info('file log at: %s', fileLogURL % (self.hostname, fileHash))

        return len(okTags) > 0
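
# Putting it together (illustrative sketch only; the real script drives this
# class from command line handling that is not part of this listing). Host,
# file name and credentials below are placeholders:
#
#   uploader = ConditionsUploader(hostname = defaultHostname)
#   if uploader.signIn('myCernUsername', 'myCernPassword'):
#       # expects myPayload.db and its myPayload.txt metadata side by side
#       ok = uploader.uploadFile('myPayload.db')
#       uploader.signOut()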