'''Script that uploads to the new CMS conditions uploader.
Adapted to the new infrastructure from v6 of the upload.py script for the DropBox from Miguel Ojeda.
'''

# Module-level metadata (authorship, licensing and contact information).
# These are plain string/list constants and do not influence the upload logic.
__author__ = 'Andreas Pfeiffer'
__copyright__ = 'Copyright 2015, CERN CMS'
__credits__ = [
    'Giacomo Govi',
    'Salvatore Di Guida',
    'Miguel Ojeda',
    'Andreas Pfeiffer',
]
__license__ = 'Unknown'
__maintainer__ = 'Andreas Pfeiffer'
__email__ = 'andreas.pfeiffer@cern.ch'
# Default settings used throughout the script.

# Default value of the `backend` parameter of the file-upload methods.
defaultBackend = 'online'

# Default uploader host, and the host switched to when the destination
# database of an upload starts with 'oracle://cms_orcoff_prep'.
defaultHostname = 'cms-conddb-prod.cern.ch'
defaultDevHostname = 'cms-conddb-dev.cern.ch'

# Service URL pattern; presumably the '%s' placeholder receives the
# hostname -- confirm against the code that builds the base URL.
defaultUrlTemplate = 'https://%s/cmsDbUpload/'

# Default path of the bzip2-compressed tar archive created for an upload.
defaultTemporaryFile = 'upload.tar.bz2'

# NOTE(review): looks like the host entry looked up in the user's netrc
# file for credentials -- confirm where this constant is consumed.
defaultNetrcHost = 'ConditionUploader'

# NOTE(review): not referenced in the visible part of the file; confirm use.
defaultWorkflow = 'offline'
45 '''A common HTTP exception.
47 self.code is the response HTTP code as an integer.
48 self.response is the response body (i.e. page).
57 self.
args = (response.split(
'<p>')[1].
split(
'</p>')[0], )
# Directory path constant for CA certificates.
# NOTE(review): not referenced in the visible part of the file; presumably
# intended as the curl CAPATH for CERN SSO -- confirm before relying on it.
CERN_SSO_CURL_CAPATH = '/etc/pki/tls/certs'
65 '''Class used for querying URLs using the HTTP protocol.
68 retryCodes = frozenset([502, 503])
75 self.
curl.setopt(self.
curl.COOKIEFILE,
'')
80 self.
curl.setopt(self.
curl.SSL_VERIFYPEER, 0)
81 self.
curl.setopt(self.
curl.SSL_VERIFYHOST, 2)
88 '''Returns the list of cookies.
90 return self.
curl.getinfo(self.
curl.INFO_COOKIELIST)
95 self.
curl.setopt(self.
curl.COOKIELIST,
'ALL')
99 '''Allows to set a base URL which will be prefixed to all the URLs
100 that will be queried later.
106 '''Allows to set a proxy.
108 self.
curl.setopt(self.
curl.PROXY, proxy)
112 '''Allows to set a timeout.
114 self.
curl.setopt(self.
curl.TIMEOUT, timeout)
118 '''Allows to set retries.
120 The retries are a sequence of the seconds to wait per retry.
122 The retries are done on:
123 * PyCurl errors (includes network problems, e.g. not being able
124 to connect to the host).
125 * 502 Bad Gateway (for the moment, to avoid temporary
126 Apache-CherryPy issues).
127 * 503 Service Temporarily Unavailable (for when we update
136 self.
curl.setopt(pycurl.URL, url)
137 self.
curl.setopt(pycurl.VERBOSE, 0)
147 self.
curl.setopt(pycurl.HTTPHEADER, [
'Accept: application/json'])
149 self.
curl.setopt(self.
curl.HTTPGET, 0)
151 response = cStringIO.StringIO()
152 self.
curl.setopt(pycurl.WRITEFUNCTION, response.write)
153 self.
curl.setopt(pycurl.USERPWD,
'%s:%s' % (username, password) )
155 logging.debug(
'going to connect to server at: %s' % url )
158 code = self.
curl.getinfo(pycurl.RESPONSE_CODE)
159 logging.debug(
'got: %s ',
str(code))
162 self.
token = json.loads( response.getvalue() )[
'token']
163 except Exception
as e:
164 logging.error(
'http::getToken> got error from server: %s ',
str(e) )
165 if 'No JSON object could be decoded' in str(e):
167 logging.error(
"error getting token: %s",
str(e))
170 logging.debug(
'token: %s', self.
token)
171 logging.debug(
'returning: %s', response.getvalue())
173 return response.getvalue()
175 def query(self, url, data = None, files = None, keepCookies = True):
176 '''Queries a URL, optionally with some data (dictionary).
178 If no data is specified, a GET request will be used.
179 If some data is specified, a POST request will be used.
181 If files is specified, it must be a dictionary like data but
182 the values are filenames.
184 By default, cookies are kept in-between requests.
186 A HTTPError exception is raised if the response's HTTP code is not 200.
195 data4log = copy.copy(data)
197 if 'password' in data4log.keys():
198 data4log[
'password'] =
'*'
200 retries = [0] + list(self.
retries)
203 logging.debug(
'Querying %s with data %s and files %s (retries left: %s, current sleep: %s)...', url, data4log, files, len(retries), retries[0])
205 time.sleep(retries.pop(0))
208 self.
curl.setopt(self.
curl.URL, url)
209 self.
curl.setopt(self.
curl.HTTPGET, 1)
212 self.
curl.setopt(pycurl.USERPWD,
'%s:""' % (
str(self.
token), ) )
213 self.
curl.setopt(pycurl.HTTPHEADER, [
'Accept: application/json'])
215 if data
is not None or files
is not None:
221 finalData.update(data)
223 if files
is not None:
224 for (key, fileName)
in files.items():
225 finalData[key] = (self.
curl.FORM_FILE, fileName)
226 self.
curl.setopt( self.
curl.HTTPPOST, finalData.items() )
228 self.
curl.setopt(pycurl.VERBOSE, 0)
230 response = cStringIO.StringIO()
231 self.
curl.setopt(self.
curl.WRITEFUNCTION, response.write)
234 code = self.
curl.getinfo(self.
curl.RESPONSE_CODE)
236 if code
in self.retryCodes
and len(retries) > 0:
237 logging.debug(
'Retrying since we got the %s error code...', code)
241 raise HTTPError(code, response.getvalue())
243 return response.getvalue()
245 except pycurl.error
as e:
246 if len(retries) == 0:
248 logging.debug(
'Retrying since we got the %s pycurl exception...',
str(e))
253 tarInfo = tarFile.gettarinfo(fileobj = fileobj, arcname = arcname)
255 tarInfo.uid = tarInfo.gid = tarInfo.mtime = 0
256 tarInfo.uname = tarInfo.gname =
'root'
257 tarFile.addfile(tarInfo, fileobj)
260 '''Upload conditions to the CMS conditions uploader service.
263 def __init__(self, hostname = defaultHostname, urlTemplate = defaultUrlTemplate):
277 if socket.getfqdn().
strip().endswith(
'.cms'):
278 self.
http.setProxy(
'https://cmsproxy.cms:3128/')
280 '''Signs in the server.
283 logging.info(
'%s: Signing in user %s ...', self.
hostname, username)
286 except Exception
as e:
287 logging.error(
"Caught exception when trying to get token for user %s from %s: %s" % (username, self.
hostname,
str(e)) )
291 logging.error(
"could not get token for user %s from %s" % (username, self.
hostname) )
294 logging.debug(
"got: '%s'",
str(self.
token) )
303 '''Signs out the server.
306 logging.info(
'%s: Signing out...', self.
hostname)
311 def uploadFile(self, filename, backend = defaultBackend, temporaryFile = defaultTemporaryFile):
312 '''Uploads a file to the dropBox.
314 The filename can be without extension, with .db or with .txt extension.
315 It will be stripped and then both .db and .txt files are used.
318 basepath = filename.rsplit(
'.db', 1)[0].rsplit(
'.txt', 1)[0]
319 metadataFilename =
'%s.txt' % basepath
320 with open(metadataFilename,
'rb')
as metadataFile:
321 metadata = json.load( metadataFile )
324 destDb = metadata[
'destinationDatabase']
326 if destDb.startswith(
'oracle://cms_orcon_prod')
or destDb.startswith(
'oracle://cms_orcoff_prep'):
327 if destDb.startswith(
'oracle://cms_orcoff_prep'):
328 self.
setHost( defaultDevHostname )
331 ret = self.
_uploadFile(filename, backend, temporaryFile)
334 self.
setHost( defaultHostname )
337 logging.error(
"DestinationDatabase %s is not valid. Skipping the upload." %destDb)
340 def _uploadFile(self, filename, backend = defaultBackend, temporaryFile = defaultTemporaryFile):
342 basepath = filename.rsplit(
'.db', 1)[0].rsplit(
'.txt', 1)[0]
343 basename = os.path.basename(basepath)
345 logging.debug(
'%s: %s: Creating tar file for upload ...', self.
hostname, basename)
348 tarFile = tarfile.open(temporaryFile,
'w:bz2')
350 with open(
'%s.db' % basepath,
'rb')
as data:
352 except Exception
as e:
353 msg =
'Error when creating tar file. \n'
354 msg +=
'Please check that you have write access to the directory you are running,\n'
355 msg +=
'and that you have enough space on this disk (df -h .)\n'
359 with tempfile.NamedTemporaryFile()
as metadata:
360 with open(
'%s.txt' % basepath,
'rb')
as originalMetadata:
361 json.dump(json.load(originalMetadata), metadata, sort_keys =
True, indent = 4)
368 logging.debug(
'%s: %s: Calculating hash...', self.
hostname, basename)
370 fileHash = hashlib.sha1()
371 with open(temporaryFile,
'rb')
as f:
373 data = f.read(4 * 1024 * 1024)
376 fileHash.update(data)
378 fileHash = fileHash.hexdigest()
379 fileInfo = os.stat(temporaryFile)
380 fileSize = fileInfo.st_size
382 logging.debug(
'%s: %s: Hash: %s', self.
hostname, basename, fileHash)
384 logging.info(
'%s: %s: Uploading file (%s, size %s) to the %s backend...', self.
hostname, basename, fileHash, fileSize, backend)
385 os.rename(temporaryFile, fileHash)
390 'fileName': basename,
394 'uploadedFile': fileHash,
397 except Exception
as e:
398 logging.error(
'Error from uploading: %s' %
str(e))
399 ret = json.dumps( {
"status": -1,
"upload" : {
'itemStatus' : { basename : {
'status':
'failed',
'info':
str(e)}}},
"error" :
str(e)} )
403 statusInfo = json.loads(ret)[
'upload']
404 logging.debug(
'upload returned: %s', statusInfo )
409 for tag, info
in statusInfo[
'itemStatus'].
items():
410 logging.debug(
'checking tag %s, info %s', tag,
str(json.dumps(info, indent=4,sort_keys=
True)) )
411 if 'ok' in info[
'status'].lower() :
413 logging.info(
'tag %s successfully uploaded', tag)
414 if 'skip' in info[
'status'].lower() :
415 skippedTags.append( tag )
416 logging.warning(
'found tag %s to be skipped. reason: \n ... \t%s ', tag, info[
'info'])
417 if 'fail' in info[
'status'].lower() :
418 failedTags.append( tag )
419 logging.error(
'found tag %s failed to upload. reason: \n ... \t%s ', tag, info[
'info'])
421 if len(okTags) > 0: logging.info (
"tags sucessfully uploaded: %s ",
str(okTags) )
422 if len(skippedTags) > 0: logging.warning(
"tags SKIPped to upload : %s ",
str(skippedTags) )
423 if len(failedTags) > 0: logging.error (
"tags FAILed to upload : %s ",
str(failedTags) )
425 fileLogURL =
'https://%s/logs/dropBox/getFileLog?fileHash=%s'
426 logging.info(
'file log at: %s', fileLogURL % (self.
hostname,fileHash))