CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
archive.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 #
3 # import modules
4 import os, time, shutil, zipfile, commands, sys, glob
5 from datetime import datetime
6 # import ends
7 #
8 
9 #
10 # global variables for zipping and file transfer
11 # Directory Setup
# --- Directory layout ---------------------------------------------------
dir = "/nfshome0/smaruyam/CMSSW_2_0_10/src/test/"     # where the per-run files live
dbdir = "/nfshome0/smaruyam/CMSSW_2_0_10/src/test/"   # where the database lives
arcdir = "/nfshome0/smaruyam/CMSSW_2_0_10/src/test/"  # where zipped files are written
# Configuration file; the surrounding blanks are part of the value because
# the string is spliced directly into command lines below.
cfgfile = " /nfshome0/smaruyam/CMSSW_2_0_10/src/test/myconfig.txt "

# --- Feature switches ---------------------------------------------------
EnableFileRemoval = False
PathReplace = False
EnableTransfer = False

# --- Thresholds and transfer settings -----------------------------------
# 1 GB by default, to get away from the technicalities of large zip files
fileSizeThreshold = 1000000000
# percentage of disk use above which cleaning is triggered (default 80%)
disk_threshold = 80
# T0 system injection script
transferScript = "/nfshome0/tier0/scripts/injectFileIntoTransferSystem.pl"
# Castor store area
targetdir = "/castor/cern.ch/cms/store/dqm/"
cfgarg = " --config " + cfgfile
fullTransferArg = cfgarg + " --type dqm --hostname srv-C2D05-19 --lumisection 1 --appname CMSSW --appversion CMSSW_2_0_10 "
statusCheck = cfgarg + " --check --filename "
# sentinel returned by the helpers when they have nothing to report
emptyString = "empty"

# temporary fix for the sqlite3 path (trailing blank separates it from its arguments)
sqlite3 = "sqlite3 "

# --- Common database paths and log file ---------------------------------
logfile = open('archival_log.txt', 'a')  # log, opened in append mode
tmpdb = dbdir + "tmp/tmp.db"             # temporary db
bakdb = dbdir + "tmp/backup.db"          # backup db
db = dbdir + "db.db"                     # master db
40 # global variables definition ends
41 #
42 
43 #
44 # file register and un-register
45 # You don't need to copy these!
46 """
47 Temporary Port from Hyunkwan's Un-Register-File Script
48 """
def filereg(db, bakdb, tmpdb, file, logfile):
    """Register *file* in the database, working on a scratch copy.

    The live database ``db`` is copied to ``tmpdb``, ``visDQMRegisterFile``
    is run against that copy (its output goes to ``logfile``), and the copy
    is then moved back over ``db``.  If no ``bakdb.<YYYYMMDD>*`` file exists
    yet for today, a time-stamped snapshot of the updated copy is saved first.
    """
    # always start from a fresh scratch copy of the live database
    if os.path.exists(tmpdb):
        os.remove(tmpdb)
    shutil.copy(db, tmpdb)

    logfile.write('*** File Register ***\n')
    register_cmd = 'visDQMRegisterFile ' + tmpdb + ' "/Global/Online/ALL" "Global run" ' + file
    logfile.write(os.popen(register_cmd).read())

    now = datetime.now()
    todays_backups = glob.glob(bakdb + '.' + now.strftime("%Y%m%d") + '*')
    if len(todays_backups) == 0:
        # first update of the day: keep one time-stamped snapshot
        shutil.copy(tmpdb, bakdb + '.' + now.strftime("%Y%m%d_%H%M%S"))
    # in every case the scratch copy becomes the live database
    shutil.move(tmpdb, db)
64 
def fileunreg(db, bakdb, tmpdb, oldfile, logfile):
    """Unregister *oldfile* from the database, working on a scratch copy.

    The live database ``db`` is copied to ``tmpdb``, ``visDQMUnregisterFile``
    is run against that copy (its output goes to ``logfile``), and the copy
    is then moved back over ``db``.  If no ``bakdb.<YYYYMMDD>*`` file exists
    yet for today, a time-stamped snapshot of the updated copy is saved first.
    """
    # always start from a fresh scratch copy of the live database
    if os.path.exists(tmpdb):
        os.remove(tmpdb)
    shutil.copy(db, tmpdb)

    logfile.write('*** File UnRegister ***\n')
    unregister_cmd = 'visDQMUnregisterFile ' + tmpdb + ' ' + oldfile
    logfile.write(os.popen(unregister_cmd).read())

    now = datetime.now()
    todays_backups = glob.glob(bakdb + '.' + now.strftime("%Y%m%d") + '*')
    if len(todays_backups) == 0:
        # first update of the day: keep one time-stamped snapshot
        shutil.copy(tmpdb, bakdb + '.' + now.strftime("%Y%m%d_%H%M%S"))
    # in every case the scratch copy becomes the live database
    shutil.move(tmpdb, db)
80 
81 # file register and un-register over
82 #
83 
84 #
85 # generic function
86 # check command exit status and retrieve output message
87 """
88 Check and Return Output
89 """
def CheckCommand(cmd, logfile):
    """Run *cmd* through the shell and return ``(status, output)``.

    A non-zero status is reported in ``logfile`` together with the command's
    output; the result tuple is returned unchanged in both cases so callers
    can inspect ``result[0]`` and ``result[1]`` themselves.
    """
    # getstatusoutput lives in `commands` on Python 2; fall back to the
    # `subprocess` equivalent where that module is not available.
    try:
        from commands import getstatusoutput
    except ImportError:
        from subprocess import getstatusoutput
    result = getstatusoutput(cmd)
    if result[0] != 0:
        logfile.write("Command Exits with non-zero Status," + str(result[0]) + " Error = " + result[1] + "\n")
    return result
98 
99 # generic function over
100 #
101 
102 #
103 # disk use check
104 """
105 Disk Usage Check
106 Reference to Cleaner()
107 df output is assumed as follows.
108 Filesystem Size Used Avail Use% Mounted on
109 /dev/sda3 73G 45G 25G 65% /
110 /dev/sda1 99M 12M 83M 12% /boot
111 none 2.0G 0 2.0G 0% /dev/shm
112 /dev/sdb1 917G 83G 788G 10% /data
113 cmsnfshome0:/nfshome0
114  805G 673G 133G 84% /cmsnfshome0/nfshome0
115 """
def DiskUsage(logfile) :
    """Check disk usage with ``df`` and start the cleaner when it is too high.

    The Use% value is read from the fifth line, fifth column of the ``df``
    output.  When it exceeds ``disk_threshold`` the cleaner is invoked;
    otherwise the current usage is only logged.  If the ``df`` output does
    not have the expected layout, the problem is logged and nothing is cleaned.
    """
    logfile.write(" *** Checking Disk Usage ***\n")
    df_pipe = os.popen('df')
    try:
        df_lines = df_pipe.readlines()
    finally:
        df_pipe.close()  # release the pipe even if reading fails

    # NEED check for the host: the line/column position is host specific.
    try:
        percent_text = df_lines[4].split()[4][:-1]  # 5th line, 5th field, without '%'
        percent_used = float(percent_text)
    except (IndexError, ValueError):
        logfile.write("Disk Usage could not be read from df output, no cleaning done\n")
        return

    if percent_used > disk_threshold :  # disk is fuller than the threshold
        logfile.write("Disk Usage too high = " + percent_text + "%\n")
        Cleaner(logfile)
    else :
        logfile.write("Disk Usage is low enough = " + percent_text + "%\n")
131 
132 # disk use check over
133 #
134 
135 #
136 # Confirm the path to Transferred file on Castor
137 """
138 Set Path to Castor
139 Reference to CheckPath(), filereg()
140 """
def SetPath(file, logfile):
    """Look up *file* on Castor and register its ``rfio:`` path.

    Returns True when the path was found and registered, False when the
    path lookup came back empty.
    """
    castor_path = CheckPath(file, logfile)
    if castor_path == emptyString:
        logfile.write("File Transferred, but not found on tape\n")
        return False
    registered_path = "rfio:" + castor_path
    logfile.write("Register New Path " + registered_path + "\n")
    filereg(db, bakdb, tmpdb, registered_path, logfile)
    return True
150 
151 """
152 Path Specifier
153 Reference to ConfirmPath(), ScanDir()
154 """
def CheckPath(filename, logfile) :
    """Return the Castor path of *filename*, or ``emptyString`` if not found.

    The first guess is ``targetdir`` + <YYYYMM>, derived from the file's
    modification time; when that guess is not confirmed, all directories
    under ``targetdir`` are scanned instead.
    """
    modified = time.localtime(os.stat(filename).st_mtime)
    # year followed by the zero-padded month, e.g. 200807
    guess = targetdir + "%d%02d" % (modified[0], modified[1])
    logfile.write("Best Guess for the path is " + guess + "\n")
    confirmed = ConfirmPath(filename, guess, logfile)
    if confirmed == emptyString:
        # the guess was wrong: fall back to a full scan
        return ScanDir(filename, logfile)
    return confirmed
168 
169 """
170 Check File Path on Tape
171 Reference to CheckCommand()
172 """
def ConfirmPath(file, path, logfile) :
    """Check with ``rfdir`` whether *file* is present under *path*.

    Returns the full path (``path`` + "/" + file name relative to
    ``arcdir``) when the last column of an ``rfdir`` output line matches it,
    and ``emptyString`` otherwise.
    """
    logfile.write(" *** Checking File Path ***\n ")
    time.sleep(10)  # NOTE(review): fixed 10 s pause before every query; reason not visible here
    fullpath = path + "/" + file[len(arcdir):]
    status, listing = CheckCommand("rfdir " + fullpath, logfile)
    if status != 0 or listing == "":
        return emptyString
    for entry in listing.split("\n"):
        if "No such file or directory" in entry:
            return emptyString
        logfile.write(" rfdir result is " + entry + "\n")
        columns = entry.split()
        if len(columns) > 7 and columns[-1] == fullpath:
            return fullpath
    return emptyString
192 
193 """
194 Scan Castor Directories
195 Reference to ConfirmPath(), CheckCommand()
196 """
def ScanDir(file, logfile) :
    """Search every directory listed under ``targetdir`` for *file*.

    Each ``rfdir`` output line with more than eight columns is taken as a
    sub-directory entry and checked with ConfirmPath().  Returns the
    confirmed path, or ``emptyString`` when nothing matches.
    """
    scan_cmd = "rfdir " + targetdir
    logfile.write("Scanning tape area " + scan_cmd + "\n")
    status, listing = CheckCommand(scan_cmd, logfile)
    if status == 0 and listing != "":
        relative_name = file[len(arcdir):]
        for entry in listing.split('\n'):
            columns = entry.split()
            if len(columns) <= 8:
                continue
            candidate = targetdir + columns[-1]
            logfile.write("Looking for File at " + candidate + "\n")
            found = ConfirmPath(file, candidate, logfile)
            logfile.write("Returned Path " + found + "\n")
            if found == candidate + "/" + relative_name:
                return found
    return emptyString
214 
215 # path check over
216 #
217 
218 #
219 # T0 transfer functions
220 """
221 Transfer File with T0 System
222 Reference to CheckCommand()
223 """
def TransferWithT0System(filepath, flag, logfile):
    """Submit *filepath* to the T0 transfer script.

    *flag* False means a brand-new submission, True a re-submission
    (``--renotify``).  With ``EnableTransfer`` off the script is called with
    ``--test``.  Returns True only when the script output contains the
    confirmation line matching *flag*; otherwise False.
    """
    prefix_len = len(arcdir)
    filename = filepath[prefix_len:]
    # the run number is cut out by position: the file name length matters!
    nrun = filepath[prefix_len + len("DQM_Online_R"):-len("_R000064807.zip")]

    pieces = [transferScript, " --runnumber ", nrun, " --path ", arcdir, " --filename ", filename]
    if EnableTransfer is False:
        pieces.append(" --test ")      # TEST, no file transfer
    if flag is True:
        pieces.append(" --renotify ")  # transfer failed previously, send it again
    pieces.append(fullTransferArg)

    status, text = CheckCommand("".join(pieces), logfile)
    if status == 0:
        for line in text.split('\n'):
            if flag is False and "File sucessfully submitted for transfer." in line:
                logfile.write("File is queued " + filepath + "\n")
                return True
            if flag is True and "File sucessfully re-submitted for transfer." in line:
                logfile.write("File is resubmitted " + filepath + "\n")
                return True
    if EnableTransfer is False:
        logfile.write(" *** Transfer Test Mode ***\n No File Transferred.\n")
    return False
245 
246 """
247 Check File Status of Transferred File
248 Reference to CheckCommand(), TransferWithT0System()
249 """
def CheckFileStatus(filepath, logfile):
    """Ask the T0 transfer script for the status of *filepath*.

    Returns True when the script reports the file as checked by the T0
    system.  When the file is unknown it is submitted; when it is injected
    but not yet checked it is re-submitted and its modification time is
    refreshed.  In those cases, and whenever no known status line is found
    or the status command fails, False is returned.
    """
    filename = filepath[len(arcdir):]
    status, text = CheckCommand(transferScript + statusCheck + filename, logfile)
    if status != 0:
        return False
    for line in text.split('\n'):
        if line.find("FILES_TRANS_CHECKED: File found in database and checked by T0 system.") != -1:
            return True  # file transferred successfully!
        if line.find("File not found in database.") != -1:
            # file not transferred at all: submit it as a brand-new transfer
            TransferWithT0System(filepath, False, logfile)
            return False
        if line.find("FILES_INJECTED : File found in database and handed over to T0 system. ") != -1:
            # file is known but not confirmed yet: ask for it again
            TransferWithT0System(filepath, True, logfile)
            old_mtime = os.stat(filepath).st_mtime
            # st_mtime is a number, so it must be converted before concatenation
            logfile.write("Old M Time is " + str(old_mtime) + "\n")
            os.utime(filepath, None)  # change mtime to help the path search
            new_mtime = os.stat(filepath).st_mtime
            logfile.write("New M Time is " + str(new_mtime) + "\n")
            logfile.write("File transfer need more time, please wait!\n")
            return False
    return False
275 
276 # T0 transfer over
277 #
278 
279 #
280 # read file list from db
281 """
282 Get List of un-merged files, for Zipping
283 Reference to GetFileFromDB(), GetZippedFile()
284 """
def GetListOfFiles(logfile):
    """Collect per-run files from the database until they exceed the size limit.

    Returns a blank-separated string of file names (each preceded by a
    blank) as soon as their summed size is above ``fileSizeThreshold``, or
    ``emptyString`` when the available files do not add up to that much.

    With ``PathReplace`` on, every file returned by GetFileFromDB() is
    counted.  With it off, counting only starts after the file whose name
    tail matches the most recent merged zip file (or from the beginning
    when there is no merged zip file yet).
    """
    logfile.write("Retrieving list of files from DB ...\n")
    totalSize = 0
    zipFileList = ''

    if PathReplace is True:  # file removal is involved
        for line in GetFileFromDB(logfile).split('\n'):
            if line == "" or line == emptyString:
                continue
            fields = line.rstrip().split('|')  # "name|size"
            name = fields[0]
            # log the name: concatenating the split list itself raised TypeError
            logfile.write("String just read is " + name + "\n")
            number = fields[1]
            logfile.write("Number just read is " + number + "\n")
            totalSize += int(number)
            logfile.write("Current File Size Sum is " + str(totalSize) + " out of Limit" + str(fileSizeThreshold) + "\n")
            zipFileList += " " + name
            if totalSize > fileSizeThreshold:
                return zipFileList

    if PathReplace is False:  # file removal is NOT involved
        activate = False
        lastfile = ""
        # str.split always yields at least one element, so [0] is safe
        newest = GetZippedFile(logfile, True).split("\n")[0]
        if newest == "":
            activate = True
            logfile.write("No Merged Zip File \n")
        elif newest != emptyString:
            lastfile = newest
            logfile.write("Last Merged Zip File is " + lastfile + "\n")

        for line in GetFileFromDB(logfile).split("\n"):
            if line == "" or line == emptyString:
                continue
            fields = line.split('|')  # "name|size"
            name = fields[0]
            if activate is True:
                logfile.write("Name just read is " + name + "\n")
                number = fields[1]
                logfile.write("Number just read is " + number + "\n")
                totalSize += int(number)
                logfile.write("Current File Size Sum is " + str(totalSize) + " out of Limit" + str(fileSizeThreshold) + "\n")
                zipFileList += " " + name
                if totalSize > fileSizeThreshold:
                    return zipFileList
            elif lastfile != "":
                # start counting after the file that closes the last merged zip;
                # both names are compared by their position-cut tails
                zip_tail = lastfile[len(arcdir)+len("DQM_Online_R000064821_"):-len(".zip")]
                root_tail = name[len(dir)+len("DQM_V0001_R000064821_"):-len(".root")]
                if zip_tail == root_tail:
                    activate = True

    return emptyString  # it's too small
336 
337 """
338 Read and sort file from db, for Zipping
339 Reference to CheckCommand()
340 """
def GetFileFromDB(logfile):
    """Return the per-run file list from the master db, oldest first.

    The raw sqlite3 output (name and size columns of ``t_files`` for names
    matching the DQM per-run ROOT pattern, excluding RPC and zip entries)
    is returned as one string.  On a failing command its output is logged
    and ``emptyString`` is returned.
    """
    logfile.write(" *** Getting Per-Run File List from Master DB ***\n")
    query = ("select name, size from t_files"
             " where name like '%DQM_V%_R%.root'"
             " and not name like '%RPC%'"
             " and not name like '%zip%'"
             " order by mtime asc")
    cmd = sqlite3 + " " + db + " \"" + query + "\" "
    status, text = CheckCommand(cmd, logfile)
    if status != 0:
        logfile.write(text)
        return emptyString
    return text
356 
357 """
358 Get the last merged File, for Zipping
359 Reference to CheckCommand()
360 """
def GetZippedFile(logfile, flag):
    """Return the zipped DQM file names recorded in the master db.

    *flag* True sorts newest first (``mtime desc``), False oldest first
    (``mtime asc``).  The raw sqlite3 output is returned as one string, or
    ``emptyString`` when the command fails.
    """
    logfile.write(" *** Getting Zipped File List from Master DB ***\n")
    if flag is True:
        ordering = "desc"
    elif flag is False:
        ordering = "asc"
    # any other flag value leaves `ordering` unbound, so the next line fails
    query = "select name from t_files where name like '%DQM%.zip' order by mtime " + ordering
    cmd = sqlite3 + " " + db + " \"" + query + "\" "
    status, text = CheckCommand(cmd, logfile)
    if status != 0:
        return emptyString
    return text
373 
374 """
375 Getting All Files from DB, for File Removal
376 Reference to CheckCommand()
377 """
def GetAllFiles(logfile) :
    """Return all DQM ROOT and zip file names from the db, oldest first.

    On success the result is a list of lines from the sqlite3 output; when
    the command fails the string ``emptyString`` is returned instead.
    """
    logfile.write(" *** Getting All Files from db ***\n")
    query = "select name from t_files where name like '%DQM%.root' or name like '%DQM%.zip'order by mtime asc"
    cmd = sqlite3 + db + " \"" + query + "\""
    status, text = CheckCommand(cmd, logfile)
    if status != 0:
        return emptyString
    return text.split('\n')
389 
390 # file list over
391 #
392 
393 #
394 # remove files and register/unregister if needed
395 """
396 File Cleaner, Remove the oldest file
397 Reference to GetAllFiles(), CheckFileStatus(), SetPath(), Delete(), CheckZippedFiles()
398 """
def Cleaner(logfile) :
    """Remove the oldest removable file, then stop.

    Files are visited in the order returned by GetAllFiles().  A zip file
    is deleted only when its transfer is confirmed, ``PathReplace`` is on
    and its Castor path could be registered.  The first per-run ROOT file
    under ``dir`` ends the search: it is deleted directly when
    ``PathReplace`` is on, otherwise only if CheckZippedFiles() finds it in
    a zip file.  If the list is exhausted, that is logged.
    """
    logfile.write(" *** Cleaning File ***\n")
    for candidate in GetAllFiles(logfile):
        if ".zip" in candidate:  # zip file
            transferred = CheckFileStatus(candidate, logfile)  # check transfer status
            if transferred is True and PathReplace is True:
                if SetPath(candidate, logfile) is True:  # path found on tape
                    Delete(candidate, logfile)  # remove only if transferred
                    return  # exits when the file is deleted
        if ".root" in candidate and dir in candidate:  # select per-run files
            if PathReplace is False:
                CheckZippedFiles(candidate, logfile)  # need to check if zipped or not
            if PathReplace is True:
                Delete(candidate, logfile)  # must be zipped by this step
            return  # handle only one per-run file per call
    # reached only when the loop ran to completion without returning
    logfile.write("No File to be removed!\n")
415 
416 """
417 Remove File if zipped
418 Reference to Delete()
419 """
def CheckZippedFiles(file, logfile):
    """Delete *file* if it is contained in one of the registered zip files.

    Every zip file returned by GetZippedFile() (oldest first) is opened and
    its member names are compared with *file*.  Returns True after deleting
    the file; otherwise logs that the file has not been zipped and returns
    False.
    """
    logfile.write(" *** Check Zipped File ***\n")
    # Archive member names are stored without a leading path separator, so
    # an absolute path has to be compared in its stripped form as well.
    wanted = (file, file.lstrip("/"))
    for thisfile in GetZippedFile(logfile, False).split("\n"):
        if thisfile.find("zip") == -1 or thisfile == emptyString:
            continue
        archive = zipfile.ZipFile(thisfile, "r")  # open file to see it is readable
        try:
            members = [info.filename for info in archive.infolist()]
        finally:
            archive.close()  # always release the archive handle
        for member in members:
            if member in wanted:
                Delete(file, logfile)
                return True
    logfile.write("This file hasn't been zipped, " + file + " It shouldn't be deleted now!\n")
    return False
433 
434 """
435 Remove and Register Files
436 Reference to Delete(), filereg()
437 """
def RemoveAndRegister(newFile,oldFiles, logfile):
    """Re-register each merged file as a member of *newFile* and delete it.

    *oldFiles* is a whitespace-separated list of file paths.  Each one is
    registered as ``newFile#<path relative to dir>`` and then removed via
    Delete().
    """
    prefix_len = len(dir)
    for member in oldFiles.split():
        member_path = newFile + "#" + member[prefix_len:]
        logfile.write("Registering New File Path " + member_path +"\n")
        filereg(db, bakdb, tmpdb, member_path, logfile)
        Delete(member, logfile)
444 
445 """
446 Remove and Unregister A File
447 Reference to fileunreg()
448 """
def Delete(file, logfile):
    """Unregister *file* from the database, then remove it from disk.

    Both steps are recorded in ``logfile``.
    """
    # database first, so a failing unregister leaves the file on disk
    fileunreg(db, bakdb, tmpdb, file, logfile)
    logfile.write("".join((file, "removed from db...\n")))
    os.remove(file)
    logfile.write("".join((file, "removed from disk...\n")))
454 
455 # removal over
456 #
457 
458 #
459 # main program
460 """
461 Main Prog
462 Reference to DiskUsage(), GetListOfFiles(), TransferWithT0System(), RemoveAndRegister()
463 """
if __name__ == "__main__":
    # Script flow: optional disk cleanup, collect enough per-run files,
    # merge them into one uncompressed zip file, register it and hand it
    # to the T0 transfer system.  The log file is closed on every exit path.
    try:
        logfile.write("Starting Archival *Test* Script ...\n")
        if EnableFileRemoval is True:
            DiskUsage(logfile)  # check disk usage
        zipFileList = GetListOfFiles(logfile)  # get list of files for merging
        if zipFileList == emptyString:
            logfile.write("Sum of Files is below Threshold = " + str(fileSizeThreshold) + "\n")
        else:  # make zip file only if the output file will be large enough
            members = zipFileList.split()
            # the name parts are cut out by position, so file name lengths matter
            firstFile = "DQM_Online_" + members[0][len(dir)+len("DQM_V0010_"):-len("R000064807.root")]
            lastFile = members[-1][len(dir)+len("DQM_V0010_R000064807_"):-len(".root")]
            outputFileName = arcdir + firstFile + lastFile + ".zip"
            logfile.write("1st File = " + firstFile + " Last File = " + lastFile + "\n")
            if os.path.exists(outputFileName) is True:
                os.remove(outputFileName)  # remove old one if it exists
            if lastFile.find("R") == -1 or firstFile.find("R") == -1:
                logfile.write("Wrong File Name Stripping! Check directory path to the file!\n")
                raise RuntimeError("Wrong File Name Stripping! Check directory path to the file!")

            archive = zipfile.ZipFile(outputFileName, "w")  # create zip file
            try:
                for name in members:
                    archive.write(name, name, zipfile.ZIP_STORED)  # add each file, uncompressed
            finally:
                archive.close()  # close the zip file even if adding a member fails

            filepath = outputFileName
            zipFileSize = os.path.getsize(filepath)
            logfile.write("Zip File Size = " + str(zipFileSize) + "\n")
            if zipFileSize <= fileSizeThreshold:  # check if file is large enough
                logfile.write("Inconsistency! Created Zip File too small!\n")
                raise RuntimeError("Inconsistency! Created Zip File too small!")

            archive = zipfile.ZipFile(outputFileName, "r")  # reopen to see it is readable
            try:
                for info in archive.infolist():
                    logfile.write("File = " + info.filename + "\n")
            finally:
                archive.close()

            if PathReplace is False:
                filereg(db, bakdb, tmpdb, filepath, logfile)
            flag = False  # brand new transfer
            transfer = TransferWithT0System(filepath, flag, logfile)  # sending file to Castor
            if transfer is True and PathReplace is True:
                RemoveAndRegister(filepath, zipFileList, logfile)  # register new paths and remove files
    finally:
        logfile.close()
499 
500 # main program over
501 #
def TransferWithT0System
Definition: archive.py:224
def CheckZippedFiles
Definition: archive.py:420
def RemoveAndRegister
Definition: archive.py:438
def Delete
Definition: archive.py:449
def CheckCommand
Definition: archive.py:90
def ConfirmPath
Definition: archive.py:173
def DiskUsage
Definition: archive.py:116
def CheckPath
Definition: archive.py:155
def SetPath
Definition: archive.py:141
def filereg
Definition: archive.py:49
def GetAllFiles
Definition: archive.py:378
def fileunreg
Definition: archive.py:65
def GetFileFromDB
Definition: archive.py:341
def Cleaner
Definition: archive.py:399
def GetZippedFile
Definition: archive.py:361
def ScanDir
Definition: archive.py:197
def GetListOfFiles
Definition: archive.py:285
double split
Definition: MVATrainer.cc:139
def CheckFileStatus
Definition: archive.py:250