CMS 3D CMS Logo

producerFileCleanner.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 import os, time, sys, glob, re, smtplib, socket
3 from email.MIMEText import MIMEText
4 from traceback import print_exc, format_exc
5 from datetime import datetime
6 from subprocess import Popen,PIPE
# Reopen stdout with a buffer size of 0 so every log line is written
# immediately.
sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

# Command-line arguments; all four are required, in this order.
EMAIL = sys.argv[1]            # recipient written to the "To:" mail header
TFILEDONEDIR = sys.argv[2]     # directory whose *.root files may be deleted
COLLECTDIR = sys.argv[3]       # output directory that is tidied on each pass
ORIGINALDONEDIR = sys.argv[4]  # repository checked for a DQM_V0001 copy

# Constants
PRODUCER_DU_TOP = 90.0  # disk usage (0% to 100%) at which cleaning starts
PRODUCER_DU_BOT = 50.0  # disk usage (0% to 100%) the cleaning aims for
WAITTIME = 3600 * 4  # seconds slept between two passes
EMAILINTERVAL = 15 * 60  # Time between sent emails
SENDMAIL = "/usr/sbin/sendmail"  # sendmail location
HOSTNAME = socket.gethostname().lower()
# Resolve the script location to an absolute path first: when the script is
# started by bare file name, os.path.dirname(__file__) is "" and the stop
# file would be looked up as "/.stop" instead of next to the script.
EXEDIR = os.path.dirname(os.path.abspath(__file__))
STOP_FILE = "%s/.stop" % EXEDIR

# Control variables
lastEmailSent = 0  # time.time() value recorded when the last alert was mailed
26 
27 # --------------------------------------------------------------------
def logme(msg, *args):
    """Print a timestamped log line tagged with the script name and PID.

    msg  -- message text; %-formatted with ``args`` when any are given.
    args -- values interpolated into ``msg``.

    With no ``args`` the message is printed verbatim, so an already
    formatted message that contains a literal ``%`` (for instance a file
    name) no longer raises a formatting error.
    """
    procid = "[%s/%d]" % (os.path.basename(__file__), os.getpid())
    text = msg % args if args else msg
    # A single pre-joined string keeps this valid both as a Python 2 print
    # statement and as a Python 3 print() call.
    print("%s %s %s" % (datetime.now(), procid, text))
31 
def getDiskUsage(path):
    """Return usage figures for the filesystem that holds ``path``.

    Returns a tuple ``(size, available, used, usedPer)``:
      size      -- total size of the filesystem in bytes
      available -- bytes available to unprivileged users (f_bavail)
      used      -- size - available
      usedPer   -- used / size as a fraction between 0.0 and 1.0
                   (0.0 when the filesystem reports a size of 0)

    Raises OSError when ``path`` cannot be queried with os.statvfs().
    """
    fsStats = os.statvfs(path)
    # f_blocks and f_bavail are counted in f_frsize units (the fragment
    # size), not f_bsize; fall back to f_bsize only if f_frsize is 0.
    blockSize = fsStats.f_frsize or fsStats.f_bsize
    size = blockSize * fsStats.f_blocks
    available = blockSize * fsStats.f_bavail
    used = size - available
    # Guard the division: a filesystem can report zero blocks.
    usedPer = float(used) / size if size else 0.0
    return (size, available, used, usedPer)
39 
def getDirSize(path):
    """Return the size estimate, in bytes, of the tree rooted at ``path``.

    The estimate starts at the st_blksize reported for ``path`` and, for
    every directory visited by os.walk(), adds
    ``(link count - 1) * directory size`` plus the lstat() size of each
    file listed in it. Symbolic links are not followed (lstat).

    Raises OSError if ``path`` or any visited entry cannot be stat'ed.
    """
    total = os.stat(path).st_blksize
    for root, _dirs, names in os.walk(path):
        rootInfo = os.lstat(root)
        total += (rootInfo.st_nlink - 1) * rootInfo.st_size
        total += sum(os.lstat("%s/%s" % (root, name)).st_size
                     for name in names)

    return total
52 
def sendmail(body="Hello from producerFileCleanner", subject="Hello!"):
    """Mail ``body`` to EMAIL by piping a message to ``SENDMAIL -t``.

    body    -- text placed after the headers.
    subject -- accepted for interface compatibility but not used: the
               Subject header is always
               "producerFileCleaner problem on server <HOSTNAME>".

    This function does not raise on delivery problems. A failure to start
    sendmail or to write to it, and a non-zero exit status, are reported
    through logme() only, so it is safe to call from an error handler.
    """
    message = ("To: %s\n"
               "Subject: producerFileCleaner problem on server %s\n"
               "\n"  # blank line separating headers from body
               "%s\n") % (EMAIL, HOSTNAME, body)
    try:
        # An argument list runs the binary directly; no shell is involved.
        scall = Popen([SENDMAIL, "-t"], stdin=PIPE)
        # communicate() writes the message, closes stdin and waits.
        scall.communicate(message)
    except EnvironmentError as e:
        logme("ERROR: Could not run %s: %s", SENDMAIL, e)
        return

    rc = scall.returncode
    if rc != 0:
        logme("ERROR: Sendmail exit with status %s", rc)
64 
65 # --------------------------------------------------------------------
# Main loop. Each pass:
#   1. quits if STOP_FILE exists;
#   2. measures disk usage and goes back to sleep while it is below
#      PRODUCER_DU_TOP;
#   3. selects files from TFILEDONEDIR (ordered by run number) until enough
#      bytes are queued to bring usage down to PRODUCER_DU_BOT;
#   4. tidies COLLECTDIR (sub-directories, unexpected files, all but the
#      newest *.tmp file);
#   5. deletes the selected files and the directories left empty.
# Compiled once; the patterns are matched for every file on every pass.
DONE_FILE_RE = re.compile(r"(DQM|Playback|Playback_full)_V[0-9]{4}_([0-9a-zA-Z]+)_R([0-9]{9})(_T[0-9]{8}|)\.root")
COLLECT_KEEP_RE = re.compile(r"(DQM|Playback|Playback_full)_V[0-9]{4}_([a-zA-Z]+)_R([0-9]{9})_T[0-9]{8}\.root")
TMP_FILE_RE = re.compile(r".*\.tmp")


def _runKey(fileName):
    """Return the 9 characters following the last "_R" in ``fileName``."""
    pos = fileName.rfind("_R", 1)
    return fileName[pos + 2:pos + 11]


while True:
    # Check if you need to stop.
    if os.path.exists(STOP_FILE):
        logme("INFO: Stop file found, quitting")
        sys.exit(0)

    try:
        try:
            doneSize = getDirSize(TFILEDONEDIR)
            diskSize, userAvailable, diskUsed, diskPUsage = getDiskUsage(TFILEDONEDIR)

        except Exception:
            # TFILEDONEDIR could not be measured: treat it as holding nothing
            # deletable and watch the filesystem of /home instead.
            # (Exception, not a bare except, so Ctrl-C still reaches the
            # KeyboardInterrupt handler below.)
            doneSize = 0
            diskSize, userAvailable, diskUsed, diskPUsage = getDiskUsage("/home")

        diskPUsage *= 100
        if diskPUsage < PRODUCER_DU_TOP:
            time.sleep(WAITTIME)
            continue

        # Bytes that must be freed to get down to PRODUCER_DU_BOT percent.
        quota = int(diskSize * PRODUCER_DU_BOT / 100)
        delQuota = diskUsed - quota
        if delQuota > doneSize:
            now = time.time()
            if now - EMAILINTERVAL > lastEmailSent:
                msg = ("ERROR: Something is filling up the disks, %s does not"
                       " have enough files to get to the Bottom Boundary of"
                       " %.2f%%" % (TFILEDONEDIR, PRODUCER_DU_BOT))
                sendmail(msg)
                lastEmailSent = now

            logme("ERROR: Something is filling up the disks, %s does not"
                  " have enough files to get to the Bottom Boundary of"
                  " %.2f%%", TFILEDONEDIR, PRODUCER_DU_BOT)

        aDelQuota = 0  # bytes queued for deletion so far
        FILE_LIST = []
        for directory, subdirs, files in os.walk(TFILEDONEDIR):
            subdirs.sort()
            for f in sorted(files, key=_runKey):
                fMatch = DONE_FILE_RE.match(f)
                if not fMatch:
                    continue

                subSystem = fMatch.group(2)
                run = fMatch.group(3)
                repoFile = "%s/%sxxxx/%sxx/DQM_V0001_%s_R%s.root" % (
                    ORIGINALDONEDIR, run[0:5], run[0:7], subSystem, run)
                fullFName = "%s/%s" % (directory, f)
                fSize = os.stat(fullFName).st_size
                if fSize + aDelQuota > delQuota:
                    # Stops this directory only; os.walk() moves on to the
                    # next one.
                    break

                FILE_LIST.append(fullFName)
                aDelQuota += fSize
                if not os.path.exists(repoFile):
                    # Arguments are passed separately so a "%" in a path
                    # cannot break the formatting done inside logme().
                    logme("WARNING: No subsystem file in repository %s for"
                          " file %s, deleting any way",
                          ORIGINALDONEDIR, fullFName)

        if FILE_LIST:
            logme("INFO: Found %d files to be deleted", len(FILE_LIST))

        # Cleaning output directory
        for directory, subdirs, files in os.walk(COLLECTDIR):
            # No subdirectories are allowed in COLLECTDIR.
            if subdirs:
                logme("ERROR: Output directory %s, must not contain"
                      " subdirectories, cleanning", COLLECTDIR)

            for sd in subdirs:
                fullSdName = "%s/%s" % (directory, sd)
                # Bottom-up so each directory is empty when it is removed.
                for sdRoot, sdDirs, sdFiles in os.walk(fullSdName, topdown=False):
                    for f in sdFiles:
                        # os.walk() yields bare names; join with sdRoot to
                        # remove the file inside the sub-directory.
                        sdFile = os.path.join(sdRoot, f)
                        try:
                            os.remove(sdFile)
                            logme("INFO: File %s has been removed", sdFile)
                        except OSError as e:
                            logme("ERROR: Problem deleting file: [Errno %d] %s, '%s'",
                                  e.errno, e.strerror, e.filename)

                    try:
                        os.rmdir(sdRoot)
                        logme("INFO: Directory %s has been removed", sdRoot)
                    except OSError as e:
                        logme("ERROR: Problem deleting directory: [Errno %d] %s, '%s'",
                              e.errno, e.strerror, e.filename)

            # The sub-trees were handled above; do not walk into them again.
            del subdirs[:]

            for f in files:
                # Keep files following the expected naming scheme, and *.tmp
                # files (those are handled separately below).
                if COLLECT_KEEP_RE.match(f):
                    continue

                if TMP_FILE_RE.match(f):
                    continue

                fullFName = "%s/%s" % (directory, f)
                FILE_LIST.append(fullFName)

        # Cleaning tmp files: newest first, and the newest one is kept.
        TMP_LIST = glob.glob("%s/*.tmp" % COLLECTDIR)
        TMP_LIST.sort(reverse=True, key=lambda x: os.stat(x).st_mtime)
        FILE_LIST.extend(TMP_LIST[1:])

        # Remove files
        DIR_LIST = []
        for f in FILE_LIST:
            try:
                os.remove(f)
                logme("INFO: File %s has been removed", f)
            except OSError as e:
                logme("ERROR: Problem deleting file: [Errno %d] %s, '%s'",
                      e.errno, e.strerror, e.filename)

            parent = os.path.dirname(f)
            if parent not in DIR_LIST and COLLECTDIR not in parent:
                DIR_LIST.append(parent)

        # Remove empty directories
        # NOTE(review): os.removedirs() also prunes parent directories that
        # become empty, which can include TFILEDONEDIR itself -- confirm
        # this is intended.
        for d in DIR_LIST:
            try:
                os.removedirs(d)
                logme("INFO: Directory %s has been removed", d)
            except OSError as e:
                logme("ERROR: Directory delition failed: [Errno %d] %s, '%s'",
                      e.errno, e.strerror, e.filename)

    except KeyboardInterrupt:
        sys.exit(0)

    except Exception as e:
        logme('ERROR: %s', e)
        # Mail the error at most once per EMAILINTERVAL.
        now = time.time()
        if now - EMAILINTERVAL > lastEmailSent:
            sendmail('ERROR: %s\n%s' % (e, format_exc()))
            lastEmailSent = now

        print_exc()

    time.sleep(WAITTIME)
201 
202 
203 
204 
def sendmail(body="Hello from producerFileCleanner", subject="Hello!")