CMS 3D CMS Logo

producerFileCleanner.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 from __future__ import print_function
3 import os, time, sys, glob, re, smtplib, socket
4 from email.MIMEText import MIMEText
5 from traceback import print_exc, format_exc
6 from datetime import datetime
7 from subprocess import Popen,PIPE
# Reopen stdout unbuffered so every log line is written out immediately.
sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

# Command-line arguments: all four are required, in this order.
EMAIL = sys.argv[1]            # recipient of the alert e-mails
TFILEDONEDIR = sys.argv[2]     # directory whose files are deleted to free space
COLLECTDIR = sys.argv[3]       # output directory purged of unexpected entries
ORIGINALDONEDIR = sys.argv[4]  # repository checked for a copy before deleting

# Constants
PRODUCER_DU_TOP = 90.0  # disk usage (0% to 100%) at which cleaning starts
PRODUCER_DU_BOT = 50.0  # disk usage (0% to 100%) the cleaning tries to reach
WAITTIME = 3600 * 4  # seconds to sleep between two checks
EMAILINTERVAL = 15 * 60  # Time between sent emails
SENDMAIL = "/usr/sbin/sendmail"  # sendmail location
HOSTNAME = socket.gethostname().lower()
# abspath is required: when the script is started by bare name,
# dirname(__file__) is "" and the stop file would resolve to "/.stop".
EXEDIR = os.path.dirname(os.path.abspath(__file__))
STOP_FILE = "%s/.stop" % EXEDIR

# Control variables
lastEmailSent = 0  # time.time() of the last alert e-mail (0 = none sent yet)
27 
28 # --------------------------------------------------------------------
def logme(msg, *args):
    """Print a log line prefixed with the current time, script name and PID.

    msg  -- message text, normally a %-style format string
    args -- values interpolated into msg

    If msg cannot be formatted with args (for example it contains a
    literal '%' coming from a file name), the unformatted text followed by
    the args is printed instead of raising, so logging never aborts the
    caller.
    """
    procid = "[%s/%d]" % (__file__.rsplit("/", 1)[-1], os.getpid())
    try:
        text = msg % args
    except (TypeError, ValueError):
        text = " ".join([str(msg)] + [str(a) for a in args])
    print(datetime.now(), procid, text)
32 
def getDiskUsage(path):
    """Return usage figures for the filesystem that contains path.

    Returns a tuple (size, available, used, usedPer) where size, available
    and used are in bytes and usedPer is used/size as a fraction between
    0 and 1 (0.0 when the filesystem reports a size of zero).

    Raises OSError if os.statvfs fails for path.
    """
    fsStats = os.statvfs(path)
    # f_blocks and f_bavail are counted in f_frsize units; f_bsize is only
    # the preferred I/O size. Fall back to f_bsize if f_frsize is reported 0.
    blockSize = fsStats.f_frsize or fsStats.f_bsize
    size = blockSize * fsStats.f_blocks
    available = blockSize * fsStats.f_bavail
    used = size - available
    usedPer = float(used) / size if size else 0.0
    return (size, available, used, usedPer)
40 
def getDirSize(path):
    """Return an estimate, in bytes, of the space taken by the tree at path.

    The total starts at the st_blksize of path; every directory found by
    os.walk adds (link count - 1) * its own st_size, and every file adds
    its st_size. os.lstat is used, so symbolic links are counted
    themselves rather than followed.

    Entries that disappear while the tree is being walked are skipped.
    Raises OSError if path itself cannot be stat'ed.
    """
    import stat
    size = os.stat(path).st_blksize
    for directory, subdirs, files in os.walk(path):
        try:
            dStats = os.lstat(directory)
        except OSError:
            # directory vanished between listing and stat
            continue
        size += (dStats[stat.ST_NLINK] - 1) * dStats[stat.ST_SIZE]
        for f in files:
            try:
                size += os.lstat(os.path.join(directory, f))[stat.ST_SIZE]
            except OSError:
                # file vanished between listing and stat
                continue

    return size
53 
def sendmail(body="Hello from producerFileCleanner",subject= "Hello!"):
    """Send body to EMAIL by piping a message into `SENDMAIL -t`.

    body    -- text placed after the headers
    subject -- accepted for compatibility but not used: the Subject header
               is always "producerFileCleaner problem on server <HOSTNAME>"

    Best effort: a failure to start sendmail, a failure while writing to
    it, or a non-zero exit status is logged with logme and never raised.
    """
    message = ("To: %s\n"
               "Subject: producerFileCleaner problem on server %s\n"
               "\n"  # blank line separating headers from body
               "%s\n") % (EMAIL, HOSTNAME, body)
    try:
        # Argument list instead of a shell string: no shell is involved.
        # universal_newlines=True makes stdin accept text on Python 3 too.
        scall = Popen([SENDMAIL, "-t"], stdin=PIPE, universal_newlines=True)
        scall.communicate(message)
    except (OSError, IOError) as e:
        logme("ERROR: Could not run %s: %s", SENDMAIL, e)
        return

    rc = scall.returncode
    if rc != 0:
        logme("ERROR: Sendmail exit with status %s", rc)
65 
66 # --------------------------------------------------------------------
# File-name patterns, compiled once outside the loop.
# NOTE(review): the subsystem part accepts digits for TFILEDONEDIR but only
# letters for COLLECTDIR, so a timestamped file whose subsystem name contains
# a digit is treated as junk in COLLECTDIR -- confirm this is intended.
_DONE_FILE_RE = re.compile(
    r"(DQM|Playback|Playback_full)_V[0-9]{4}_([0-9a-zA-Z]+)_R([0-9]{9})(_T[0-9]{8}|)\.root")
_COLLECT_FILE_RE = re.compile(
    r"(DQM|Playback|Playback_full)_V[0-9]{4}_([a-zA-Z]+)_R([0-9]{9})_T[0-9]{8}\.root")

# Main loop. Each pass:
#   1. quits if STOP_FILE exists;
#   2. sleeps if disk usage is below PRODUCER_DU_TOP;
#   3. picks files in TFILEDONEDIR, ordered by run number, until the bytes
#      needed to get down to PRODUCER_DU_BOT are covered;
#   4. purges COLLECTDIR of sub-directories, unexpected files and all but
#      the newest *.tmp file;
#   5. deletes the selected files and the directories left empty.
while True:
    # Check if you need to stop.
    if os.path.exists(STOP_FILE):
        logme("INFO: Stop file found, quitting")
        sys.exit(0)

    try:
        try:
            doneSize = getDirSize(TFILEDONEDIR)
            diskSize, userAvailable, diskUsed, diskPUsage = getDiskUsage(TFILEDONEDIR)

        except Exception:
            # TFILEDONEDIR could not be measured: count nothing as
            # deletable and watch the /home filesystem instead.
            doneSize = 0
            diskSize, userAvailable, diskUsed, diskPUsage = getDiskUsage("/home")

        diskPUsage *= 100
        if diskPUsage < PRODUCER_DU_TOP:
            time.sleep(WAITTIME)
            continue

        # Bytes that must be freed to come down to PRODUCER_DU_BOT.
        quota = int(diskSize * PRODUCER_DU_BOT / 100)
        delQuota = diskUsed - quota
        if delQuota > doneSize:
            now = time.time()
            if now - EMAILINTERVAL > lastEmailSent:
                msg = "ERROR: Something is filling up the disks, %s does not" \
                      " have enough files to get to the Bottom Boundary of" \
                      " %.2f%%" % (TFILEDONEDIR, PRODUCER_DU_BOT)
                sendmail(msg)
                lastEmailSent = now

            logme("ERROR: Something is filling up the disks, %s does not"
                  " have enough files to get to the Bottom Boundary of"
                  " %.2f%%", TFILEDONEDIR, PRODUCER_DU_BOT)

        # Select files to delete, ordered by the 9 characters after the
        # last "_R" in the name (the run number in matching names).
        aDelQuota = 0
        FILE_LIST = []
        for directory, subdirs, files in os.walk(TFILEDONEDIR):
            subdirs.sort()
            for f in sorted(files, key=lambda a: a[a.rfind("_R", 1) + 2:a.rfind("_R", 1) + 11]):
                fMatch = _DONE_FILE_RE.match(f)
                if fMatch:
                    subSystem = fMatch.group(2)
                    run = fMatch.group(3)
                    destDir = "%s/%sxxxx/%sxx/DQM_V0001_%s_R%s.root" % (
                        ORIGINALDONEDIR, run[0:5], run[0:7], subSystem, run)
                    fullFName = "%s/%s" % (directory, f)
                    fSize = os.stat(fullFName).st_size
                    if fSize + aDelQuota > delQuota:
                        # Stops this directory only; the walk goes on.
                        break

                    FILE_LIST.append(fullFName)
                    aDelQuota += fSize
                    if not os.path.exists(destDir):
                        logme("WARNING: No subsystem file in repository %s for"
                              " file %s, deleting any way",
                              ORIGINALDONEDIR, fullFName)

        if FILE_LIST:
            logme("INFO: Found %d files to be deleted", len(FILE_LIST))

        # Cleaning output directory
        for directory, subdirs, files in os.walk(COLLECTDIR):
            # no subdirectories allowed in COLLECTDIR
            if subdirs:
                logme("ERROR: Output directory %s, must not contain"
                      " subdirectories, cleanning", COLLECTDIR)

            for sd in subdirs:
                fullSdName = "%s/%s" % (directory, sd)
                # Bottom-up so each directory is empty when it is removed.
                for sdRoot, sdDirs, sdFiles in os.walk(fullSdName, topdown=False):
                    for f in sdFiles:
                        # os.walk yields bare names: join with sdRoot.
                        sdFile = os.path.join(sdRoot, f)
                        try:
                            os.remove(sdFile)
                            logme("INFO: File %s has been removed", sdFile)
                        except OSError as e:
                            logme("ERROR: Problem deleting file: [Errno %d] %s, '%s'",
                                  e.errno, e.strerror, e.filename)

                    try:
                        os.rmdir(sdRoot)
                        logme("INFO: File %s has been removed", sdRoot)
                    except OSError as e:
                        logme("ERROR: Problem deleting directory: [Errno %d] %s, '%s'",
                              e.errno, e.strerror, e.filename)

            # The sub-directories were just handled; do not walk into them.
            del subdirs[:]

            for f in files:
                if _COLLECT_FILE_RE.match(f):
                    continue

                # *.tmp files are handled separately below.
                if re.match(r".*\.tmp", f):
                    continue

                FILE_LIST.append("%s/%s" % (directory, f))

        # Cleaning tmp files: keep only the most recently modified one.
        TMP_LIST = glob.glob("%s/*.tmp" % COLLECTDIR)
        TMP_LIST.sort(reverse=True, key=lambda x: os.stat(x).st_mtime)
        FILE_LIST.extend(TMP_LIST[1:])

        # Remove files
        DIR_LIST = []
        for f in FILE_LIST:
            try:
                os.remove(f)
                logme("INFO: File %s has been removed", f)
            except OSError as e:
                logme("ERROR: Problem deleting file: [Errno %d] %s, '%s'",
                      e.errno, e.strerror, e.filename)

            # Remember parent directories outside COLLECTDIR for removal.
            parent = os.path.dirname(f)
            if parent not in DIR_LIST and COLLECTDIR not in parent:
                DIR_LIST.append(parent)

        # Remove empty directories
        for d in DIR_LIST:
            try:
                os.removedirs(d)
                logme("INFO: Directory %s has been removed", d)
            except OSError as e:
                logme("ERROR: Directory delition failed: [Errno %d] %s, '%s'",
                      e.errno, e.strerror, e.filename)

    except KeyboardInterrupt:
        sys.exit(0)

    except Exception as e:
        logme('ERROR: %s', e)
        # Rate-limited: at most one alert e-mail per EMAILINTERVAL.
        now = time.time()
        if now - EMAILINTERVAL > lastEmailSent:
            sendmail('ERROR: %s\n%s' % (e, format_exc()))
            lastEmailSent = now

        print_exc()

    time.sleep(WAITTIME)
202 
203 
204 
205 
dqmMemoryStats.float
float
Definition: dqmMemoryStats.py:127
producerFileCleanner.getDirSize
def getDirSize(path)
Definition: producerFileCleanner.py:41
producerFileCleanner.getDiskUsage
def getDiskUsage(path)
Definition: producerFileCleanner.py:33
print
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:46
producerFileCleanner.logme
def logme(msg, *args)
Definition: producerFileCleanner.py:29
producerFileCleanner.sendmail
def sendmail(body="Hello from producerFileCleanner", subject="Hello!")
Definition: producerFileCleanner.py:54