00001
00002
00003 import os, sys, stat
00004 from operator import itemgetter
00005
class TreeAnalyzer(object):
    """Collect file and directory sizes below a directory tree.

    analyzePath() fills two dictionaries:

      dirSizes  -- directory path -> summed size in bytes of the files located
                   directly in that directory (files in sub-directories are
                   accounted to the sub-directory, not to the parent)
      fileSizes -- file path -> size in bytes, for regular files only

    The collected data is written as JSON to ``outFileName`` and as a pickle
    to a sibling ``.pkl`` file; show() prints a short summary to stdout.
    """

    # Any directory whose path contains one of these substrings is ignored.
    _SKIPPED_PATH_PARTS = (
        'CVS',
        '.glimpse_',
        'Configuration/PyReleaseValidation/data/run/',
    )

    def __init__(self, outFileName):
        """Remember the output file name; nothing is scanned or written yet.

        outFileName -- path of the JSON file written by analyzePath(). May be
                       None, in which case analyzePath() only collects the
                       sizes and writes no files.
        """
        self.dirSizes = {}
        self.fileSizes = {}
        self.outFileName = outFileName
        print("going to write to: %s" % (self.outFileName,))

    def analyzePath(self, dirIn):
        """Walk ``dirIn``, record all sizes and write the result files.

        Entries whose size cannot be determined (e.g. dangling symbolic links
        or files removed while the walk is running) are reported and skipped
        instead of aborting the whole scan. Failures while writing the output
        files are reported on stdout and do not raise.
        """
        for (path, dirs, files) in os.walk(dirIn):

            if any(part in path for part in self._SKIPPED_PATH_PARTS):
                continue

            for name in files:
                if '.glimpse_index' in name:
                    continue
                fileName = os.path.join(path, name)
                try:
                    fileSize = os.path.getsize(fileName)
                except OSError as e:
                    print("skipping %s: %s" % (fileName, e))
                    continue
                # Directories appear in dirSizes only once a file was counted.
                self.dirSizes[path] = self.dirSizes.get(path, 0) + fileSize
                if os.path.isfile(fileName):
                    self.fileSizes[fileName] = fileSize

        if self.outFileName is None:
            print("no output file name given, treeInfo not written")
            return

        treeInfo = [os.path.abspath(dirIn), self.dirSizes, self.fileSizes]
        self._writeJson(treeInfo)
        self._writePickle(treeInfo)

    def _pickleFileName(self):
        """Return the pickle file name derived from the JSON file name.

        A trailing '.json' extension is replaced by '.pkl'; any other name
        gets '.pkl' appended so the pickle never overwrites the JSON file.
        """
        root, ext = os.path.splitext(self.outFileName)
        if ext == '.json':
            return root + '.pkl'
        return self.outFileName + '.pkl'

    def _writeJson(self, treeInfo):
        """Best-effort dump of ``treeInfo`` as JSON to self.outFileName."""
        try:
            # Imported here so that a missing module is reported like any
            # other write problem instead of breaking the whole script.
            import json
            jsonFileName = self.outFileName
            with open(jsonFileName, 'w') as jsonFile:
                json.dump(treeInfo, jsonFile)
            print('treeInfo info written to  %s' % (jsonFileName,))
        except Exception as e:
            print("error writing json file: %s" % (e,))

    def _writePickle(self, treeInfo):
        """Best-effort dump of ``treeInfo`` as a pickle next to the JSON file."""
        try:
            import pickle
            pklFileName = self._pickleFileName()
            # Pickle data is binary: 'wb' is required on Python 3 and avoids
            # newline translation on platforms that distinguish text files.
            with open(pklFileName, 'wb') as pklFile:
                pickle.dump(treeInfo, pklFile)
            print('treeInfo info written to  %s' % (pklFileName,))
        except Exception as e:
            print("error writing pkl file: %s" % (e,))

    def show(self):
        """Print the number of empty files and the ten largest dirs and files.

        Fewer than ten entries are listed when fewer were found.
        """
        topDirs = sorted(self.dirSizes.items(), key=itemgetter(1), reverse=True)
        topFiles = sorted(self.fileSizes.items(), key=itemgetter(1), reverse=True)

        emptyFiles = [p for (p, s) in topFiles if s == 0]
        print("found  %d empty files. " % len(emptyFiles))

        print("found  %d directories, top 10 are:" % len(self.dirSizes))
        # Slicing (instead of indexing 0..9) copes with short lists.
        for entry in topDirs[:10]:
            print(repr(entry))

        print("found  %d files, top 10 are:" % len(self.fileSizes))
        for entry in topFiles[:10]:
            print(repr(entry))
00073
00074
def main():
    """Command-line entry point: analyze a directory tree and print a summary.

    Options:
      -c, --checkDir DIR   directory to analyze (default: '.')
      -o, --outFile FILE   output file name handed to TreeAnalyzer
                           (default: None)

    Exits with status 2 when the command line cannot be parsed.
    """
    import getopt

    # Only the parsing itself can raise GetoptError, so keep the try minimal.
    try:
        opts, args = getopt.getopt(sys.argv[1:], "c:o:", ['checkDir=', 'outFile='])
    except getopt.GetoptError as e:
        print("unknown option %s" % (e,))
        sys.exit(2)

    checkDir = '.'
    outFile = None
    for opt, arg in opts:
        if opt in ('-c', '--checkDir'):
            checkDir = arg
        elif opt in ('-o', '--outFile'):
            outFile = arg

    ta = TreeAnalyzer(outFile)
    ta.analyzePath(checkDir)
    ta.show()
00099
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
00102