CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
storeTreeInfo.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 import os, sys, stat
4 from operator import itemgetter
5 
class TreeAnalyzer(object):
    """Collect file and directory sizes below a directory tree.

    analyzePath() fills the two size tables and dumps them to disk,
    show() prints a short summary of the largest entries.
    """

    # A directory is skipped when its path contains any of these fragments.
    _skippedPathParts = (
        'CVS',
        '.glimpse_',
        'Configuration/PyReleaseValidation/data/run/',
    )

    def __init__(self, outFileName):
        """Start with empty size tables and remember the output file name.

        outFileName -- path of the JSON file written by analyzePath(); the
                       pickle file name is derived from it.
        """
        # directory path -> summed size (bytes) of the files directly in it
        self.dirSizes = {}
        # file path -> size (bytes), regular files only
        self.fileSizes = {}
        self.outFileName = outFileName
        print("going to write to: %s" % (self.outFileName,))

    def analyzePath(self, dirIn):
        """Walk dirIn, record the sizes found and write them to disk.

        Directories whose path contains one of _skippedPathParts and files
        whose name contains '.glimpse_index' are ignored.  Entries whose
        size cannot be read are skipped instead of aborting the walk.
        The result [abspath(dirIn), dirSizes, fileSizes] is then written as
        JSON and as pickle; write errors are reported, not raised.
        """
        for (path, dirs, files) in os.walk(dirIn):

            if any(part in path for part in self._skippedPathParts):
                continue

            for entry in files:
                if '.glimpse_index' in entry:
                    continue
                fileName = os.path.join(path, entry)
                try:
                    fileSize = os.path.getsize(fileName)
                except OSError:
                    # e.g. a dangling symlink or a file removed during the
                    # walk: there is no size to account for, so move on
                    continue
                self.dirSizes[path] = self.dirSizes.get(path, 0) + fileSize
                if os.path.isfile(fileName):
                    self.fileSizes[fileName] = fileSize

        treeInfo = [os.path.abspath(dirIn), self.dirSizes, self.fileSizes]
        self._writeJson(treeInfo)
        self._writePickle(treeInfo)

    def _writeJson(self, treeInfo):
        """Best-effort dump of treeInfo as JSON to self.outFileName."""
        # Deliberately broad: a failed dump (including an unset output file
        # name) is reported and must not stop the analysis.
        try:
            import json
            jsonFileName = self.outFileName
            with open(jsonFileName, 'w') as jsonFile:
                json.dump(treeInfo, jsonFile)
            print('treeInfo info written to  %s' % (jsonFileName,))
        except Exception as e:
            print("error writing json file: %s" % (str(e),))

    def _writePickle(self, treeInfo):
        """Best-effort dump of treeInfo as pickle next to the JSON file."""
        # Deliberately broad, for the same reason as in _writeJson().
        try:
            import pickle
            pklFileName = self.outFileName.replace('.json', '.pkl')
            if pklFileName == self.outFileName:
                # no '.json' in the name: do not overwrite the JSON output
                pklFileName += '.pkl'
            # pickle data is binary; the with-block also closes the file
            with open(pklFileName, 'wb') as pklFile:
                pickle.dump(treeInfo, pklFile)
            print('treeInfo info written to  %s' % (pklFileName,))
        except Exception as e:
            print("error writing pkl file: %s" % (str(e),))

    def show(self):
        """Print the number of empty files and the largest dirs and files.

        At most ten entries are listed per category; fewer are printed when
        the tables hold fewer than ten items.
        """
        topDirs = sorted(self.dirSizes.items(), key=itemgetter(1), reverse=True)
        topFiles = sorted(self.fileSizes.items(), key=itemgetter(1), reverse=True)

        emptyFiles = [name for name, size in topFiles if size == 0]
        print("found  %d empty files. " % (len(emptyFiles),))

        print("found  %d directories, top 10 are:" % (len(self.dirSizes),))
        # slicing copes with tables that hold fewer than ten entries
        for entry in topDirs[:10]:
            print(entry)

        print("found  %d files, top 10 are:" % (len(self.fileSizes),))
        for entry in topFiles[:10]:
            print(entry)
73 
74 
def main():
    """Analyze a directory tree as requested on the command line.

    Options:
      -c, --checkDir DIR   directory to analyze (default: '.')
      -o, --outFile FILE   output file name handed to TreeAnalyzer
                           (default: None)

    Exits with status 2 when the command line cannot be parsed.
    """
    import getopt

    # Only the option parsing can raise GetoptError, so the try block is
    # limited to it.
    try:
        opts, _unused = getopt.getopt(sys.argv[1:], "c:o:", ['checkDir=', 'outFile='])
    except getopt.GetoptError as e:
        # getopt also reports e.g. missing arguments, so pass its own
        # message on instead of always claiming an unknown option
        sys.stderr.write("invalid command line: %s\n" % (str(e),))
        sys.exit(2)

    checkDir = '.'
    outFile = None
    for opt, arg in opts:
        if opt in ('-c', '--checkDir'):
            checkDir = arg
        elif opt in ('-o', '--outFile'):
            outFile = arg

    ta = TreeAnalyzer(outFile)
    ta.analyzePath(checkDir)
    ta.show()


if __name__ == '__main__':
    main()
102 
Definition: main.py:1