CMS 3D CMS Logo

storeTreeInfo.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 from __future__ import print_function
4 import os, sys, stat
5 from operator import itemgetter
6 
8 
def __init__(self, outFileName):
    """Set up empty size tables and remember where results get written.

    outFileName is stored as-is; it is later used as the JSON output
    path by analyzePath().
    """
    self.outFileName = outFileName
    # Both tables map a path string to an accumulated size in bytes.
    self.dirSizes, self.fileSizes = {}, {}
    print("going to write to:", self.outFileName)
14 
def analyzePath(self, dirIn):
    """Walk dirIn, collect directory/file sizes and dump them to disk.

    For every file found below dirIn the size is added to the entry of
    its containing directory in self.dirSizes; regular files are also
    recorded individually in self.fileSizes.  Directories whose path
    contains 'CVS', '.glimpse_' or the PyReleaseValidation run area are
    skipped, as are '.glimpse_index' files.

    The result [abspath(dirIn), dirSizes, fileSizes] is written as JSON
    to self.outFileName and as a pickle to the same name with '.json'
    replaced by '.pkl'.  Write errors are reported on stdout and do not
    raise.
    """
    import json
    import pickle

    skippedPathParts = ('CVS',
                        '.glimpse_',
                        'Configuration/PyReleaseValidation/data/run/')

    for (path, dirs, files) in os.walk(dirIn):

        if any(part in path for part in skippedPathParts):
            continue

        for name in files:
            if '.glimpse_index' in name:
                continue
            fileName = os.path.join(path, name)
            try:
                fileSize = os.path.getsize(fileName)
            except OSError as e:
                # Dangling symlinks (or files removed while walking) have
                # no size; skip them instead of aborting the whole scan.
                print("skipping unreadable file:", fileName, str(e))
                continue
            self.dirSizes[path] = self.dirSizes.get(path, 0) + fileSize
            if os.path.isfile(fileName):
                self.fileSizes[fileName] = fileSize

    treeInfo = [os.path.abspath(dirIn), self.dirSizes, self.fileSizes]

    # Best effort: a failure to write one format is reported but must not
    # prevent the other format (or the later summary) from being produced.
    try:
        jsonFileName = self.outFileName
        with open(jsonFileName, 'w') as jsonFile:
            json.dump(treeInfo, jsonFile)
        print('treeInfo info written to ', jsonFileName)
    except Exception as e:
        print("error writing json file:", str(e))

    try:
        # NOTE: if outFileName does not contain '.json' the pickle lands on
        # the same path as the JSON file and replaces it.
        pklFileName = self.outFileName.replace('.json', '.pkl')
        # pickle produces bytes, so the file has to be opened in binary mode.
        with open(pklFileName, 'wb') as pklFile:
            pickle.dump(treeInfo, pklFile)
        print('treeInfo info written to ', pklFileName)
    except Exception as e:
        print("error writing pkl file:", str(e))
51 
def show(self):
    """Print a summary of the collected sizes to stdout.

    Reports the number of zero-size files, then the (path, size) pairs
    of up to ten largest directories and up to ten largest files, each
    sorted by size in descending order.
    """
    topDirs = sorted(self.dirSizes.items(), key=itemgetter(1), reverse=True)
    topFiles = sorted(self.fileSizes.items(), key=itemgetter(1), reverse=True)

    emptyFiles = [p for p, s in topFiles if s == 0]
    print("found ", len(emptyFiles), "empty files. ")

    # Slicing (rather than indexing 0..9) keeps this safe when fewer than
    # ten entries were collected.
    print("found ", len(self.dirSizes), 'directories, top 10 are:')
    for entry in topDirs[:10]:
        print(entry)

    print("found ", len(self.fileSizes), 'files, top 10 are:')
    for entry in topFiles[:10]:
        print(entry)
74 
75 
def main():
    """Command-line entry point.

    Options:
      -c / --checkDir DIR   directory to analyze (default: '.')
      -o / --outFile FILE   output file name handed to TreeAnalyzer

    An unrecognized option is reported on stdout and ends the process
    with exit status 2.
    """
    import getopt

    # Only the option parsing itself can raise GetoptError.
    try:
        opts, args = getopt.getopt(sys.argv[1:], "c:o:", ['checkDir=', 'outFile='])
    except getopt.GetoptError as e:
        print("unknown option", str(e))
        sys.exit(2)

    checkDir = '.'
    outFile = None
    for flag, value in opts:
        if flag in ('-c', "--checkDir"):
            checkDir = value
        elif flag in ('-o', "--outFile"):
            outFile = value

    analyzer = TreeAnalyzer(outFile)
    analyzer.analyzePath(checkDir)
    analyzer.show()


# Run the analysis only when executed as a script, not on import.
if __name__ == '__main__':
    main()
103 
def __init__(self, outFileName)
Definition: storeTreeInfo.py:9
def analyzePath(self, dirIn)
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:65
Definition: main.py:1
#define str(s)