CMS 3D CMS Logo

splitter.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 import sys,os,commands
3 from CommonMethods import *
4 
class FileObj:
    """Bookkeeping record for the files that share one run number."""

    def __init__(self):
        # Every instance gets its own list, so records never share file names.
        self.fileNames = []
        # Run number and accumulated size both start at zero; callers fill
        # them in after construction.
        self.run, self.size = 0, 0
10 
def getRunNumberFromFileName(fileName):
    """Return the run number encoded in *fileName*.

    The name is searched for the pattern ``<non-digits>_<digits>_<digits>_<digits>``
    and the second group of digits is returned as an integer.

    Returns -1 when the name does not contain that pattern.
    """
    # Raw string so the regex escapes are not interpreted as string escapes.
    regExp = re.search(r'(\D+)_(\d+)_(\d+)_(\d+)', fileName)
    if not regExp:
        return -1
    # int() handles arbitrarily large values; the Python-2-only long() is not needed.
    return int(regExp.group(3))
16 
17 
18 
def main():
    """Split the ``.txt`` files of a directory into run-range sub-directories.

    Usage: ``splitter fromDir``

    The files returned by ``ls(fromDir, ".txt")`` are grouped by the run
    number that ``getRunNumberFromFileName`` extracts from each name. Runs are
    then walked in ascending order and accumulated into a chunk; a chunk is
    closed as soon as its size exceeds ``totalSize // split`` or the last run
    has been reached. The files of every closed chunk are copied with ``cp``
    into ``fromDir/Run<firstRun>_<lastRun>/``.

    This function never returns normally: it always leaves through
    ``sys.exit``.
    """
    if len(sys.argv) < 2:
        sys.exit("Usage: splitter fromDir")
    sourceDir = sys.argv[1] + '/'

    fileList = ls(sourceDir, ".txt")

    # Maps run number -> FileObj collecting that run's files and total size.
    fileObjList = {}
    totalSize = 0
    for fileName in fileList:
        runNumber = getRunNumberFromFileName(fileName)
        if runNumber not in fileObjList:
            fileObjList[runNumber] = FileObj()
            fileObjList[runNumber].run = runNumber
        fileObjList[runNumber].fileNames.append(fileName)
        # Ask the filesystem for the size instead of parsing `ls -l` output:
        # no shell is involved and unusual file names cannot break the parse.
        fileSize = os.path.getsize(sourceDir + fileName)
        fileObjList[runNumber].size += fileSize
        totalSize += fileSize

    sortedKeys = sorted(fileObjList)

    # Number of roughly equal-sized parts the directory is divided into.
    split = 13
    # Floor division keeps the threshold an integer on every Python version.
    threshold = totalSize // split

    dirSize = 0
    tmpList = []
    for run in sortedKeys:
        dirSize += fileObjList[run].size
        tmpList.append(fileObjList[run])
        if dirSize > threshold or run == sortedKeys[-1]:
            rangeName = "Run" + str(tmpList[0].run) + "_" + str(tmpList[-1].run)
            newDir = sourceDir + rangeName + "/"
            # An already existing directory is reused, as it was before.
            if not os.path.isdir(newDir):
                os.mkdir(newDir)
            # Guard the percentage against a directory holding only empty files.
            percent = 100. * dirSize / totalSize if totalSize else 0.0
            print(str(percent) + "% " + rangeName)
            for runs in tmpList:
                cp(sourceDir, newDir, runs.fileNames)
            tmpList = []
            dirSize = 0

    print(totalSize)
    print(sortedKeys)
    # NOTE(review): exiting with a string writes it to stderr and sets exit
    # status 1, so a successful run still looks like a failure to the calling
    # shell. Kept unchanged for compatibility -- confirm whether callers rely
    # on it before switching to a zero exit status.
    sys.exit("ok")
105 
106 
107 
108 
109 
# Run the splitter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
def main()
Definition: splitter.py:19
def __init__(self)
Definition: splitter.py:6
def ls(path, rec=False)
Definition: eostools.py:348
def getRunNumberFromFileName(fileName)
Definition: splitter.py:11
Definition: main.py:1
double split
Definition: MVATrainer.cc:139