CMS 3D CMS Logo

splitter.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 import sys,os,commands
3 from CommonMethods import *
4 
class FileObj:
    """Per-run record of the input files collected for one run number.

    Instances start out empty and are filled in by the caller:
    ``run`` is the run number, ``size`` the accumulated size of the files,
    and ``fileNames`` the names of the files gathered for that run.
    """

    def __init__(self):
        # A fresh list per instance, so records never share file names.
        self.fileNames = []
        self.run = self.size = 0
10 
def getRunNumberFromFileName(fileName):
    """Extract the run number encoded in a file name.

    The name is searched for ``<non-digits>_<digits>_<digits>_<digits>``;
    the second group of digits is returned as the run number.

    Args:
        fileName: name of the file to inspect.

    Returns:
        The run number as an integer, or -1 when the name does not contain
        the expected pattern.
    """
    # Explicit import so the function does not depend on `re` leaking in
    # through the `from CommonMethods import *` star import.
    import re

    # Raw string: the \D and \d escapes must reach the regex engine verbatim.
    regExp = re.search(r'(\D+)_(\d+)_(\d+)_(\d+)', fileName)
    if not regExp:
        return -1
    # int() instead of the Python-2-only long(); on Python 2 int() promotes
    # to long by itself when the value does not fit a machine integer.
    return int(regExp.group(3))
16 
17 
18 
def main():
    """Group the .txt files of a directory by run and copy them into chunks.

    Usage: ``splitter fromDir``

    The files listed in ``fromDir`` are grouped by the run number found in
    their names. Runs are then walked in increasing order and accumulated
    until their summed size exceeds 1/13 of the total (or the last run is
    reached); each such chunk is copied into a new sub-directory
    ``Run<first>_<last>/`` of ``fromDir``.

    ``ls`` and ``cp`` are not defined in this file; they are expected to come
    from the ``CommonMethods`` star import. Presumably ``ls(dir, filter)``
    returns the matching file names and ``cp(src, dst, names)`` copies the
    named files -- confirm against CommonMethods.

    The statements that followed the final exit in the original were
    unreachable (and referenced an undefined ``destDir``), so they are gone.

    Raises:
        SystemExit: always -- with the usage text when no directory is
            given, otherwise with "ok" once the split is done.
        OSError: if a listed file cannot be stat'ed or a chunk directory
            cannot be created.
    """
    if len(sys.argv) < 2:
        sys.exit("Usage: splitter fromDir")
    sourceDir = sys.argv[1] + '/'

    fileList = ls(sourceDir, ".txt")

    # Group the files by run number and add up their sizes.
    # NOTE(review): names without a run number all land under key -1.
    fileObjList = {}
    totalSize = 0
    for fileName in fileList:
        if not fileName:
            # Nothing to stat for an empty entry.
            continue
        runNumber = getRunNumberFromFileName(fileName)
        if runNumber not in fileObjList:
            fileObjList[runNumber] = FileObj()
            fileObjList[runNumber].run = runNumber
        fileObjList[runNumber].fileNames.append(fileName)
        # Ask the filesystem directly instead of parsing `ls -l` columns,
        # which breaks as soon as a column is padded with extra spaces.
        fileSize = os.path.getsize(sourceDir + fileName)
        fileObjList[runNumber].size += fileSize
        totalSize += fileSize

    sortedKeys = sorted(fileObjList)

    # Target number of chunks; a chunk is closed once it exceeds this share.
    split = 13
    threshold = totalSize // split

    dirSize = 0
    tmpList = []
    for run in sortedKeys:
        dirSize += fileObjList[run].size
        tmpList.append(fileObjList[run])
        # Also flush on the last run so the trailing partial chunk is copied.
        if dirSize > threshold or run == sortedKeys[-1]:
            label = "Run" + str(tmpList[0].run) + "_" + str(tmpList[-1].run)
            newDir = sourceDir + label + "/"
            # An already existing chunk directory is reused, as before.
            if not os.path.isdir(newDir):
                os.mkdir(newDir)
            # Guard the percentage against an all-empty input set.
            percent = 100. * dirSize / totalSize if totalSize else 0.
            print(str(percent) + "% " + label)
            for runs in tmpList:
                cp(sourceDir, newDir, runs.fileNames)
            tmpList = []
            dirSize = 0

    print(totalSize)
    print(sortedKeys)
    # NOTE(review): a string argument makes the process exit with status 1
    # after printing it; kept because that is what the original did.
    sys.exit("ok")
104 
105 
106 
107 
108 
# Script entry point: run the splitter only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
def main()
Definition: splitter.py:19
def __init__(self)
Definition: splitter.py:6
def ls(path, rec=False)
Definition: eostools.py:348
def getRunNumberFromFileName(fileName)
Definition: splitter.py:11
Definition: main.py:1
#define str(s)
double split
Definition: MVATrainer.cc:139