CMS 3D CMS Logo

splitter.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 from __future__ import print_function
3 import sys,os,commands
4 from CommonMethods import *
5 
class FileObj:
    """Record describing the files that were grouped under one run number.

    Instances start empty; the code that creates them fills in the
    attributes afterwards.
    """

    def __init__(self):
        # Run number this record stands for (0 until the caller sets it).
        self.run = 0
        # Running total of the sizes of the files added to this record.
        self.size = 0
        # Names of the files attached to this record; a fresh list per
        # instance, so records never share their file lists.
        self.fileNames = []
11 
def getRunNumberFromFileName(fileName):
    """Extract the run number embedded in a file name.

    The name is searched for ``<non-digits>_<digits>_<digits>_<digits>``
    and the second group of digits is returned as the run number.

    Args:
        fileName: File name (or path) to inspect.

    Returns:
        The run number as an integer, or -1 when the name does not contain
        the expected pattern.
    """
    # Imported locally so the function does not depend on ``re`` leaking in
    # through ``from CommonMethods import *``.
    import re

    match = re.search(r'(\D+)_(\d+)_(\d+)_(\d+)', fileName)
    if not match:
        return -1
    # int() instead of the Python-2-only long(): it promotes to an
    # arbitrary-precision integer automatically on both Python 2 and 3.
    return int(match.group(3))
17 
18 
19 
def _collectRuns(sourceDir, fileList):
    """Group the files by run number and add up their sizes.

    Args:
        sourceDir: Directory prefix (ending in '/') the names are relative to.
        fileList: File names to group.

    Returns:
        A tuple ``(fileObjList, totalSize)``: a dict mapping each run number
        to its FileObj, and the summed size in bytes of all files.
    """
    fileObjList = {}
    totalSize = 0
    for fileName in fileList:
        runNumber = getRunNumberFromFileName(fileName)
        if runNumber not in fileObjList:
            fileObjList[runNumber] = FileObj()
            fileObjList[runNumber].run = runNumber
        fileObjList[runNumber].fileNames.append(fileName)
        # Ask the filesystem directly instead of parsing ``ls -l`` output,
        # whose column positions shift with padding and odd file names.
        fileSize = os.path.getsize(sourceDir + fileName)
        fileObjList[runNumber].size += fileSize
        totalSize += fileSize
    return fileObjList, totalSize


def _copyChunk(sourceDir, runObjs, dirSize, totalSize):
    """Create the ``Run<first>_<last>/`` sub-directory and copy a chunk into it.

    Args:
        sourceDir: Directory prefix (ending in '/') holding the files.
        runObjs: Non-empty list of FileObj records, ordered by run number.
        dirSize: Summed size of the files in ``runObjs``.
        totalSize: Summed size of all files being split (may be 0).
    """
    label = "Run" + str(runObjs[0].run) + "_" + str(runObjs[-1].run)
    newDir = sourceDir + label + "/"
    if not os.path.isdir(newDir):
        try:
            os.mkdir(newDir)
        except OSError as err:
            # Report the failure but carry on, as the script always did
            # when the directory could not be created.
            print("WARNING: couldn't create " + newDir + ": " + str(err))
    # Guard against a zero total (only empty files) when printing the share.
    percent = 100. * dirSize / totalSize if totalSize else 0.0
    print(str(percent) + "% " + label)
    for runObj in runObjs:
        # cp comes from CommonMethods; it is given the source directory,
        # the destination directory and the list of file names.
        cp(sourceDir, newDir, runObj.fileNames)


def main():
    """Split the .txt files of a directory into run-range sub-directories.

    The directory is taken from ``sys.argv[1]``. Files are grouped by the run
    number found in their names; runs are then walked in ascending order and
    a new ``Run<first>_<last>/`` sub-directory is closed off each time the
    accumulated size exceeds 1/13 of the total (and once more for the last
    run). The files of each chunk are copied into its sub-directory.

    Raises:
        SystemExit: with a usage message when no directory is given, and
            with the message "ok" once the split has finished.
    """
    if len(sys.argv) < 2:
        error = "Usage: splitter fromDir"
        sys.exit(error)
    sourceDir = sys.argv[1] + '/'

    # ls comes from CommonMethods; here it is asked for the ".txt" entries.
    fileList = ls(sourceDir, ".txt")

    fileObjList, totalSize = _collectRuns(sourceDir, fileList)
    sortedKeys = sorted(fileObjList)

    # Number of parts the total size is divided into to get the chunk limit.
    split = 13
    # Floor division keeps the integer threshold the script used under
    # Python 2, independent of the interpreter version.
    threshold = totalSize // split

    dirSize = 0
    tmpList = []
    for run in sortedKeys:
        dirSize += fileObjList[run].size
        tmpList.append(fileObjList[run])
        # Flush when the chunk is big enough, and always for the last run
        # so that no files are left behind.
        if dirSize > threshold or run == sortedKeys[-1]:
            _copyChunk(sourceDir, tmpList, dirSize, totalSize)
            tmpList = []
            dirSize = 0

    print(totalSize)
    print(sortedKeys)
    # NOTE(review): a string argument is written to stderr and produces exit
    # status 1 even though the run succeeded; kept unchanged in case callers
    # rely on it -- confirm and switch to a plain return if not.
    sys.exit("ok")
105 
106 
107 
108 
109 
# Run the splitter only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
def main()
Definition: splitter.py:20
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def __init__(self)
Definition: splitter.py:7
def ls(path, rec=False)
Definition: eostools.py:349
def getRunNumberFromFileName(fileName)
Definition: splitter.py:12
Definition: main.py:1
#define str(s)
double split
Definition: MVATrainer.cc:139