CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
Classes | Functions
splitter Namespace Reference

Classes

class  FileObj
 

Functions

def getRunNumberFromFileName
 
def main
 

Function Documentation

def splitter.getRunNumberFromFileName (   fileName)

Definition at line 11 of file splitter.py.

Referenced by main().

11 
def getRunNumberFromFileName(fileName):
    """Extract the run number encoded in a file name.

    The name is searched for the pattern
    ``<non-digits>_<digits>_<digits>_<digits>``; the run number is the
    second of the three numeric fields.

    Args:
        fileName: File name (or path) to inspect.

    Returns:
        The run number as an integer, or -1 when the name does not contain
        the expected pattern.
    """
    # Raw string so that \D and \d reach the regex engine untouched instead
    # of being treated as (invalid) string escape sequences.
    match = re.search(r'(\D+)_(\d+)_(\d+)_(\d+)', fileName)
    if not match:
        return -1
    # int() handles arbitrarily large values by itself, so the Python 2-only
    # long() builtin is not required.
    return int(match.group(3))
17 
18 
def getRunNumberFromFileName
Definition: splitter.py:11
def splitter.main ( )

Definition at line 19 of file splitter.py.

References CommonMethods.cp(), cmsRelvalreport.exit, getRunNumberFromFileName(), eostools.ls(), and split.

19 
def main():
    """Split the ``.txt`` files of a directory into run-range subdirectories.

    Usage: ``splitter fromDir``

    The files returned by ``ls(sourceDir, ".txt")`` are grouped by the run
    number parsed from their names. The runs are then walked in increasing
    order and accumulated until their combined size exceeds 1/13 of the
    total size (or the last run is reached). Each accumulated group is
    copied with ``cp`` into a new ``Run<first>_<last>/`` directory created
    inside the source directory.

    The function always terminates through ``exit``: with the usage message
    when the directory argument is missing, otherwise with ``"ok"``.
    """
    if len(sys.argv) < 2:
        error = "Usage: splitter fromDir"
        exit(error)
    sourceDir = sys.argv[1] + '/'

    fileList = ls(sourceDir, ".txt")

    # Group the files by run number and keep track of the size of each group.
    fileObjList = {}
    totalSize = 0
    for fileName in fileList:
        runNumber = getRunNumberFromFileName(fileName)
        if runNumber not in fileObjList:
            fileObjList[runNumber] = FileObj()
            fileObjList[runNumber].run = runNumber
        fileObjList[runNumber].fileNames.append(fileName)
        # Ask the filesystem for the size directly instead of parsing the
        # output of "ls -l", which depends on column layout and breaks on
        # paths containing spaces or shell metacharacters.
        fileSize = os.path.getsize(sourceDir + fileName)
        fileObjList[runNumber].size += fileSize
        totalSize += fileSize

    sortedKeys = sorted(fileObjList)

    # Target number of output directories; a group is closed as soon as it
    # grows past totalSize/split.
    split = 13
    # Floor division keeps the integer threshold on both Python 2 and 3.
    threshold = totalSize // split

    dirSize = 0
    tmpList = []
    for run in sortedKeys:
        dirSize += fileObjList[run].size
        tmpList.append(fileObjList[run])
        if dirSize > threshold or run == sortedKeys[-1]:
            rangeName = "Run" + str(tmpList[0].run) + "_" + str(tmpList[-1].run)
            newDir = sourceDir + rangeName + "/"
            # Directory creation stays best-effort: a failure is reported
            # but does not abort the run.
            if not os.path.isdir(newDir):
                try:
                    os.mkdir(newDir)
                except OSError as err:
                    print("WARNING: couldn't create " + newDir + ": " + str(err))
            # Guard against a division by zero when every file is empty.
            percentage = 100. * dirSize / totalSize if totalSize else 0.
            print(str(percentage) + "% " + rangeName)
            for runs in tmpList:
                cp(sourceDir, newDir, runs.fileNames)
            tmpList = []
            dirSize = 0

    print(totalSize)
    print(sortedKeys)
    # NOTE: the statements that used to follow this call (copying to and
    # renaming files in a "destDir") could never run and relied on an
    # undefined variable, so they are not kept.
    exit("ok")
106 
107 
108 
109 
def ls
Definition: eostools.py:348
def main
Definition: splitter.py:19
def getRunNumberFromFileName
Definition: splitter.py:11
double split
Definition: MVATrainer.cc:139