CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
Classes | Functions
splitter Namespace Reference

Classes

class  FileObj
 

Functions

def getRunNumberFromFileName
 
def main
 

Function Documentation

def splitter.getRunNumberFromFileName (   fileName)

Definition at line 12 of file splitter.py.

Referenced by main().

12 
def getRunNumberFromFileName(fileName):
    """Extract the run number encoded in a file name.

    The name is searched for the pattern ``<non-digits>_<int>_<int>_<int>``;
    the run number is taken from the second integer field (regex group 3).

    Args:
        fileName: File name (or path) to inspect.

    Returns:
        The run number as an ``int``, or ``-1`` when the name does not
        contain the expected pattern.
    """
    # Raw string so that \D and \d reach the regex engine untouched.
    regExp = re.search(r'(\D+)_(\d+)_(\d+)_(\d+)', fileName)
    if not regExp:
        return -1
    # Python 3 has no separate ``long`` type; ``int`` is arbitrary precision.
    return int(regExp.group(3))
18 
19 
def getRunNumberFromFileName
Definition: splitter.py:12
def splitter.main ( )

Definition at line 20 of file splitter.py.

References beamvalidation.exit(), getRunNumberFromFileName(), eostools.ls(), print(), submitPVValidationJobs.split(), and str.

20 
def main():
    """Group the ``.txt`` files of a directory by run and copy them into
    run-range subdirectories of roughly equal total size.

    The source directory is read from ``sys.argv[1]``. Files are listed with
    ``ls(sourceDir, ".txt")``, keyed by the run number obtained from
    ``getRunNumberFromFileName`` (files whose name does not match end up
    under run ``-1``), and their sizes are summed. Runs are then walked in
    ascending order; every time the accumulated size exceeds 1/13 of the
    total (or the last run is reached) a ``Run<first>_<last>/`` directory is
    created inside the source directory and the accumulated files are copied
    there with ``cp``.

    ``ls``, ``cp`` and ``FileObj`` are defined outside this block.
    """
    if len(sys.argv) < 2:
        error = "Usage: splitter fromDir"
        exit(error)
    sourceDir = sys.argv[1] + '/'

    fileList = ls(sourceDir, ".txt")

    # Group the files by run number, tracking per-run and overall size.
    fileObjList = {}
    totalSize = 0
    for fileName in fileList:
        runNumber = getRunNumberFromFileName(fileName)
        if runNumber not in fileObjList:
            fileObjList[runNumber] = FileObj()
            fileObjList[runNumber].run = runNumber
        fileObjList[runNumber].fileNames.append(fileName)
        # Ask the filesystem directly instead of parsing `ls -l` output,
        # which breaks on padded columns or unusual file names.
        fileSize = os.path.getsize(sourceDir + fileName)
        fileObjList[runNumber].size += fileSize
        totalSize += fileSize

    sortedKeys = sorted(fileObjList)

    # Number of roughly equal-sized chunks to aim for.
    nChunks = 13

    dirSize = 0
    tmpList = []
    for run in sortedKeys:
        dirSize += fileObjList[run].size
        tmpList.append(fileObjList[run])
        # Flush the current chunk once it is big enough, and always flush
        # on the last run so no files are left behind.
        if dirSize > totalSize / nChunks or run == sortedKeys[-1]:
            rangeName = "Run" + str(tmpList[0].run) + "_" + str(tmpList[-1].run)
            newDir = sourceDir + rangeName + "/"
            try:
                os.makedirs(newDir, exist_ok=True)
            except OSError as err:
                # Directory creation stays best-effort, but is no longer
                # silent when it fails.
                print("WARNING: could not create " + newDir + ": " + str(err))
            # Avoid a ZeroDivisionError when every file is empty.
            percent = 100. * dirSize / totalSize if totalSize else 0.0
            print(str(percent) + "% " + rangeName)
            for runs in tmpList:
                cp(sourceDir, newDir, runs.fileNames)
            tmpList = []
            dirSize = 0

    print(totalSize)
    print(sortedKeys)
    # NOTE(review): if this is the builtin exit(), a string argument is
    # written to stderr and the process exits with status 1. Kept as-is so
    # callers see the same exit behaviour; confirm whether 0 was intended.
    exit("ok")
106 
107 
108 
109 
def ls
Definition: eostools.py:349
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
def main
Definition: splitter.py:20
def getRunNumberFromFileName
Definition: splitter.py:12
#define str(s)