CMS 3D CMS Logo

Classes | Functions
splitter Namespace Reference

Classes

class  FileObj
 

Functions

def getRunNumberFromFileName (fileName)
 
def main ()
 

Function Documentation

◆ getRunNumberFromFileName()

def splitter.getRunNumberFromFileName (   fileName)

Definition at line 12 of file splitter.py.

Referenced by main().

def getRunNumberFromFileName(fileName):
    """Extract the run number encoded in a file name.

    The name is searched for the pattern ``<non-digits>_<n>_<n>_<n>``; the
    second numeric field is taken as the run number.

    Args:
        fileName: File name (or path) to inspect.

    Returns:
        The run number as an ``int``, or ``-1`` when the name does not
        contain the expected pattern.
    """
    # Raw string: '\D' and '\d' are regex classes, not string escapes.
    match = re.search(r'(\D+)_(\d+)_(\d+)_(\d+)', fileName)
    if not match:
        return -1
    # int() is unbounded in Python 3; the Python 2 builtin long() no longer exists.
    return int(match.group(3))
17 
18 
19 
def getRunNumberFromFileName(fileName)
Definition: splitter.py:12

◆ main()

def splitter.main ( )

Definition at line 20 of file splitter.py.

References beamvalidation.exit(), getRunNumberFromFileName(), createfilelist.int, relval_nano.ls, print(), submitPVValidationJobs.split(), and str.

def main():
    """Split the ``.txt`` files of a directory into run-range subdirectories.

    The source directory is read from ``sys.argv[1]``. Its ``.txt`` files are
    grouped by the run number returned by ``getRunNumberFromFileName`` and the
    groups are visited in increasing run order. Runs are accumulated until
    their combined size exceeds 1/13 of the total size (or the last run is
    reached); the accumulated files are then copied with ``cp`` into a new
    ``Run<first>_<last>/`` subdirectory of the source directory.

    The function finishes by calling ``exit("ok")``; when no directory
    argument is supplied it calls ``exit`` with a usage message instead.
    """
    if len(sys.argv) < 2:
        error = "Usage: splitter fromDir"
        exit(error)
    sourceDir = sys.argv[1] + '/'

    fileList = ls(sourceDir, ".txt")

    # One FileObj per run number, accumulating that run's file names and size.
    fileObjList = {}
    totalSize = 0
    for fileName in fileList:
        runNumber = getRunNumberFromFileName(fileName)
        if runNumber not in fileObjList:
            fileObjList[runNumber] = FileObj()
            fileObjList[runNumber].run = runNumber
        fileObjList[runNumber].fileNames.append(fileName)
        # Ask the filesystem directly instead of parsing `ls -l` output,
        # which depends on column padding and shell quoting of the name.
        fileSize = os.path.getsize(sourceDir + fileName)
        fileObjList[runNumber].size += fileSize
        totalSize += fileSize

    sortedKeys = sorted(fileObjList)

    # Number of roughly equal-sized chunks the directory is divided into.
    split = 13

    dirSize = 0
    tmpList = []
    for run in sortedKeys:
        dirSize += fileObjList[run].size
        tmpList.append(fileObjList[run])
        # Close the current chunk once it is large enough, and always flush
        # whatever is left when the last run is reached.
        if dirSize > totalSize/split or run == sortedKeys[-1]:
            rangeName = "Run" + str(tmpList[0].run) + "_" + str(tmpList[-1].run)
            newDir = sourceDir + rangeName + "/"
            # An already existing directory is tolerated; other failures raise.
            os.makedirs(newDir, exist_ok=True)
            # Guard against a directory that only contains empty files.
            percent = 100.*dirSize/totalSize if totalSize else 0.0
            print(str(percent) + "% " + rangeName)
            for runs in tmpList:
                cp(sourceDir, newDir, runs.fileNames)
            tmpList = []
            dirSize = 0

    print(totalSize)
    print(sortedKeys)
    # NOTE: the legacy copy/rename code that used to follow this call could
    # never run (and referenced an undefined destDir), so it was dropped.
    exit("ok")
104 
105 
106 
107 
108 
109 
def main()
Definition: splitter.py:20
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
def getRunNumberFromFileName(fileName)
Definition: splitter.py:12
#define str(s)
def exit(msg="")