CMS 3D CMS Logo

Classes | Functions

splitter Namespace Reference

Classes

class  FileObj

Functions

def getRunNumberFromFileName
def main

Function Documentation

def splitter::getRunNumberFromFileName (   fileName)

Definition at line 11 of file splitter.py.

def getRunNumberFromFileName(fileName):
    """Extract the run number encoded in a file name.

    The name is searched for the pattern ``<non-digits>_<int>_<int>_<int>``;
    the second of the three integer fields is taken as the run number.

    Parameters:
        fileName: file name (or path) to inspect.

    Returns:
        The run number as an integer, or -1 when the name does not contain
        the expected pattern.
    """
    # Raw string: '\D' and '\d' are regex classes, not string escapes.
    regExp = re.search(r'(\D+)_(\d+)_(\d+)_(\d+)', fileName)
    if not regExp:
        return -1
    # int() instead of long(): long does not exist on Python 3, and on
    # Python 2 int() promotes to long automatically for large values.
    return int(regExp.group(3))
00017                 
00018 

def splitter::main ( )

Definition at line 19 of file splitter.py.

def main():
    """Split the ``.txt`` files of a directory into run-ordered sub-directories.

    The source directory is read from ``sys.argv[1]``. Its ``.txt`` files are
    grouped by the run number parsed from each file name (names without a
    recognisable run number end up under run ``-1``). The runs are then
    walked in ascending order and copied into ``Run<first>_<last>/``
    sub-directories of the source directory; a sub-directory is closed as soon
    as its accumulated size exceeds 1/13 of the total size, or when the last
    run has been reached.

    The function always terminates through ``exit``: with the usage message
    when the directory argument is missing, otherwise with ``"ok"``.

    Raises:
        OSError: if a file size cannot be read or a sub-directory cannot be
            created.
    """
    import os  # local import: keeps the block independent of the file header

    if len(sys.argv) < 2:
        exit("Usage: splitter fromDir")
    sourceDir = sys.argv[1] + '/'

    # `ls` is a project helper; presumably it returns the names of the
    # files in sourceDir carrying the given extension -- TODO confirm.
    fileList = ls(sourceDir, ".txt")

    # run number -> FileObj accumulating the file names and total size
    fileObjList = {}
    totalSize = 0
    for fileName in fileList:
        runNumber = getRunNumberFromFileName(fileName)
        if runNumber not in fileObjList:
            fileObjList[runNumber] = FileObj()
            fileObjList[runNumber].run = runNumber
        fileObj = fileObjList[runNumber]
        fileObj.fileNames.append(fileName)
        # Ask the filesystem directly instead of parsing `ls -l` output,
        # which breaks on padded columns and on names containing spaces.
        fileSize = os.path.getsize(sourceDir + fileName)
        fileObj.size += fileSize
        totalSize += fileSize

    sortedKeys = sorted(fileObjList)

    # Target number of chunks; a chunk is closed once it exceeds its share.
    split = 13
    threshold = totalSize // split

    dirSize = 0
    tmpList = []
    for run in sortedKeys:
        dirSize += fileObjList[run].size
        tmpList.append(fileObjList[run])
        if dirSize > threshold or run == sortedKeys[-1]:
            label = "Run" + str(tmpList[0].run) + "_" + str(tmpList[-1].run)
            newDir = sourceDir + label + "/"
            # An already existing directory is reused; other failures raise.
            if not os.path.isdir(newDir):
                os.mkdir(newDir)
            # Guard against a directory that only contains empty files.
            percent = 100. * dirSize / totalSize if totalSize else 0.
            print(str(percent) + "% " + label)
            for runs in tmpList:
                # `cp` is a project helper; presumably copies the named
                # files from sourceDir into newDir -- TODO confirm.
                cp(sourceDir, newDir, runs.fileNames)
            tmpList = []
            dirSize = 0

    print(totalSize)
    print(sortedKeys)
    # NOTE(review): if `exit` is the interpreter builtin, a string argument
    # is written to stderr and the process status is 1 -- kept unchanged for
    # backward compatibility; confirm whether a zero status is wanted.
    # The statements that used to follow this call were unreachable and
    # referenced an undefined `destDir`; they have been removed.
    exit("ok")
00106 
00107 
00108 
00109