CMS 3D CMS Logo

/data/doxygen/doxygen-1.7.3/gen/CMSSW_4_2_8/src/RecoVertex/BeamSpotProducer/scripts/splitter.py

Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 import sys,os,commands
00003 from CommonMethods import *
00004 
class FileObj:
    """Bookkeeping record for one run.

    Attributes set by the constructor:
        run       -- run number, initially 0
        size      -- accumulated size counter, initially 0
        fileNames -- list of file names, initially empty
    """

    def __init__(self):
        # A fresh list per instance, so records never share file names.
        self.fileNames = []
        self.size = 0
        self.run = 0
00010 
def getRunNumberFromFileName(fileName):
    """Extract the run number encoded in a file name.

    The name is searched for the pattern
    ``<non-digits>_<digits>_<digits>_<digits>``; the second group of digits
    is taken as the run number.

    Args:
        fileName: file name (or path) to inspect.

    Returns:
        The run number as an integer, or -1 when the name does not contain
        the expected pattern.
    """
    # Imported locally so the function does not rely on ``re`` leaking into
    # the module namespace through a wildcard import.
    import re

    # Raw string: \D and \d are regex escapes, not string escapes.
    match = re.search(r'(\D+)_(\d+)_(\d+)_(\d+)', fileName)
    if match is None:
        return -1
    # int() handles arbitrarily large values itself, so the Python-2-only
    # long() is not needed.
    return int(match.group(3))
00016                 
00017 
00018 
def main():
    """Split the ``.txt`` files of a directory into run-range subdirectories.

    The source directory is read from ``sys.argv[1]``. Its ``.txt`` files are
    grouped by the run number found in each file name (names that do not
    match the expected pattern are grouped under run -1). The groups are
    then walked in increasing run order and copied into subdirectories named
    ``Run<first>_<last>/``; a subdirectory is closed as soon as its
    accumulated size exceeds 1/13 of the total size, or when the last run
    has been added.

    Progress (percentage of the total size per subdirectory), the total size
    and the sorted run numbers are printed to stdout.

    Exits with a usage message when no source directory is given. On
    success "ok" is written to stderr and the function returns normally, so
    the process exit status is 0.
    """
    if len(sys.argv) < 2:
        sys.exit("Usage: splitter fromDir")
    sourceDir = sys.argv[1] + '/'

    # ls() and cp() are provided by the CommonMethods wildcard import.
    fileList = ls(sourceDir, ".txt")

    fileObjList = {}
    totalSize = 0
    for fileName in fileList:
        runNumber = getRunNumberFromFileName(fileName)
        if runNumber not in fileObjList:
            fileObjList[runNumber] = FileObj()
            fileObjList[runNumber].run = runNumber
        fileObjList[runNumber].fileNames.append(fileName)
        # Ask the filesystem directly instead of parsing `ls -l` output,
        # which depends on column layout and needs a shell per file.
        fileSize = os.path.getsize(sourceDir + fileName)
        fileObjList[runNumber].size += fileSize
        totalSize += fileSize

    sortedKeys = sorted(fileObjList)

    # Number of roughly equal-sized chunks the directory is divided into.
    split = 13
    # Loop-invariant size limit; floor division keeps it an integer.
    threshold = totalSize // split

    dirSize = 0
    tmpList = []
    for run in sortedKeys:
        dirSize += fileObjList[run].size
        tmpList.append(fileObjList[run])
        # Close the current chunk when it is big enough, or when there are
        # no more runs left to add.
        if dirSize > threshold or run == sortedKeys[-1]:
            rangeName = "Run" + str(tmpList[0].run) + "_" + str(tmpList[-1].run)
            newDir = sourceDir + rangeName + "/"
            if not os.path.isdir(newDir):
                try:
                    os.mkdir(newDir)
                except OSError as err:
                    # Directory creation stays non-fatal, but the failure
                    # is reported instead of being dropped.
                    print("WARNING: could not create " + newDir + ": " + str(err))
            # Guard against a division by zero when every file is empty.
            if totalSize:
                percent = 100. * dirSize / totalSize
            else:
                percent = 0.
            print(str(percent) + "% " + rangeName)
            for runs in tmpList:
                cp(sourceDir, newDir, runs.fileNames)
            tmpList = []
            dirSize = 0

    print(totalSize)
    print(sortedKeys)
    # Report success on stderr without raising SystemExit with a string,
    # which would make the process exit with status 1.
    sys.stderr.write("ok\n")
00105 
00106 
00107 
00108 
00109         
00110 if __name__ == "__main__":  # run main() only when executed as a script, not on import
00111     main()