CMS 3D CMS Logo

Classes | Functions | Variables

splitter Namespace Reference

Classes

class  FileObj

Functions

def appendLinkToList
def backupOriginal
def createHtmlPages
def createMenu
def extractLinks
def getFooter
def getHeader
def getRunNumberFromFileName
def main

Variables

dictionary INDEX = {}
list LINKS = []
list PREFIX = sys.argv[3]
list PROJECT_LOCATION = sys.argv[1]
list sourceFile = PROJECT_LOCATION+sys.argv[2]

Detailed Description

Created on Nov 9, 2010
Updated on Oct 19, 2011

@author: Mantas Stankevicius

Function Documentation

def splitter::appendLinkToList (   line,
  letter 
)

Definition at line 100 of file splitter.py.

00101                                   :
00102     if (not INDEX.has_key(letter)):
00103         subList = [letter, line]
00104         LINKS.append(subList)
00105         INDEX[letter] = letter
00106     else:
00107         for l in LINKS:
00108             if l[0] == letter:
00109                 l.append(line)
00110     

def splitter::backupOriginal ( )

Definition at line 172 of file splitter.py.

00173                     :
00174     fh = open(sourceFile,'r')
00175     html = fh.read()
00176     fh.close()
00177 
00178         
00179     soap = BeautifulSoup(html)
00180     div = soap.find("div", {"class":"tabs2"})
00181     # Adding menu of letters at the end of navigation bar
00182     text = NavigableString(createMenu("All"))
00183     div.append(text)
00184 #    div.insert(div.__len__(), createMenu("All"))
00185     
00186     html = soap.renderContents()
00187     
00188     output = open(PROJECT_LOCATION+"/doc/html/"+PREFIX+"All.html", "w")
00189     output.write(html)
00190     output.close() 

def splitter::createHtmlPages ( )

Definition at line 135 of file splitter.py.

00136                      :
00137     
00138     HTMLHeader = getHeader()
00139     HTMLFooter = getFooter()
00140     
00141     for list in LINKS:
00142         letter = list[0]
00143         
00144         html = HTMLHeader
00145         
00146         for item in list[1:]:
00147             html += item+"\n"
00148         
00149         html += HTMLFooter
00150         
00151         soap = BeautifulSoup(html)
00152         div = soap.find("div", {"class":"tabs2"})
00153 
00154         text = NavigableString(createMenu(letter))
00155         div.append(text)
00156 
00157 #        div.insert(div.__len__(), createMenu(letter))
00158         
00159         html = soap.renderContents()
00160         
00161         path = PROJECT_LOCATION+"/doc/html/"+PREFIX+letter+".html"
00162         output = open(path, "w")
00163         output.write(html)
00164         output.close()
00165         
00166         if letter == "A":
00167             output = open(sourceFile, "w")
00168             output.write(html)
00169             output.close()  
00170         
00171         print PROJECT_LOCATION+"/doc/html/"+PREFIX+letter+".html    Done!"                  

def splitter::createMenu (   letter)

Definition at line 111 of file splitter.py.

00112                       :
00113     html  = "<div class=\"tabs3\">\n"
00114     html += "<ul class=\"tablist\">\n"
00115 
00116     letters = []
00117     for i in INDEX:
00118         letters.append(i)
00119     
00120     letters.sort()
00121     letters.append("All")
00122     
00123     for l in letters:
00124         c = l
00125         current = ""
00126         if c == letter:
00127             current = " class=\"current\""
00128             
00129         html += "<li"+current+"><a href=\""+PREFIX+c+".html\"><span>"+c+"</span></a></li>\n"
00130     
00131     html += "</ul>\n"
00132     html += "</div>\n"
00133     
00134     return html

def splitter::extractLinks ( )
Extracts links from the source file, taking them from the contents of its
    <div class="contents"> ... </div> element.

Definition at line 67 of file splitter.py.

00068                   :
00069     """ Extracts links from source file 
00070         from <div class = 'contents'> </div>"""
00071         
00072     fh = open(sourceFile,'r')
00073     source = fh.read()
00074     fh.close()
00075     
00076     soup = BeautifulSoup(source)
00077     div = soup.find("div", {"class":"contents"})
00078     
00079     if (div != None):
00080         content = div.renderContents()
00081     
00082     lines = content.split("\n")
00083     for line in lines:
00084         if (line.find("<tr>") != -1):
00085             
00086             indexFrom = line.rfind(".html\">") + 7
00087             indexTo = line.rfind("</a>")
00088             linkText = line[indexFrom:indexTo]
00089             
00090             linkTextParts = linkText.split("::")
00091             
00092             if len(linkTextParts) == 2:
00093                 tmpLine = line.replace(linkText, linkTextParts[1])
00094                 letter = linkTextParts[1][0].upper()
00095                 appendLinkToList(tmpLine, letter)
00096 
00097             letter = linkText[0].upper()
00098             appendLinkToList(line, letter)
00099             

def splitter::getFooter ( )
Reads the source file backwards from its end, up to and including the line that contains </table>.
    The list of links lies before that line, so everything collected here is the page footer.

Definition at line 43 of file splitter.py.

00044                :
00045     """ Reading source file from end until </table>. 
00046         After </table> begins list of links (reading from end) 
00047     """
00048     fh = open(sourceFile,'r')
00049     source = fh.read()
00050     fh.close()
00051     
00052     lines = source.split("\n")
00053     lines.reverse()
00054     
00055     html = []
00056     enough = False
00057     
00058     for line in lines:
00059         if (not enough):
00060             html.append(line)
00061             
00062         if line.find("</table>") != -1:
00063             enough = True
00064         
00065     html.reverse()            
00066     return "\n".join(html)   

def splitter::getHeader ( )
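Reads the source file from its start up to (but not including) the first line that contains <table>.
    The list of links begins after that line, so everything collected here is the page header.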

Definition at line 20 of file splitter.py.

00021                :
00022     """ Reading source file until <table>. 
00023         After <table> begins list of links 
00024     """
00025     fh = open(sourceFile,'r')
00026     source = fh.read()
00027     fh.close()
00028     
00029     lines = source.split("\n")
00030 
00031     html = []
00032     enough = False
00033     
00034     for line in lines:
00035         if line.find("<table>") != -1:
00036             enough = True
00037             
00038         if (not enough):
00039             html.append(line)
00040             
00041     html.append("<table width=\"100%\">")
00042     return "\n".join(html)

def splitter::getRunNumberFromFileName (   fileName)

Definition at line 11 of file splitter.py.

00012                                       :
00013     regExp = re.search('(\D+)_(\d+)_(\d+)_(\d+)',fileName)
00014     if not regExp:
00015         return -1
00016     return long(regExp.group(3))
00017                 
00018 

def splitter::main ( )

Definition at line 19 of file splitter.py.

00020           :
00021     if len(sys.argv) < 2:
00022         error = "Usage: splitter fromDir"
00023         exit(error)
00024     sourceDir = sys.argv[1] + '/'
00025 
00026     fileList = ls(sourceDir,".txt")
00027 
00028     fileObjList = {}
00029 
00030     totalSize = 0
00031     for fileName in fileList:
00032         runNumber = getRunNumberFromFileName(fileName)
00033         if runNumber not in fileObjList:
00034             fileObjList[runNumber] = FileObj()
00035             fileObjList[runNumber].run = runNumber 
00036         fileObjList[runNumber].fileNames.append(fileName) 
00037         aCommand  = 'ls -l '+ sourceDir + fileName 
00038         output = commands.getstatusoutput( aCommand )
00039         fileObjList[runNumber].size += int(output[1].split(' ')[4])
00040         totalSize += int(output[1].split(' ')[4]) 
00041 
00042     sortedKeys = fileObjList.keys()
00043     sortedKeys.sort()
00044 
00045     split=13
00046 
00047     dirSize = 0
00048     tmpList = []
00049     for run in sortedKeys:
00050         dirSize += fileObjList[run].size
00051         tmpList.append(fileObjList[run])
00052         if dirSize > totalSize/split or run == sortedKeys[len(sortedKeys)-1]:
00053             newDir = sourceDir + "Run" + str(tmpList[0].run) + "_" + str(tmpList[len(tmpList)-1].run) + "/"
00054             aCommand  = 'mkdir '+ newDir
00055             output = commands.getstatusoutput( aCommand )
00056             print str(100.*dirSize/totalSize) + "% " + "Run" + str(tmpList[0].run) + "_" + str(tmpList[len(tmpList)-1].run) 
00057             for runs in tmpList:
00058                 #print 'cp '+ sourceDir + runs.fileNames[0] + " " + newDir
00059                 cp(sourceDir,newDir,runs.fileNames) 
00060             tmpList = []
00061             dirSize = 0
00062         
00063 
00064 
00065     
00066     print totalSize
00067     print sortedKeys 
00068     exit("ok")    
00069 
00070 
00071 
00072 
00073 
00074 
00075     if not os.path.isdir(destDir):
00076         error = "WARNING: destination directory doesn't exist! Creating it..."
00077         print error
00078         os.mkdir(destDir)
00079     copiedFiles = cp(sourceDir,destDir,fileList)
00080 
00081     if len(copiedFiles) != len(fileList):
00082         error = "ERROR: I couldn't copy all files from castor"
00083         exit(error)
00084 
00085     for fileName in fileList:
00086         fullFileName = destDir + fileName
00087         runNumber = -1;
00088         with open(fullFileName,'r') as file:
00089             for line in file:
00090                 if line.find("Runnumber") != -1:
00091                     tmpRun = int(line.split(' ')[1])
00092                     if runNumber != -1 and tmpRun != runNumber:
00093                         error = "This file (" + fileName + ") contains more than 1 run number! I don't know how to deal with it!"
00094                         exit(error)
00095                     runNumber = int(line.split(' ')[1])
00096         file.close()
00097         newFileName = fileName.replace("None",str(runNumber))
00098         if fileName != newFileName:
00099             aCmd = "mv " + destDir + fileName + " " + destDir + newFileName
00100             print aCmd
00101             output =  commands.getstatusoutput(aCmd)
00102             if output[0] != 0:
00103                 print output[1]
00104         else:
00105             print "WARNING couldn't find keyword None in file " + fileName
00106 
00107 
00108 
00109 
        

Variable Documentation

dictionary splitter::INDEX = {}

Definition at line 11 of file splitter.py.

list splitter::LINKS = []
list splitter::PREFIX = sys.argv[3]

Definition at line 197 of file splitter.py.

list splitter::PROJECT_LOCATION = sys.argv[1]

Definition at line 193 of file splitter.py.

list splitter::sourceFile = PROJECT_LOCATION+sys.argv[2]

Definition at line 195 of file splitter.py.