dd/de6/TableParser_8py_source.html

 # email: cmsdoxy@cern.ch, ali.mehmet.altundag@cern.ch


 # please have a look at the namespaces.html (namespace list) and annotated.html

 # (~class list) html files to understand the tags/attributes that we use in

 # this script.


 from BeautifulSoup import *

 import sys, os, copy


 htmlFullPath     = None

 htmlFilePath     = None

 htmlFileName     = None

 fileNameTemplate = None # html file name template

 htmlPage         = None

 tableClassName   = 'directory'


 # load rows from the table in [C]lass and [N]amespace list pages and prepare

 # pages in the following structure: pages = {'A' : [...], 'B' : [...]}

 def extractPages(configFileFlag = False):
     """Group the rows of the class/namespace list table by initial letter.

     Reads the module-level ``htmlPage`` and the table whose class is
     ``tableClassName``. A row without a ``style`` attribute starts (or
     continues) the page named after the upper-cased first character of its
     first cell; a row with a ``style`` attribute is a hidden child row and
     is filed under the page of the last visible row seen.

     Args:
         configFileFlag: when true, only rows whose first link has an href
             containing ``_cff``, ``_cfi`` or ``_cfg`` are kept.

     Returns:
         dict mapping a letter to the list of rows of that page, e.g.
         ``{'A' : [...], 'B' : [...]}``. The ``'A'`` page always exists.
     """
     # initial page, A. It is also the page used when the table starts with a
     # hidden row, or with a row that has no usable first cell; without this
     # default such a table raised UnboundLocalError.
     pages = {'A': []}
     firstLetter = 'A'
     # find all class/namespace table rows.
     table = htmlPage.find('table', {'class' : tableClassName})
     for row in table.findAll('tr'):
         # please see the related html file (annotated.html) to understand the
         # approach here: only hidden rows have a style attribute, and these
         # hidden rows must be added to the page of their parent.
         # NOTE(review): has_key is kept on purpose for the row -- with
         # BeautifulSoup 3 tags, `in` is believed to test child nodes rather
         # than attributes; confirm before changing.
         if not row.has_key('style'):
             cells = row.findAll('td')
             label = ''
             if cells:
                 label = cells[0].text
             # a visible row without text keeps the current letter
             if label:
                 firstLetter = label[0].upper()
         # if pages dict doesn't have the page yet..
         if firstLetter not in pages:
             pages[firstLetter] = []
         # insert the row into the related page
         if configFileFlag:
             # rows without a link (or without an href) cannot be config files
             link = row.find('a')
             url = ''
             if link is not None:
                 url = link.get('href') or ''
             if '_cff' in url or '_cfi' in url or '_cfg' in url:
                 pages[firstLetter].append(row)
         else:
             pages[firstLetter].append(row)
     return pages


 # load rows from the package documentation page. output structure:

 # pages = {'PackageA' : [..], 'PackageB' : [...]}

 def extractPagesForPackage():
     """Group the rows of the package documentation table by package name.

     Reads the module-level ``htmlPage`` and the table whose class is
     ``tableClassName``. The page name of a row is taken from the text of its
     first cell: the part before the first '/', starting at the first blank
     of that part, with surrounding whitespace stripped.

     Returns:
         dict mapping a package name to the list of its rows, e.g.
         ``{'PackageA' : [..], 'PackageB' : [...]}``.
     """
     pages = {}
     table = htmlPage.find('table', {'class' : tableClassName})
     for row in table.findAll('tr'):
         # first cell contains name of the package...
         name = row.findAll('td')[0].text
         # parse package names --please have a look at the pages.html file.
         # Cut at the first '/' first; when there is no '/', the whole text is
         # kept (using find()'s -1 as a slice bound dropped the last char).
         head = name.partition('/')[0]
         start = head.find(' ')
         if start == -1:
             # no leading word to skip: keep the text from its beginning
             start = 0
         name = head[start:].strip()
         # create the page on first use, then file the row under it
         pages.setdefault(name, []).append(row)
     return pages


 # generate alphabetic tab for html pages that will be generated by this script

 def generateTab(items, curr, tabClass = 'tabs3'):
     """Build the html of the index tab that links the generated pages.

     The target file name of each item comes from the module-level
     ``fileNameTemplate``, with blanks in the item replaced by '_'.

     Args:
         items: page names, in the order they should appear in the tab.
         curr: name of the page being generated; its entry gets
             ``class="current"``.
         tabClass: class attribute of the surrounding ``div``.

     Returns:
         The tab markup as a single string.
     """
     entries = []
     for item in items:
         fn = fileNameTemplate % item.replace(' ', '_') # generate file name
         if item != curr:
             entries.append('<li><a href="%s">%s</a></li>' % (fn, item))
         else:
             entries.append('<li class="current"><a href="%s">%s</a></li>'%(fn, item))
     return '<div class="%s"><ul class="tablist">%s</ul></div>' % (tabClass, ''.join(entries))


 if __name__ == "__main__":
     # Usage: <script> <path to html file>
     # Splits the table of the given page into one page per key returned by
     # the extract* functions plus an 'All' page, and writes the generated
     # files next to the input file.
     if len(sys.argv) < 2:
         sys.stderr.write("not enough parameter!\n")
         sys.exit(1)

     # initialize variables
     htmlFullPath     = sys.argv[1]
     htmlFilePath, htmlFileName = os.path.split(htmlFullPath)
     fileNameTemplate = htmlFileName.replace('.html', '_%s.html')

     # load the html page
     with open(htmlFullPath) as f:
         htmlPage = BeautifulSoup(f.read())

     # please have a look at the pages.html page. You will see that class name
     # of the related tab, which we will use to put 'index tab' by using this
     # tab, is different for pages.html file. For namespaces.html (namespace
     # list) and annotated.html (~class list) files, class names are the same
     # tabs2. this is why we are setting 'the destination tab class name' up
     # differently depending on the html file name.
     if htmlFileName == 'packageDocumentation.html':
         pages = extractPagesForPackage()
         destTabClassName = 'tabs'
     elif htmlFileName == 'configfiles.html':
         pages = extractPages(configFileFlag = True)
         destTabClassName = 'tabs2'
     else:
         pages = extractPages()
         destTabClassName = 'tabs2'

     # sorted page names; the 'All' page (every row, in page order) goes last
     pageNames = sorted(pages.keys())
     allRows = []
     for page in pageNames:
         allRows.extend(pages[page])
     pages['All'] = allRows
     pageNames.append('All')

     # prepare the template
     table     = htmlPage.find('table', {'class' : tableClassName})
     # generate template (clean whole table content)
     for row in table.findAll('tr'):
         row.extract()

     # generate pages
     for page in pageNames:
         # single parenthesised argument: same output with the print statement
         # and with the print function
         print('generating %s...' % (fileNameTemplate % page))
         # work on a fresh copy of the emptied template for every page
         temp   = BeautifulSoup(str(htmlPage))
         table  = temp.find('table', {'class' : tableClassName})
         oldTab = temp.find('div', {'class' : destTabClassName})
         newTab = generateTab(pageNames, page)
         # keep the existing tab and put the index tab right after it
         oldTab.replaceWith(BeautifulSoup(oldTab.prettify() + str(newTab)))
         for row in pages[page]:
             table.append(row)
         # replace blank character with '_'. Please notice that you will not
         # be able to use original page name after this line.
         page = page.replace(' ', '_')
         # os.path.join keeps the output beside the input even when the input
         # was given without a directory; '%s/%s' produced '/<name>' (the
         # filesystem root) in that case.
         outPath = os.path.join(htmlFilePath, fileNameTemplate % page)
         with open(outPath, 'w') as f:
             f.write(str(temp))

python.multivaluedict.append
def append
Definition: multivaluedict.py:73

BeautifulSoup.BeautifulSoup
Definition: BeautifulSoup.py:1470

TableParser.extractPagesForPackage
def extractPagesForPackage
Definition: TableParser.py:48

TableParser.generateTab
def generateTab
Definition: TableParser.py:63

pileupCalc.upper
upper
Definition: pileupCalc.py:238

TableParser.extractPages
def extractPages
Definition: TableParser.py:19