dd/de6/TableParser_8py_source.html

 # email: cmsdoxy@cern.ch, ali.mehmet.altundag@cern.ch

 # please have a look at the namespaces.html (namespace list) and annotated.html
 # (~class list) html files to understand the tags/attributes that we use in
 # this script.

 from BeautifulSoup import *
 import sys, os, copy

 htmlFullPath     = None
 htmlFilePath     = None
 htmlFileName     = None
 fileNameTemplate = None # html file name template
 htmlPage         = None
 tableClassName   = 'directory'

 # load rows from the table in [C]lass and [N]amespace list pages and prepare
 # pages in the following structure: pages = {'A' : [...], 'B' : [...]}
 def extractPages(configFileFlag = False):
     """Group the rows of the class/namespace list table by first letter.

     The table is looked up in the module-level ``htmlPage`` by the class
     name stored in ``tableClassName``.

     Args:
         configFileFlag: when true, a row is only stored if the ``href`` of
             its first link contains '_cff', '_cfi' or '_cfg'. The page for
             the row's letter is still created, even if it ends up empty.

     Returns:
         dict mapping an upper-cased first character to the list of row
         tags filed under it, e.g. ``{'A': [...], 'B': [...]}``. The 'A'
         page is always present.
     """
     # initial page, A. It is also the page that receives hidden rows which
     # show up before any visible row (the letter used to be unbound then).
     firstLetter = 'A'
     pages = {firstLetter: []}
     # find all class/namespace table rows.
     table = htmlPage.find('table', {'class' : tableClassName})
     for row in table.findAll('tr'):
         # please see the related html file (annotated.html) to understand
         # the approach here: only hidden (child) rows have a style
         # attribute, and they must land on the page of their parent. So
         # the current letter only changes on rows without that attribute.
         if not row.has_key('style'):
             cells = row.findAll('td')
             label = cells[0].text if cells else ''
             # a row without a usable first cell stays on the current page
             if label:
                 firstLetter = label[0].upper()
         # create the page on first use, even if the row is filtered below
         bucket = pages.setdefault(firstLetter, [])
         if configFileFlag:
             # a row without a link cannot point to a config file
             link = row.find('a')
             url = ''
             if link is not None:
                 url = link.get('href') or ''
             if not ('_cff' in url or '_cfi' in url or '_cfg' in url):
                 continue
         bucket.append(row)
     return pages

 # load rows from the package documentation page. output structure:
 # pages = {'PackageA' : [..], 'PackageB' : [...]}
 def extractPagesForPackage():
     """Group the rows of the package documentation table by package name.

     The table is looked up in the module-level ``htmlPage`` by the class
     name stored in ``tableClassName``. The package name of a row is the
     text of its first cell between the first blank and the first '/'.

     Returns:
         dict mapping a package name to the list of its row tags, e.g.
         ``{'PackageA': [...], 'PackageB': [...]}``.

     Raises:
         IndexError: if a row has no ``td`` cell.
     """
     pages = {}
     table = htmlPage.find('table', {'class' : tableClassName})
     for row in table.findAll('tr'):
         # first cell contains name of the package...
         label = row.findAll('td')[0].text
         # parse package names --please have a look at the pages.html file.
         # str.find returns -1 for a missing delimiter; used directly as a
         # slice bound that would cut off the last character (no '/') or
         # yield a bogus range (no blank), so fall back to the text ends.
         start = label.find(' ')
         if start < 0:
             start = 0
         end = label.find('/')
         if end < 0:
             end = len(label)
         name = label[start:end].strip()
         # create the page if the package is not added yet
         pages.setdefault(name, []).append(row)
     return pages

 # generate alphabetic tab for html pages that will be generated by this script
 def generateTab(items, curr, tabClass = 'tabs3'):
     """Build the html of the index tab that links the generated pages.

     Args:
         items: page names, in the order the links should appear.
         curr: name of the page the tab is generated for; its entry gets
             the 'current' class.
         tabClass: class attribute of the enclosing div.

     Returns:
         The tab as one html string: a div holding a 'tablist' ul with one
         li/a entry per item. Link targets come from the module-level
         ``fileNameTemplate``, with blanks in the item replaced by '_'.
     """
     entries = []
     for item in items:
         fn = fileNameTemplate % item.replace(' ', '_') # generate file name
         if item != curr:
             entries.append('<li><a href="%s">%s</a></li>' % (fn, item))
         else:
             entries.append(
                 '<li class="current"><a href="%s">%s</a></li>' % (fn, item))
     return '<div class="%s"><ul class="tablist">%s</ul></div>' % (
         tabClass, ''.join(entries))

 if __name__ == "__main__":
     # Script entry point: split the index page given as first argument
     # into one html file per page key plus an 'All' page, each carrying an
     # extra index tab that links all generated files. The files are
     # written next to the input file.
     if len(sys.argv) < 2:
         sys.stderr.write("not enough parameter!\n")
         sys.exit(1)

     # initialize the module-level variables read by the functions above
     htmlFullPath     = sys.argv[1]
     htmlFilePath, htmlFileName = os.path.split(htmlFullPath)
     fileNameTemplate = htmlFileName.replace('.html', '_%s.html')

     # load the html page
     with open(htmlFullPath) as f:
         htmlPage = BeautifulSoup(f.read())

     # fail with a readable message instead of an AttributeError on None
     if htmlPage.find('table', {'class' : tableClassName}) is None:
         sys.stderr.write("table '%s' not found in %s\n"
                          % (tableClassName, htmlFullPath))
         sys.exit(1)

     # please have a look at the pages.html page. The tab next to which the
     # index tab is inserted has a different class name there than in
     # namespaces.html (namespace list) and annotated.html (~class list),
     # which both use tabs2. This is why the destination tab class name
     # depends on the html file name.
     if htmlFileName == 'packageDocumentation.html':
         pages = extractPagesForPackage()
         destTabClassName = 'tabs'
     elif htmlFileName == 'configfiles.html':
         pages = extractPages(configFileFlag = True)
         destTabClassName = 'tabs2'
     else:
         pages = extractPages()
         destTabClassName = 'tabs2'

     # same check for the tab, done before any output file is written
     if htmlPage.find('div', {'class' : destTabClassName}) is None:
         sys.stderr.write("tab '%s' not found in %s\n"
                          % (destTabClassName, htmlFullPath))
         sys.exit(1)

     # the 'All' page holds every row, in sorted page order
     pageNames = sorted(pages)
     allRows = []
     for page in pageNames:
         allRows.extend(pages[page])
     pages['All'] = allRows
     pageNames.append('All')

     # prepare the template
     table = htmlPage.find('table', {'class' : tableClassName})
     # generate template (clean whole table content)
     for row in table.findAll('tr'):
         row.extract()

     # generate pages
     for page in pageNames:
         print('generating %s...' % (fileNameTemplate % page))
         # every page starts from a fresh copy of the emptied template
         temp   = BeautifulSoup(str(htmlPage))
         table  = temp.find('table', {'class' : tableClassName})
         oldTab = temp.find('div', {'class' : destTabClassName})
         newTab = generateTab(pageNames, page)
         # keep the existing tab and put the index tab right after it
         oldTab.replaceWith(BeautifulSoup(oldTab.prettify() + str(newTab)))
         for row in pages[page]:
             table.append(row)
         # blank characters are replaced with '_' in the file name only.
         # os.path.join keeps the path relative when the input was given
         # without a directory ('%s/%s' turned that into '/<file name>').
         outPath = os.path.join(htmlFilePath,
                                fileNameTemplate % page.replace(' ', '_'))
         with open(outPath, 'w') as f:
             f.write(str(temp))
TableParser.extractPagesForPackage
def extractPagesForPackage()
Definition: TableParser.py:48

BeautifulSoup.BeautifulSoup
Definition: BeautifulSoup.py:1470

TableParser.extractPages
def extractPages(configFileFlag=False)
Definition: TableParser.py:19

mps_setup.append
append
Definition: mps_setup.py:85

digitizers_cfi.strip
strip
Definition: digitizers_cfi.py:19

TableParser.generateTab
def generateTab(items, curr, tabClass='tabs3')
Definition: TableParser.py:63

pileupCalc.upper
upper
Definition: pileupCalc.py:241

str
#define str(s)
Definition: TestProcessor.cc:48