CMS 3D CMS Logo

TableParser.py
Go to the documentation of this file.
1 from __future__ import print_function
2 from __future__ import absolute_import
3 # email: cmsdoxy@cern.ch, ali.mehmet.altundag@cern.ch
4 
5 # please have a look at the namespaces.html (namespace list) and annotated.html
6 # (~class list) html files to understand the tags/attributes that we use in
7 # this script.
8 
9 from .BeautifulSoup import *
10 import sys, os, copy
11 
# Module-level state shared by the functions below. The path, template and
# page values are placeholders here; the __main__ section assigns them
# before any of the extract*/generateTab functions is called.
htmlFullPath = None      # path of the input html file (taken from sys.argv[1])
htmlFilePath = None      # directory part of htmlFullPath
htmlFileName = None      # file-name part of htmlFullPath
fileNameTemplate = None  # html file name template, e.g. 'name_%s.html'
htmlPage = None          # input page, parsed with BeautifulSoup
tableClassName = 'directory'  # class attribute of the table holding the rows
18 
19 # load rows from the table in [C]lass and [N]amespace list pages and prepare
20 # pages in the following structure: pages = {'A' : [...], 'B' : [...]}
def extractPages(configFileFlag = False):
    """Group the rows of the class/namespace table into per-letter pages.

    Reads the module-level ``htmlPage`` and takes every ``<tr>`` of the table
    whose class is ``tableClassName``. A row without a ``style`` attribute
    selects the page named after the upper-cased first character of its
    first cell. A row with a ``style`` attribute is a hidden (child) row and
    is filed on the page of the most recent non-hidden row, so children stay
    with their parents.

    configFileFlag -- when True, keep only the rows whose first link points
                      to a ``_cff``, ``_cfi`` or ``_cfg`` file.

    Returns a dict of the form ``{'A': [row, ...], 'B': [row, ...]}``. The
    'A' page is always present, even if it stays empty.
    """
    # initial page, A. It is also the fallback page for hidden rows that
    # appear before any visible row.
    firstLetter = 'A'
    pages = {firstLetter: []}
    # find all class/namespace table rows.
    table = htmlPage.find('table', {'class' : tableClassName})
    for row in table.findAll('tr'):
        # please see the related html file (annotated.html) to understand the
        # approach here: only hidden rows have a style attribute, and these
        # hidden rows must be added to the pages of their parents.
        # NOTE: `'style' in row` would test the tag's children, not its
        # attributes, so the attribute is looked up explicitly.
        isHidden = row.get('style') is not None
        # change the first letter only if the row is not a hidden (child) one
        if not isHidden:
            firstLetter = row.findAll('td')[0].text[0].upper()
        # if pages dict doesn't have the page yet..
        if firstLetter not in pages:
            pages[firstLetter] = []
        # insert the row into the related page
        if configFileFlag:
            # a row without a link cannot be a config file entry
            link = row.find('a')
            url = (link.get('href') or '') if link is not None else ''
            if '_cff' in url or '_cfi' in url or '_cfg' in url:
                pages[firstLetter].append(row)
        else:
            pages[firstLetter].append(row)
    return pages
47 
48 # load rows from the package documentation page. output structure:
49 # pages = {'PackageA' : [..], 'PackageB' : [...]}
def extractPagesForPackage():
    """Group the rows of the package documentation table by package name.

    Reads the module-level ``htmlPage`` and takes every ``<tr>`` of the table
    whose class is ``tableClassName``. The package name is the part of the
    first cell's text between the first blank and the first '/'.

    Returns a dict of the form
    ``{'PackageA': [row, ...], 'PackageB': [row, ...]}``.
    """
    pages = {}
    table = htmlPage.find('table', {'class' : tableClassName})
    for row in table.findAll('tr'):
        # first cell contains name of the package...
        text = row.findAll('td')[0].text
        # parse package names --please have a look at the pages.html file
        start = text.find(' ')
        end = text.find('/')
        # str.find returns -1 when the separator is absent; fall back to the
        # string boundaries instead of slicing with -1 (which would silently
        # cut off the last character).
        if start < 0:
            start = 0
        if end < 0:
            end = len(text)
        name = text[start:end].strip()
        # if the package is not added yet
        if name not in pages:
            pages[name] = []
        pages[name].append(row)
    return pages
63 
64 # generate alphabetic tab for html pages that will be generated by this script
def generateTab(items, curr, tabClass = 'tabs3'):
    """Build the html of the index tab that links all generated pages.

    items    -- page names, in the order they should appear in the tab
    curr     -- name of the page being generated; its entry gets the
                ``current`` class
    tabClass -- class attribute of the enclosing ``<div>``

    The link target of each entry is ``fileNameTemplate % name`` where blanks
    in the name are replaced with '_', which is the same rule the __main__
    section uses when it writes the page files.

    Returns the tab as an html string.
    """
    entries = []
    for item in items:
        fn = fileNameTemplate % item.replace(' ', '_') # generate file name
        if item == curr:
            entries.append('<li class="current"><a href="%s">%s</a></li>' % (fn, item))
        else:
            entries.append('<li><a href="%s">%s</a></li>' % (fn, item))
    return '<div class="%s"><ul class="tablist">%s</ul></div>' % (tabClass, ''.join(entries))
72 
# Script entry point: split the table of the given html page into one page
# per key (letter or package name) plus an 'All' page, and write each of them
# next to the input file as <name>_<key>.html.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.stderr.write("not enough parameter!\n")
        sys.exit(1)

    # initialize variables
    htmlFullPath = sys.argv[1]
    htmlFilePath, htmlFileName = os.path.split(htmlFullPath)
    fileNameTemplate = htmlFileName.replace('.html', '_%s.html')

    # load the html page
    with open(htmlFullPath) as f:
        htmlPage = f.read()
    htmlPage = BeautifulSoup(htmlPage)

    # please have a look at the pages.html page. You will see that class name
    # of the related tab, which we will use to put 'index tab' by using this
    # tab, is different for pages.html file. For namespaces.html (namespace
    # list) and annotated.html (~class list) files, class names are the same
    # tabs2. this is why we are setting 'the destination tab class name' up
    # differently depending on the html file name.
    if htmlFileName == 'packageDocumentation.html':
        pages = extractPagesForPackage()
        destTabClassName = 'tabs'
    elif htmlFileName == 'configfiles.html':
        pages = extractPages(configFileFlag = True)
        destTabClassName = 'tabs2'
    else:
        pages = extractPages()
        destTabClassName = 'tabs2'

    # sorted() returns a real list on both Python 2 and 3; dict.keys() has no
    # sort()/append() on Python 3.
    pageNames = sorted(pages)
    allRows = []
    for page in pageNames:
        allRows.extend(pages[page])
    pages['All'] = allRows
    pageNames.append('All')

    # prepare the template
    table = htmlPage.find('table', {'class' : tableClassName})
    # generate template (clean whole table content)
    for row in table.findAll('tr'):
        row.extract()

    # generate pages
    for page in pageNames:
        print('generating %s...' % (fileNameTemplate % page))
        # work on a fresh copy of the emptied template for every page
        temp = BeautifulSoup(str(htmlPage))
        table = temp.find('table', {'class' : tableClassName})
        oldTab = temp.find('div', {'class' : destTabClassName})
        newTab = generateTab(pageNames, page)
        # keep the existing tab and put the new index tab right after it
        oldTab.replaceWith(BeautifulSoup(oldTab.prettify() + str(newTab)))
        for row in pages[page]:
            table.append(row)
        # replace blank character with '_'. Please notice that you will not
        # be able to use original page name after this line.
        page = page.replace(' ', '_')
        # os.path.join keeps the output next to the input file even when the
        # input path has no directory part ('%s/%s' would then point to '/').
        outPath = os.path.join(htmlFilePath, fileNameTemplate % page)
        with open(outPath, 'w') as f:
            f.write(str(temp))
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def extractPagesForPackage()
Definition: TableParser.py:50
def extractPages(configFileFlag=False)
Definition: TableParser.py:21
def generateTab(items, curr, tabClass='tabs3')
Definition: TableParser.py:65
#define str(s)