CMS 3D CMS Logo

All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
MainPageGenerator.py
Go to the documentation of this file.
1 # email: cmsdoxy@cern.ch, ali.mehmet.altundag@cern.ch
2 
3 # This script generates the main pages of the CMSSW Refman from several
4 # sources: the doxygen-generated html files and the people working on CMSSW
5 # together with their email details. As stated in the other parsers, you may
6 # need to change the html tag/attr names if a new doxygen version changes its output.
7 # this script needs:
8 # + index.html : will be used as a template file.
9 # : keep in mind that the doc/html directory is taken from the
10 # path of this file. please see how I set the htmlFilePath var.
11 # + files.html : source of interface files
12 # + pages.html : to get package documentation links
13 # + classes.html : to get documentation page links
14 
15 import sys, os, urllib2, copy
16 from BeautifulSoup import *
17 try: import json
18 except ImportError: import simplejson as json
19 
# Location of the index.html page given on the command line (full path,
# directory part, file name part) and the parsed page itself. All of them
# are filled in by the __main__ block.
htmlFullPath = htmlFilePath = htmlFileName = htmlPage = None

# contentTmplOrg keeps the raw text of the index content template while
# contentTmpl holds its parsed version. The raw text is needed because the
# script tags of an edited BeautifulSoup document have to be restored at the
# string level afterwards; please have a look at the bsBugFix function.
# Note that this is a really sensitive approach: you may need to edit this
# python file if you change something in the template.
contentTmplOrg = contentTmpl = None

# tree view template (raw text)
treeViewTmpl = None

# base url of the json data source (the CMSSW version is appended to it in
# the __main__ block) and the format of the github links, which takes the
# CMSSW version and a path
dataSrc = 'http://cmsdoxy.web.cern.ch/cmsdoxy/cmssw/'
githubBase = 'https://github.com/cms-sw/cmssw/tree/{0}/{1}'

# decoded json data and the CMSSW version given on the command line
data = cmsswVersion = None
39 
def getFiles(filesPagePath):
    """Parse doxygen's files.html and return the interface file hierarchy.

    The directory table of files.html is flat: the depth of a row is encoded
    as the number of '_' characters in the id attribute of the row. The rows
    are replayed in order to rebuild the full path of every entry, and each
    path that contains an 'interface' component is stored in a tree of
    nested dicts (without the 'interface' component itself).

    filesPagePath -- path of the files.html page
    returns       -- nested dicts keyed by path component; the last component
                     of every stored path maps to an empty dict
    """
    with open(filesPagePath) as f:
        page = BeautifulSoup(f.read())
    # please have a look at the files.html page to understand the approach.
    table = page.find('table', {'class': 'directory'})
    data = {}
    level = 0
    path = []
    for row in table.findAll('tr'):
        # first cell is the cell where the name is stored. The link of the
        # cell is not needed here, so rows without an anchor are fine too.
        text = row.find('td').text
        currentLevel = row['id'].count('_')
        if currentLevel <= level:
            # same level: replace the last item. Lower level: drop all the
            # deeper items as well; 'plus one' is for the item that is
            # replaced at the current level.
            for _ in range(level - currentLevel + 1):
                path.pop()
        path.append(text)
        level = currentLevel
        # skip files which are not interface
        if 'interface' not in path:
            continue
        # no need to have the 'interface' node on the tree (only the first
        # occurrence is removed)
        pathWithoutInterface = list(path)
        pathWithoutInterface.remove('interface')
        # convert the path into the tree structure
        node = data
        for name in pathWithoutInterface:
            node = node.setdefault(name, {})
    return data
82 
def getPackages(packagesPagePath):
    """Collect the package documentation links listed in pages.html.

    packagesPagePath -- path of the pages.html page
    returns          -- two level dict; the first key is the part of the cell
                        text before '/', the second key is the part after it
                        (or the first part again when the text does not have
                        exactly two '/'-separated parts). Values are the
                        links of the cells, prefixed with '../'.
    """
    with open(packagesPagePath) as src:
        soup = BeautifulSoup(src.read())
    packages = {}
    directory = soup.find('table', {'class': 'directory'})
    for tr in directory.findAll('tr'):
        td = tr.find('td')
        link = '../' + td.find('a')['href']
        # plain string handling is not the best way of parsing but it is
        # simple; please see the pages.html file for the text format.
        parts = td.text.replace('Package ', '').split('/')
        first = parts[0]
        if len(parts) == 2:
            second = parts[1]
        else:
            second = first
        packages.setdefault(first, {})[second] = link
    return packages
98 
def getClasses(classesPagePath):
    """Collect the documentation page links listed in classes.html.

    classesPagePath -- path of the classes.html page
    returns         -- dict mapping the text of every linked table cell found
                       in the 'contents' div to its link, prefixed with '../'
    """
    with open(classesPagePath) as src:
        soup = BeautifulSoup(src.read())
    links = {}
    contents = soup.find('div', {'class': 'contents'})
    for td in contents.findAll('td'):
        anchor = td.find('a')
        # only cells that hold an anchor with an href are of interest
        if anchor and anchor.has_key('href'):
            links[anchor.text] = '../' + anchor['href']
    return links
109 
111  # please notice the following hard coded tags and class names, you may need
112  # to change them in future if doxygen changes its html output structure
113  header = htmlPage.find('div', {'class' : 'header'})
114  content = htmlPage.find('div', {'class' : 'contents'})
115 
116  for tag in header.findAll():
117  tag.extract()
118  for tag in content.findAll():
119  tag.extract()
120 
122  if inp.find_parent("script") is None: return EntitySubstitution.substitute_html(inp)
123  else: return inp
124 
def bsBugFix():
    """Return the html of htmlPage with the template's script tags restored.

    BeautifulSoup (bs) escapes javascript operators as html escape characters
    (presumably '>' becomes '&gt;'). To get rid of this issue, every script
    tag of the original content template (contentTmplOrg) is looked up in the
    serialized page and replaced at the string level by a script tag that
    holds the text of the tag.

    NOTE: the replacement is built as '<script>%s</script>' from the tag text
    only, so attributes of the original script tag are not copied.
    """
    fixed = str(htmlPage)
    template = BeautifulSoup(contentTmplOrg)
    for tag in template.findAll('script'):
        serialized = str(tag)
        restored = '<script>%s</script>' % tag.text
        fixed = fixed.replace(serialized, restored)
    return fixed
135 
def fillContentTemplate(domains):
    """Fill the index content template and attach it to the main page.

    For every domain (sorted by name) two table rows are generated: one with
    the domain name and the mailto links of its persons (sorted by name), and
    one that holds the accordion div with the iframe of the domain. The rows
    are appended to the table of contentTmpl, cmsswVersion is put into the
    'version' h2 tag, and contentTmpl is appended to the 'contents' div of
    htmlPage.

    domains -- dict: domain name -> dict of person name -> email address
    Uses the module level contentTmpl, cmsswVersion and htmlPage.
    """
    rowTmpl = '<tr id="{0}"><td width="50%">{1}</td><td>{2}</td></tr>'
    aTmpl = ('<tr style="padding:0"><td colspan="2" style="padding:0">\n'
             ' <div class="accordion" id="{0}">\n'
             ' <iframe width="100%" height="250px" frameborder="0"\n'
             ' data-src="iframes/{0}.html"> </iframe>\n'
             ' </div></td></tr>')
    # the person name is the link text and the email is the mailto target
    linkTmpl = '<a href="mailto:{1}">{0}</a>'
    rows = []
    for domain in sorted(domains):
        persons = domains[domain]
        cCell = ', '.join(linkTmpl.format(person, persons[person])
                          for person in sorted(persons))
        # spaces are removed as the name is used as an id and a file name
        escapedDomainName = domain.replace(' ', '')
        rows.append(rowTmpl.format(escapedDomainName, domain, cCell))
        rows.append(aTmpl.format(escapedDomainName))
    contentTmpl.find('table').append(BeautifulSoup(''.join(rows)))
    # put cmssw version
    contentTmpl.find('h2', {'id': 'version'}).append(cmsswVersion)
    content = htmlPage.find('div', {'class': 'contents'})
    content.append(contentTmpl)
162 
def generateTree(tree):
    """Recursively render a tree of nested dicts as an html list.

    The keys of the tree are rendered in sorted order, one <li> per key:
      + dict value        : a folder; rendered recursively. The special keys
                            '__git__' and '__packageDoc__' of the value are
                            not rendered as children but as '[git]' and
                            '[packageDoc]' links next to the folder title.
      + str/unicode value : a file; rendered as a link to the value.
      + anything else     : only the name is rendered, as plain text.

    The given tree is not modified.

    tree    -- dict to be rendered
    returns -- BeautifulSoup object that holds the <ul> tag (an empty
               document for an empty dict)
    """
    if isinstance(tree, dict) and not tree:
        return BeautifulSoup('')
    # special keys which address links; please see the section where we
    # merge all what we have (under the __main__ block)
    linkKeys = ('__git__', '__packageDoc__')
    root = BeautifulSoup('<ul></ul>')
    for name in sorted(tree):
        value = tree[name]
        node = BeautifulSoup('<li><div></div></li>')
        if isinstance(value, dict):
            title = BeautifulSoup('<span class="folder"></span>')
            title.span.append(name)
            node.li.append(title)
            for key in linkKeys:
                if key not in value:
                    continue
                link = BeautifulSoup(' <a></a>')
                link.a['target'] = '_blank'
                link.a['href'] = value[key]
                link.a.append('[%s]' % key.replace('_', ''))
                title.span.append(link)
            # the link keys are left out of a copy instead of being deleted
            # from the dict of the caller
            children = dict((key, child) for key, child in value.items()
                            if key not in linkKeys)
            if not children:
                title.span['class'] = 'emptyFolder'
            else:
                node.li.div['class'] = 'hitarea expandable-hitarea'
            node.li.append(generateTree(children))
        elif isinstance(value, (str, unicode)):
            link = BeautifulSoup('<a><span class="file"></span></a>')
            link.a['target'] = '_blank'
            link.a['href'] = value
            link.a.span.append(name)
            node.li.append(link)
        else:
            node.li.append(name)
        root.ul.append(node)
    return root
199 
def generateTreeViewPage(tree, name):
    """Generate the tree view page of a domain from treeViewTmpl.

    The rendered tree is appended to the 'browser' list of the template. The
    'twiki' link of the template gets the address stored for the name in
    data['TWIKI_PAGES'], or it is taken out of the page if there is none.

    tree    -- tree of the domain (see generateTree)
    name    -- domain name, used to look up the twiki page
    returns -- BeautifulSoup object of the generated page
    """
    soup = BeautifulSoup(treeViewTmpl)
    soup.find('ul', {'id': 'browser'}).append(generateTree(tree))
    twiki = soup.find('a', {'id': 'twiki'})
    twikiPages = data['TWIKI_PAGES']
    if name not in twikiPages:
        twiki.extract()
    else:
        twiki['href'] = twikiPages[name]
    return soup
210 
211 
if __name__ == "__main__":
    # Usage: <full path of index.html> <template directory> <CMSSW version>
    # The script rewrites index.html next to the given page and writes one
    # tree view page per domain under the iframes directory.
    if len(sys.argv) < 4:
        sys.stderr.write("not enough parameter!\n")
        sys.stderr.write("first pram must be full path of index.html page\n")
        sys.stderr.write("second pram must be full path of index template\n")
        sys.stderr.write("third one must be the CMSSW version\n")
        sys.exit(1)

    htmlFullPath = sys.argv[1]
    templateDir = sys.argv[2]
    cmsswVersion = sys.argv[3]
    # load index.html content template
    with open('%s/IndexContentTemplate.html' % templateDir) as f:
        contentTmplOrg = f.read()
    with open('%s/TreeViewTemplate.html' % templateDir) as f:
        treeViewTmpl = f.read()
    contentTmpl = BeautifulSoup(contentTmplOrg)
    dataSrc = dataSrc + cmsswVersion
    htmlFilePath, htmlFileName = os.path.split(htmlFullPath)

    # load html page
    with open(htmlFullPath) as f:
        htmlPage = BeautifulSoup(f.read())

    # get json data from cmsdoxy/CMSSWTagCollector (three attempts)
    for attempt in range(3):
        print('reading data from cmsdoxy/CMSSWTagCollector...')
        try:
            response = urllib2.urlopen(dataSrc)
            try:
                data = json.loads(response.read())
            finally:
                response.close()
            break
        except Exception as err:
            # Exception (not a bare except) so that Ctrl-C and sys.exit are
            # not swallowed by the retry loop
            print('I couldn\'t get the data. Trying again...')
            sys.stderr.write("reason: %s\n" % err)
    else:
        # if you cannot get data from the CMSSWTagCollector,
        # inform user and exit
        sys.stderr.write("I couldn't get the data from %s\n" % dataSrc)
        sys.stderr.write("I am not able to generate the main page, ")
        sys.stderr.write("I will leave it as it is...\n")
        sys.stderr.write("# PLEASE SEND AN EMAIL TO cmsdoxy[at]cern.ch\n")
        sys.exit(1)

    print('parsing source file hierarchy...')
    files = getFiles("%s/files.html" % htmlFilePath)

    print('parsing packages...')
    packages = getPackages('%s/pages.html' % htmlFilePath)

    print('parsing classes...')
    classes = getClasses("%s/classes.html" % htmlFilePath)

    # deep copy, so that the merge below does not touch the fetched data
    tree = copy.deepcopy(data['CMSSW_CATEGORIES'])
    print("generating tree views...")
    # merge files and the tree collected from cmsdoxy/CMSSWTagCollector
    for domain in tree:  # Core
        for l1 in tree[domain]:  # Configuration
            for l2 in tree[domain][l1]:
                package = tree[domain][l1][l2]
                # put github link
                package['__git__'] = githubBase.format(cmsswVersion,
                                                       '%s/%s' % (l1, l2))
                # prepare package documentation link if exists
                if l1 in packages and l2 in packages[l1]:
                    package['__packageDoc__'] = packages[l1][l2]
                if l1 not in files or l2 not in files[l1]:
                    continue
                for fileName in files[l1][l2]:
                    # no need to have header file extension (.h); only the
                    # suffix is removed, not a '.h' inside of the name
                    if fileName.endswith('.h'):
                        fileName = fileName[:-2]
                    # the entry is added to the package dict (the dict is
                    # not reset, so the links and the other files are kept);
                    # None marks a file without a documentation page
                    package[fileName] = classes.get(fileName)

    # we got the data from cmsdoxy/CMSSWTagCollector, we can start preparing
    # the html main page now.
    # NOTE(review): the listing this code comes from skips one numbered line
    # at this point; presumably a call that empties the header/contents divs
    # of htmlPage before they are filled -- confirm against the real file.

    print("generating mainpage...")
    fillContentTemplate(data['PERSON_MAP'])

    with open("%s/index.html" % htmlFilePath, 'w') as f:
        f.write(bsBugFix())

    print('generating domain pages...')
    # generate tree view pages; make sure that the target directory exists
    iframesDir = '%s/iframes' % htmlFilePath
    if not os.path.isdir(iframesDir):
        os.makedirs(iframesDir)
    for domain in tree:
        page = generateTreeViewPage(tree[domain], domain)
        fName = domain.replace(' ', '')
        with open('%s/%s.html' % (iframesDir, fName), 'w') as f:
            f.write(str(page))
def getFiles(filesPagePath)
def getPackages(packagesPagePath)
def getClasses(classesPagePath)
def generateTreeViewPage(tree, name)
def fillContentTemplate(domains)
double split
Definition: MVATrainer.cc:139