CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_0/src/Documentation/ReferenceManualScripts/doxygen/utils/linker/linker.py

Go to the documentation of this file.
00001 import sys
00002 import os
00003 import re
00004 from BeautifulSoup import BeautifulSoup
00005 
# URL prefix prepended to every link target generated by this script.
BASE = "/cmsdoxygen/"
# Cache mapping an already resolved keyword to its link target; it is shared
# by all pages handled in one run so each keyword is looked up only once.
INDEX = {}
# Set to True to print every keyword/link pair that gets inserted.
printOutput = False
00009 
def replace(regex, replacement, content):
    """Return *content* with every match of *regex* replaced.

    Matching is case-insensitive.  *replacement* is used as a regular
    expression replacement template, so group references such as ``\\1``
    are expanded.
    """
    return re.compile(regex, re.IGNORECASE).sub(replacement, content)
00014 
def findMatchingFiles(w, source_htmls):
    """Return the paths in *source_htmls* whose file name contains *w*.

    Only the last ``/``-separated component of each path is inspected.
    The result is one string in which every matching path is preceded by a
    single space (so a non-empty result starts with a space); it is ``""``
    when nothing matches.
    """
    return "".join(" " + srcFile for srcFile in source_htmls
                   if w in srcFile.split("/")[-1])
00022 
def filter(s, w, k):
    """Pick the best candidate path for word *w* out of *s*.

    s -- whitespace-separated candidate paths (as built by
         findMatchingFiles).
    w -- the word to look for; a candidate qualifies only if it contains
         *w* with a non-letter character directly before and after it.
         *w* is inserted into a regular expression as-is.
    k -- regular expression fragment used as a preference hint: the first
         qualifying candidate that also matches *k* is returned at once.

    With several candidates and no hint match, the shortest qualifying
    candidate is returned.  With zero or one candidate, *s* itself is
    returned unchanged (leading whitespace included) if it qualifies.
    Returns "" when nothing qualifies.
    """
    word_re = re.compile("[^a-zA-Z]" + w + "[^a-zA-Z]")
    candidates = s.split()

    if len(candidates) <= 1:
        # The whole input is tested here, so the space in front of a single
        # path can serve as the non-letter before the word.
        if word_re.search(s):
            return s
        return ""

    betterChoice = ""
    for candidate in candidates:
        if not word_re.search(candidate):
            continue
        if re.search(".*" + k + ".*", candidate):
            return candidate
        # Compare lengths on both sides: the previous code compared an int
        # with the candidate string itself, so the fallback was never
        # improved after the first hit.
        if betterChoice == "" or len(candidate) < len(betterChoice):
            betterChoice = candidate
    return betterChoice
00040         
def getLink(word):
    """Return the link target for *word*, or "" if there is none.

    Purely numeric words and words shorter than five characters are never
    linked.  Otherwise the Python source pages (py_source_htmls) are
    searched first and the header source pages (h_source_htmls) second; the
    chosen path is returned with BASE prepended.
    """
    if word.isdigit() or len(word) < 5:
        return ""

    for source_htmls in (py_source_htmls, h_source_htmls):
        out = filter(findMatchingFiles(word, source_htmls), word, "")
        if out:
            # A single candidate comes back with its leading space intact.
            return BASE + out.lstrip()
    return ""
00052 
def process(filename):
    """Turn config-file keywords in one doxygen HTML page into links.

    The page is read, keywords are collected from its
    ``<pre class="fragment">`` code blocks, and every whole-word,
    case-insensitive occurrence of a resolvable keyword anywhere in the page
    is replaced by an ``<a class="configfileLink">`` element.  The file is
    then rewritten in place.  Resolved keywords are cached in INDEX.

    Nothing is done when *filename* is None or shorter than five characters
    (which also skips empty strings left over from splitting command
    output).
    """
    if filename is None or len(filename) < 5:
        return

    with open(filename, 'r') as fh:
        html = fh.read()

    # Keywords are only looked for inside the code fragments of the page.
    soup = BeautifulSoup(html)
    pres = soup.findAll("pre", {"class": "fragment"})
    content = "".join(pre.renderContents() for pre in pres
                      if pre.contents is not None)

    # Drop existing links together with their text so that words which are
    # already linked are not collected again.
    content = replace(r'<a\b[^>]*>(.*?)</a>', '', content)

    content = content.replace("&#39;", "'")
    content = content.replace("&quot;", '"')

    # Candidates: dotted names ending in _cfi/_cfg/_cff (split into their
    # components) and quoted single words.
    # NOTE(review): the character classes also admit a literal comma, which
    # looks unintended; left unchanged to keep the matched set stable.
    candidates = []
    for dotted in re.findall(r'[\w,\.]+_cf[i,g,f]', content):
        candidates.extend(dotted.split("."))
    candidates.extend(re.findall(r'"\w+"', content))
    candidates.extend(re.findall(r"'\w+'", content))

    # Strip the quotes BEFORE removing duplicates: otherwise foo, "foo" and
    # 'foo' count as different keywords and the same word is wrapped in a
    # link several times.  First-seen order keeps the result deterministic.
    seen = set()
    keywords = []
    for candidate in candidates:
        keyword = candidate.replace("'", "").replace('"', "")
        if keyword not in seen:
            seen.add(keyword)
            keywords.append(keyword)

    for match in keywords:
        if match in INDEX:
            href = INDEX[match]
        else:
            href = getLink(match)

        if href == "":
            continue

        # Make the target relative to the release directory before it is
        # used, so the first page gets the same link as later pages served
        # from the cache.  Targets without "CMSSW_" are kept as they are.
        pos = href.find("CMSSW_")
        if pos != -1:
            href = BASE + href[pos:]
        INDEX[match] = href

        link = "<a class=\"configfileLink\" href=\"" + href + "\">" + match + "</a>"
        regex = r"\b" + re.escape(match) + r"\b"
        html = replace(regex, link, html)

        if printOutput:
            print(">>>>>[" + match + "] " + href)

    with open(filename, 'w') as fh:
        fh.write(html)
00121 
def _find(args):
    """Return the non-empty output lines of ``find`` run with *args*.

    The command is started without a shell, so directory names containing
    spaces or shell metacharacters are passed through unchanged and the
    glob patterns reach ``find`` unexpanded.
    """
    import subprocess  # local import: only the script entry point needs it
    proc = subprocess.Popen(["find"] + args, stdout=subprocess.PIPE,
                            universal_newlines=True)
    output = proc.communicate()[0]
    # Splitting on newlines leaves an empty string after the last line.
    return [line for line in output.split("\n") if line]


if len(sys.argv) > 1:

    # The first argument is the release directory; the generated doxygen
    # pages are expected below its doc/html/ sub-directory.
    DIR = sys.argv[1] + "/doc/html/"

    # Progress message ("ieskau" is Lithuanian for "searching for").
    print("ieskau h_source")
    h_source_htmls = _find([DIR, "-name", "*8h_source.html", "-print"])

    print("ieskau py_source")
    py_source_htmls = _find([DIR, "-name", "*8py_source.html", "-print"])

    # Pages to rewrite: anything found by the two cfi/cfg/cff name patterns.
    files = _find([DIR, "(", "-name", "*cf[i,g,f]*py*html",
                   "-or", "-name", "namespace*cf[i,g,f].html", ")",
                   "-print"])
    for i, path in enumerate(files):
        print(str(i + 1) + ") " + path)
        process(path)
    print("-----------------------------------------------------------")
else:
    print("not enough parameters")