00001 import xml.dom.minidom as dom 00002 import sys, os, optparse 00003 00004 class OptionParser(optparse.OptionParser): 00005 """ 00006 OptionParser is main class to parse options. 00007 """ 00008 def __init__(self): 00009 optparse.OptionParser.__init__(self, usage="%prog --help or %prog [options] file", version="%prog 0.0.1", conflict_handler="resolve") 00010 self.add_option("--src", action="store", type="string", dest="src", help="specify source XML file") 00011 self.add_option("--min", action="store", type="int", dest="min", help="Minimum length to measure") 00012 self.add_option("--max", action="store", type="int", dest="max", help="Maximum length to measure") 00013 self.add_option("--cid", action="store", type="int", dest="cid", help="Apply combination ID") 00014 self.add_option("--xsd", action="store_true", dest="xsd", help="Create XML Schema fragment") 00015 00016 def read_data(): 00017 print "Reading histogram file" 00018 n = 0 00019 histos = srcdoc.getElementsByTagName("Histogram") 00020 for histo in histos: 00021 h = [] 00022 for key in histo.childNodes: 00023 if key.nodeType == key.ELEMENT_NODE: 00024 name = key.localName 00025 value = key.childNodes[0].nodeValue 00026 found = 0 00027 00028 if not elements.has_key(name): 00029 elements[name] = {'type': '', 'count': 0} 00030 elements[name]['count'] = elements[name]['count'] + 1 00031 00032 try: 00033 i = int(value) 00034 if elements[name]['type'] == '': 00035 elements[name]['type'] = 'xs:integer' 00036 except ValueError: 00037 try: 00038 i = float(value) 00039 if elements[name]['type'] in ('', 'xs:integer'): 00040 elements[name]['type'] = 'xs:double' 00041 except ValueError: 00042 elements[name]['type'] = 'xs:string' 00043 00044 for k in keys.keys(): 00045 if keys[k]['name'] == name and keys[k]['value'] == value: 00046 keys[k]['count'] = keys[k]['count'] + 1 00047 h.append(k) 00048 found = 1 00049 break 00050 if found == 0: 00051 keys[n] = {'name': name, 'value': value, 'count': 1} 00052 h.append(n) 00053 n += 1 00054 h.sort() 00055 histograms.append(h) 00056 00057 def create_xsd(): 00058 for k in keys.keys(): 00059 name = keys[k]['name'] 00060 00061 root = resdoc.createElement("xs:complexType") 00062 root.setAttribute("name", "HistogramType") 00063 resdoc.appendChild(root) 00064 seq = resdoc.createElement("xs:all") 00065 root.appendChild(seq) 00066 for e in sorted(elements.keys()): 00067 el = resdoc.createElement("xs:element") 00068 el.setAttribute("name", e) 00069 el.setAttribute("type", elements[e]['type']) 00070 if elements[e]['count'] < len(histograms): 00071 el.setAttribute("minOccurs", '0') 00072 el.setAttribute("maxOccurs", '1') 00073 seq.appendChild(el) 00074 00075 def create_declaration(cid): 00076 co = comb[cid] 00077 print "Declaration to apply:", co 00078 for k in comb[cid]: 00079 print keys[k]['name'], '=', keys[k]['value'] 00080 00081 def cexists(s, c): 00082 d = len(c) 00083 for v1 in s: 00084 for v2 in c: 00085 if v1 == v2: 00086 d = d - 1 00087 return (d == 0) 00088 00089 def ccopy(a): 00090 r = [] 00091 for v in a: 00092 r.append(v) 00093 return r 00094 00095 def kpermutation(vfrom, vto, min, max): 00096 vto = vto + 1 00097 queue = [] 00098 for i in range(vfrom, vto): 00099 for j in range(i, vto): 00100 queue.append(j) 00101 if len(queue) >= min and len(queue) <= max: 00102 yield queue 00103 queue = [] 00104 00105 def compute(min, max): 00106 print "Computing permutations" 00107 for v in kpermutation(0, len(keys), min, max): 00108 ci = -1 00109 for h in histograms: 00110 if cexists(h, v): 00111 if ci == -1: 00112 ci = len(comb) 00113 comb[ci] = ccopy(v) 00114 results[ci] = [h] 00115 else: 00116 results[ci].append(h) 00117 00118 def priorities(): 00119 for ci in comb.keys(): 00120 l = len(results[ci]) 00121 if l == 1: 00122 continue 00123 if not prior.has_key(l): 00124 prior[l] = [ci] 00125 else: 00126 prior[l].append(ci) 00127 00128 if __name__ == "__main__": 00129 00130 optManager = OptionParser() 00131 (opts, args) = optManager.parse_args() 00132 opts = opts.__dict__ 00133 00134 if opts['src'] in ('', None): 00135 print "You must specify a valid source xml file" 00136 sys.exit(0) 00137 00138 resdoc = dom.Document() 00139 srcdoc = dom.parse(opts['src']) 00140 00141 histograms = [] 00142 keys = {} 00143 results = {} 00144 comb = {} 00145 prior = {} 00146 elements = {} 00147 len_min = 1000000 00148 len_max = 0 00149 00150 read_data() 00151 00152 if opts['xsd'] != None: 00153 00154 create_xsd() 00155 print resdoc.toprettyxml() 00156 00157 else: 00158 00159 for h in histograms: 00160 if len(h) > len_max: len_max = len(h) 00161 if len(h) < len_min: len_min = len(h) 00162 print "Computed len: min = ", len_min, ", max = ", len_max 00163 00164 min = 2 00165 if opts['min'] not in (0, None): min = opts['min'] 00166 max = len_max 00167 if opts['max'] not in (0, None): max = opts['max'] 00168 print "Computing lens from", min, " to ", max 00169 00170 compute(min, max) 00171 priorities() 00172 00173 for pi in sorted(prior.keys()): 00174 print pi, "=", prior[pi] 00175 00176 if opts['cid'] != None: 00177 create_declaration(opts['cid'])