00001
00002
00003
00004
00005
00006
00007 import sys
00008
class Int:
    """Mutable integer counter.

    Wrapping the number in an object lets a count stored in a container be
    bumped in place with ``inc()`` instead of re-assigning the entry.
    """

    def __init__(self, value=1):
        """Start the counter at *value* (default 1)."""
        self.value = value

    def inc(self, step=1):
        """Increase the counter by *step* (default 1)."""
        self.value += step

    def __repr__(self):
        """Return the bare number so the counter prints like a plain int."""
        return str(self.value)
00016
class NameLine:
    """One whitespace-separated record of a names file.

    Columns read by this class: 0 = id, 2 = name, 3 = hits, 4 = seen.
    The raw split fields stay available as ``attr``.
    """

    def __init__(self, line):
        """Split *line* and parse its numeric columns.

        Raises IndexError if the line has fewer than five fields and
        ValueError if a numeric column is not an integer.
        """
        self.attr = line.split()
        # The parsed values are kept under private names: assigning them to
        # ``self.hits`` / ``self.seen`` / ``self.id`` would shadow the
        # accessor methods below, so ``line.id()`` would try to call an int.
        self._seen = int(self.attr[4])
        self._hits = int(self.attr[3])
        self._id = int(self.attr[0])

    def hits(self):
        """Return the hit count (column 3) as an int."""
        return self._hits

    def seen(self):
        """Return the seen count (column 4) as an int."""
        return self._seen

    def id(self):
        """Return the record id (column 0) as an int."""
        return self._id

    def name(self):
        """Return the name (column 2) as a string."""
        return self.attr[2]
00027
class PathLine:
    """One whitespace-separated record of a paths file.

    Columns read by this class: 0 = id, 1 = hits.  The raw split fields stay
    available as ``attr``.
    """

    def __init__(self, line):
        """Split *line* and parse its id and hit count.

        Raises IndexError if the line has fewer than two fields and
        ValueError if either column is not an integer.
        """
        self.attr = line.split()
        # Private names keep the parsed ints from shadowing the accessor
        # methods below (``self.hits = ...`` would make ``line.hits()`` fail).
        self._hits = int(self.attr[1])
        self._id = int(self.attr[0])

    def hits(self):
        """Return the hit count (column 1) as an int."""
        return self._hits

    def seen(self):
        """Return the hit count; this class reads no separate seen column."""
        return self._hits

    def id(self):
        """Return the record id (column 0) as an int."""
        return self._id
00036
class MatchId:
    """Matcher that accepts a line whose id equals a fixed value."""

    def __init__(self, id):
        """Remember the *id* to compare against."""
        self.id = id

    def match(self, nline):
        """Return True when *nline*'s id equals the stored id.

        *nline* may expose ``id`` either as an accessor method or as a plain
        attribute; both are accepted so the matcher does not break when the
        line object stores its id directly on the instance.
        """
        line_id = nline.id
        if callable(line_id):
            line_id = line_id()
        return self.id == line_id
00042
class MatchLessSeen:
    """Matcher that accepts a line whose column 4 exceeds a threshold."""

    def __init__(self, count):
        """Remember the numeric threshold *count*."""
        self.count = count

    def match(self, nline):
        """Return True when ``count`` is strictly less than column 4.

        ``nline.attr`` holds the raw string fields, so the column is
        converted to int first; comparing the int threshold with the string
        is always True on Python 2 and a TypeError on Python 3.
        """
        return self.count < int(nline.attr[4])
00048
class MatchLessHit:
    """Matcher that accepts a line whose column 3 exceeds a threshold."""

    def __init__(self, count):
        """Remember the numeric threshold *count*."""
        self.count = count

    def match(self, nline):
        """Return True when ``count`` is strictly less than column 3.

        ``nline.attr`` holds the raw string fields, so the column is
        converted to int first; comparing the int threshold with the string
        is always True on Python 2 and a TypeError on Python 3.
        """
        return self.count < int(nline.attr[3])
00054
class MatchIdSet:
    """Matcher that accepts a line whose id is a key of a given dict."""

    def __init__(self, idset):
        """Remember *idset*, a dict-like object queried with ``.get``."""
        self.idset = idset

    def match(self, nline):
        """Return True when the id in column 0 maps to a non-None value.

        The column is a raw string, so it is looked up as an int first; the
        string form is still tried afterwards so a dict keyed by the raw
        text keeps working.
        """
        raw_id = nline.attr[0]
        found = self.idset.get(int(raw_id))
        if found is None:
            found = self.idset.get(raw_id)
        return found is not None
00060
class Match:
    """Placeholder for a matcher that was declared but never written.

    The declaration originally had no colon and no body, which is a syntax
    error that stops the whole module from loading.  It is kept as an empty,
    valid class until its intended behaviour is defined.
    """
00062
class Parse:
    """Filter "<prefix>names" / "<prefix>paths" files into a new prefix.

    File names are built by appending "names", "paths", "edges" and "totals"
    to the input or output prefix.  Name lines are whitespace separated with
    the id in column 0 and the hit count in column 3.
    """

    def __init__(self, pre_in, pre_out):
        """Derive the input and output file names from the two prefixes."""
        self.in_names = pre_in + "names"
        self.in_paths = pre_in + "paths"
        # Outputs are built from the *output* prefix; building them from the
        # input prefix makes every writer truncate the file it is reading.
        self.out_names = pre_out + "names"
        self.out_paths = pre_out + "paths"
        self.out_edges = pre_out + "edges"
        self.out_totals = pre_out + "totals"
        # Number of path lines kept by the most recent trimPaths() call.
        self.tot_paths = 0

    def _filter_names(self, decide):
        """Copy selected name lines from in_names to out_names.

        *decide(line, fields)* returns a negative number to stop reading, a
        true value to keep the line, or a false value to skip it.  Blank
        lines are ignored.  Returns a dict mapping each kept id to 1.
        """
        names = {}
        with open(self.in_names, 'r') as fin_names:
            with open(self.out_names, 'w') as fout_names:
                for line in fin_names:
                    fields = line.split()
                    if not fields:
                        continue
                    verdict = decide(line, fields)
                    if verdict < 0:
                        break
                    if verdict:
                        names[int(fields[0])] = 1
                        fout_names.write(line)
        return names

    def cut(self, matcher):
        """Keep the name lines accepted by *matcher*.

        *matcher* is either an object with a ``match(nline)`` method or a
        plain callable; it receives a NameLine and may return a negative
        number to stop reading early.  Returns a dict of the kept ids.
        """
        test = getattr(matcher, "match", matcher)
        return self._filter_names(lambda line, fields: test(NameLine(line)))

    def selectOneName(self, value):
        """Keep name lines up to the first one whose hit count equals *value*.

        NOTE(review): the matching line itself is not kept, exactly as in the
        loop this replaces; confirm whether "select one" was meant to keep
        only the matching line instead.
        """
        return self._filter_names(
            lambda line, fields: -1 if int(fields[3]) == value else 1)

    def trimNames(self, cutoff):
        """Keep name lines until the first one with fewer than *cutoff* hits.

        Reading stops at that line, so this presumably expects the file to be
        sorted by descending hit count -- TODO confirm.  Returns a dict of
        the kept ids.
        """
        return self._filter_names(
            lambda line, fields: -1 if int(fields[3]) < cutoff else 1)

    def selectManyNames(self, ids):
        """Keep the name lines whose id maps to a non-None value in *ids*.

        *ids* is a dict keyed by int id.  Returns a dict of the kept ids
        (previously nothing was returned).
        """
        return self._filter_names(
            lambda line, fields: ids.get(int(fields[0])) is not None)

    def trimPaths(self, cutoff):
        """Copy path lines with at least *cutoff* hits to out_paths.

        NOTE(review): the original body stopped after opening both files, so
        this filter is a reconstruction.  It assumes column 1 is the hit
        count (the column PathLine reads) and skips low-count lines the same
        way runme() does -- confirm.  Stores the number of kept lines in
        ``self.tot_paths`` and returns it.
        """
        kept = 0
        with open(self.in_paths, 'r') as fin_paths:
            with open(self.out_paths, 'w') as fout_paths:
                for line in fin_paths:
                    fields = line.split()
                    if len(fields) < 2 or int(fields[1]) < cutoff:
                        continue
                    fout_paths.write(line)
                    kept += 1
        self.tot_paths = kept
        return kept

    def pathContaining(self, id):
        """Not implemented yet; does nothing and returns None."""
        pass
00123
def runme(in_nodefile, in_treefile, out_treefile, cutoff, cuttype):
    """Count the edges of all sufficiently frequent paths in a tree file.

    Each line of *in_treefile* is "<id> <total> <node> <node> ...".  Lines
    whose total is below *cutoff* are skipped and their total is echoed to
    stdout.  For the remaining lines every consecutive node pair is one
    edge.  *out_treefile* receives one "<count>   <head>   <tail>" line per
    distinct edge.

    *in_nodefile* and *cuttype* are accepted for interface compatibility but
    are not used by the edge count.
    """
    edges = {}
    with open(in_treefile, 'r') as fin_paths:
        for line in fin_paths:
            fields = line.split()
            if len(fields) < 2:
                # Blank or truncated line: nothing to count.
                continue
            tot = int(fields[1])
            if tot < cutoff:
                sys.stdout.write("%d\n" % tot)
                continue
            nodes = [int(field) for field in fields[2:]]
            # Pair each node with its successor to walk the path's edges.
            for edge in zip(nodes, nodes[1:]):
                edges[edge] = edges.get(edge, 0) + 1

    with open(out_treefile, 'w') as fout:
        for (head, tail), count in edges.items():
            fout.write("%s   %s   %s\n" % (count, head, tail))
00152
def main(argv):
    """Parse the command line in *argv* and run the edge count.

    Expects five arguments after the program name: node file, tree file,
    output file, cutoff and type.  Prints a usage message to stdout and
    exits with status 1 when any of them is missing.
    """
    # Five arguments are read below, so argv needs six entries; the former
    # check of "< 5" let a four-argument call crash on the fifth argument.
    if len(argv) < 6:
        sys.stdout.write(
            "usage:  %s  in_nodefile in_treefile out_treefile cutoff type\n"
            % argv[0])
        sys.stdout.write(
            " type = 0 means accept one exact match for cutoff value\n")
        sys.stdout.write(
            " type = 1 means accept anything >= cutoff value\n")
        sys.exit(1)

    in_nodefile = argv[1]
    in_treefile = argv[2]
    out_treefile = argv[3]
    cutoff = int(argv[4])
    cuttype = int(argv[5])

    runme(in_nodefile, in_treefile, out_treefile, cutoff, cuttype)


if __name__ == "__main__":
    main(sys.argv)