-
Notifications
You must be signed in to change notification settings - Fork 9
/
Code6_1_TrieConstruction.py
62 lines (54 loc) · 1.64 KB
/
Code6_1_TrieConstruction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#Chunyu Zhao 20151128
import sys
class Trie(object):
radix = 5
''' R-way Trie Node'''
class Node(object):
def __init__(self,number):
self.number = number
self.child = [None]*Trie.radix
self.leave = [0]*Trie.radix
def __init__(self,patterns):
self.nodes = [self.Node(0)]
self.N = 1 #number of nodes in Trie
self.patterns = patterns
def constructTrie(self):
for pattern in self.patterns:
self.put(pattern)
def put(self,pattern):
currentNode = self.nodes[0] #root
for ci in xrange(len(pattern)):
currentSymbol = pattern[ci]
currentSymbolIndex = 'ACGT$'.index(currentSymbol)
if currentNode.child[currentSymbolIndex] is None:
#add a new Node
newNode = self.Node(self.N)
self.nodes.append(newNode)
currentNode.child[currentSymbolIndex] = newNode.number
self.nodes[currentNode.number] = currentNode
self.N += 1
if ci == len(pattern)-1:
self.nodes[currentNode.number].leave[currentSymbolIndex] = 1 #
currentNode = newNode
else:
if ci == len(pattern)-1:
self.nodes[currentNode.number].leave[currentSymbolIndex] = 2
currentNode = self.nodes[currentNode.child[currentSymbolIndex]]
def printTrie(self):
nodes = self.nodes
for node in nodes:
idxs = [i for i in range(Trie.radix) if node.child[i] is not None]
for idx in idxs:
print "%d->%d:%s" % (node.number,node.child[idx],'ACGT$'[idx])
def main():
if len(sys.argv) == 2:
filename = sys.argv[1]
with open(filename) as f:
patterns = f.read().splitlines()
else:
patterns = ['ATAGA','ATA','ATGC','GAT']
trieObj = Trie(patterns)
trieObj.constructTrie()
trieObj.printTrie()
if __name__ == '__main__':
main()