Skip to content

Commit c0a9039

Browse files
committed
Add Huffman algorithm
1 parent facdb77 commit c0a9039

File tree

2 files changed

+205
-0
lines changed

2 files changed

+205
-0
lines changed
Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
package org.sean.tree;
2+
3+
import org.sean.trie.Trie;
4+
5+
import java.util.*;
6+
import java.util.stream.Collectors;
7+
8+
/**
 * Huffman encoder/decoder that represents each code as a string of {@code '0'} and {@code '1'}
 * characters.
 *
 * <p>{@link #encode(String)} derives a fresh code table from the character frequencies of its
 * argument and keeps the resulting tree; {@link #decode(String)} decodes against the tree left
 * behind by the most recent {@code encode} call. Instances hold mutable state without any
 * synchronization, so they must not be shared between threads without external locking.
 */
public class HuffmanCodec {
    /** Placeholder symbol stored in internal (non-leaf) nodes; it is never emitted. */
    private static final char INTERNAL_NODE_CHAR = 'X';

    /** Character appended by {@link #decode(String)} when the remaining input cannot be decoded. */
    private static final char UNDECODABLE = '\0';

    /** Node of the Huffman tree; leaves carry a symbol, internal nodes carry two children. */
    private static class Node implements Comparable<Node> {
        final char ch;
        final int frequency;
        final Node left;
        final Node right;

        /** Creates a leaf for {@code ch} occurring {@code frequency} times. */
        Node(char ch, int frequency) {
            this(ch, frequency, null, null);
        }

        /** Creates a node with the given children (both {@code null} for a leaf). */
        Node(char ch, int frequency, Node left, Node right) {
            this.ch = ch;
            this.frequency = frequency;
            this.left = left;
            this.right = right;
        }

        boolean isLeaf() {
            return left == null && right == null;
        }

        /** Orders nodes by ascending frequency so the priority queue yields the rarest first. */
        @Override
        public int compareTo(Node o) {
            return Integer.compare(this.frequency, o.frequency);
        }

        @Override
        public String toString() {
            return "Node{" +
                    "ch=" + ch +
                    ", frequency=" + frequency +
                    '}';
        }
    }

    /** Symbol to bit-string table for the most recently encoded input. */
    private final Map<Character, String> codecMap = new HashMap<>();

    /** Root of the tree for the most recently encoded input; {@code null} when there is none. */
    private Node root = null;

    /**
     * Rebuilds the Huffman tree and the code table from the characters of {@code raw}.
     *
     * <p>All state from a previous call is discarded first; otherwise codes of an earlier input
     * would linger and the combined code set would no longer be prefix-free.
     */
    private void buildTree(String raw) {
        codecMap.clear();
        root = null;

        Map<Character, Integer> frequencies = new HashMap<>();
        for (int i = 0; i < raw.length(); i++) {
            frequencies.merge(raw.charAt(i), 1, Integer::sum);
        }
        if (frequencies.isEmpty()) {
            return; // nothing to encode, leave the codec without a tree
        }

        // Queue ordered by frequency: the two rarest nodes are always at the head.
        PriorityQueue<Node> pq = new PriorityQueue<>();
        for (Map.Entry<Character, Integer> entry : frequencies.entrySet()) {
            pq.add(new Node(entry.getKey(), entry.getValue()));
        }

        // Repeatedly merge the two rarest nodes under a new parent until one tree remains.
        while (pq.size() > 1) {
            Node first = pq.poll();
            Node second = pq.poll();
            pq.offer(new Node(INTERNAL_NODE_CHAR, first.frequency + second.frequency, first, second));
        }
        root = pq.poll();

        buildCodecMap();
    }

    /**
     * Encodes {@code raw} with a Huffman code built from its own character frequencies.
     *
     * <p>The code table of any earlier call is replaced. An input consisting of a single distinct
     * character is encoded as one {@code '0'} per occurrence so that it can be decoded again.
     *
     * @param raw text to encode; must not be {@code null}
     * @return the concatenated codes as a string of {@code '0'} and {@code '1'} characters; empty
     *     when {@code raw} is empty
     * @throws NullPointerException if {@code raw} is {@code null}
     */
    public String encode(String raw) {
        Objects.requireNonNull(raw, "raw");
        buildTree(raw);

        StringBuilder builder = new StringBuilder();
        for (int i = 0; i < raw.length(); i++) {
            builder.append(codecMap.get(raw.charAt(i)));
        }
        return builder.toString();
    }

    /**
     * Decodes a bit string produced by the most recent {@link #encode(String)} call on this
     * instance.
     *
     * <p>Decoding walks the Huffman tree one input character at a time. If the input contains a
     * character that cannot be followed in the tree, ends in the middle of a code, or no tree is
     * available, a single {@code '\0'} is appended in place of the undecodable remainder and
     * decoding stops.
     *
     * @param encodedStr string of {@code '0'} and {@code '1'} characters; must not be {@code null}
     * @return the decoded text; empty when {@code encodedStr} is empty
     * @throws NullPointerException if {@code encodedStr} is {@code null}
     */
    public String decode(String encodedStr) {
        Objects.requireNonNull(encodedStr, "encodedStr");
        StringBuilder out = new StringBuilder();

        Node current = root;
        for (int i = 0; i < encodedStr.length(); i++) {
            Node next = step(current, encodedStr.charAt(i));
            if (next == null) {
                // No code can be completed from here on: mark the remainder and stop.
                out.append(UNDECODABLE);
                return out.toString();
            }
            if (next.isLeaf()) {
                out.append(next.ch);
                current = root; // start matching the next code from the top
            } else {
                current = next;
            }
        }

        // Input ended part-way through a code.
        if (current != root) {
            out.append(UNDECODABLE);
        }
        return out.toString();
    }

    /**
     * Follows one input character from {@code from} and returns the node reached, or {@code null}
     * when the character cannot be followed (no tree, or a character that is not a valid bit).
     */
    private Node step(Node from, char bit) {
        if (from == null) {
            return null;
        }
        if (from.isLeaf()) {
            // Only the root can be a leaf here: a single-symbol tree whose sole code is "0".
            return bit == '0' ? from : null;
        }
        if (bit == '0') {
            return from.left;
        }
        if (bit == '1') {
            return from.right;
        }
        return null;
    }

    /** Fills {@link #codecMap} from the current tree; requires a non-null {@link #root}. */
    private void buildCodecMap() {
        if (root.isLeaf()) {
            // A lone symbol would otherwise get the empty code and vanish from the output.
            codecMap.put(root.ch, "0");
            return;
        }
        assignCodes(root, new StringBuilder());
    }

    /**
     * Depth-first walk that records the path to every leaf, using {@code '0'} for a left edge
     * and {@code '1'} for a right edge. {@code path} is restored to its entry state on return.
     */
    private void assignCodes(Node node, StringBuilder path) {
        if (node.isLeaf()) {
            codecMap.put(node.ch, path.toString());
            return;
        }
        // Internal nodes are only ever created with two children, so both edges exist.
        path.append('0');
        assignCodes(node.left, path);
        path.setCharAt(path.length() - 1, '1');
        assignCodes(node.right, path);
        path.setLength(path.length() - 1);
    }
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
package org.sean.tree;
2+
3+
import org.junit.Before;
4+
import org.junit.Test;
5+
6+
import static org.junit.Assert.*;
7+
8+
public class HuffmanCodecTest {
9+
private HuffmanCodec codec;
10+
11+
@Before
12+
public void setUp() throws Exception {
13+
codec = new HuffmanCodec();
14+
}
15+
16+
@Test
17+
public void decode() {
18+
String raw = "aaaaa aaaaa eeee e e e e e e e e e e eiiiiiiiiiiiissstttt\r";
19+
String encoded = codec.encode(raw);
20+
System.out.println("encoded as :" + encoded);
21+
22+
String actual = codec.decode(encoded);
23+
assertEquals("Huffman codec works", raw, actual);
24+
}
25+
}

0 commit comments

Comments
 (0)