|
| 1 | +package org.sean.tree; |
| 2 | + |
| 3 | +import org.sean.trie.Trie; |
| 4 | + |
| 5 | +import java.util.*; |
| 6 | +import java.util.stream.Collectors; |
| 7 | + |
/**
 * Huffman encoder/decoder that represents the compressed stream as a {@code String} of
 * {@code '0'} and {@code '1'} characters.
 *
 * <p>The codec is stateful: every call to {@link #encode(String)} rebuilds the code table from
 * the frequencies of the given text, and {@link #decode(String)} uses the table produced by the
 * most recent {@code encode} call. The instance keeps mutable state without any synchronization,
 * so it must not be shared between threads without external locking.
 */
public class HuffmanCodec {
    /** Placeholder symbol stored in internal (non-leaf) nodes; it is never emitted. */
    private static final char INTERNAL_NODE_CHAR = 'X';

    /**
     * Code given to the only symbol of a text with a single distinct character. Without it the
     * lone leaf would get the empty code and the text could not be decoded again.
     */
    private static final String SINGLE_SYMBOL_CODE = "0";

    /** Character emitted by {@link #decode(String)} for a tail that matches no known code. */
    private static final char UNKNOWN_SYMBOL = '\0';

    /** Node of the Huffman tree, ordered by ascending frequency. */
    private static class Node implements Comparable<Node> {
        final char ch;
        final int frequency;
        final Node left;
        final Node right;

        /** Creates a leaf for {@code ch}. */
        Node(char ch, int frequency) {
            this(ch, frequency, null, null);
        }

        /** Creates a node with the given children (both {@code null} for a leaf). */
        Node(char ch, int frequency, Node left, Node right) {
            this.ch = ch;
            this.frequency = frequency;
            this.left = left;
            this.right = right;
        }

        boolean isLeaf() {
            return left == null && right == null;
        }

        @Override
        public int compareTo(Node o) {
            return Integer.compare(this.frequency, o.frequency);
        }

        @Override
        public String toString() {
            return "Node{" +
                    "ch=" + ch +
                    ", frequency=" + frequency +
                    '}';
        }
    }

    /** Symbol to bit-string code, valid for the text passed to the latest {@code encode}. */
    private final Map<Character, String> codecMap = new HashMap<>();

    /** Inverse of {@link #codecMap}: bit-string code to symbol. */
    private final Map<String, Character> decodecMap = new HashMap<>();

    /** Root of the current Huffman tree; {@code null} until a non-empty text was encoded. */
    private Node root = null;

    /**
     * Encodes {@code raw} with a Huffman code built from its own character frequencies.
     *
     * @param raw the text to encode; must not be {@code null}
     * @return the concatenated codes as a string of {@code '0'}/{@code '1'} characters, or the
     *         empty string when {@code raw} is empty
     * @throws NullPointerException if {@code raw} is {@code null}
     */
    public String encode(String raw) {
        Objects.requireNonNull(raw, "raw");
        buildTree(raw);

        StringBuilder builder = new StringBuilder();
        for (int i = 0; i < raw.length(); i++) {
            builder.append(codecMap.get(raw.charAt(i)));
        }
        return builder.toString();
    }

    /**
     * Decodes a bit string produced by the most recent {@link #encode(String)} call.
     *
     * <p>Bits are accumulated until they form a known code; because Huffman codes are
     * prefix-free, the first match is the only possible one. Malformed input is handled
     * leniently: if the remaining tail never matches a code (unknown characters, truncated
     * stream, or no text encoded yet), a single {@code '\0'} is appended for that tail.
     *
     * @param encodedStr the bit string to decode; must not be {@code null}
     * @return the decoded text
     * @throws NullPointerException if {@code encodedStr} is {@code null}
     */
    public String decode(String encodedStr) {
        Objects.requireNonNull(encodedStr, "encodedStr");

        StringBuilder out = new StringBuilder();
        StringBuilder pending = new StringBuilder();
        for (int i = 0; i < encodedStr.length(); i++) {
            pending.append(encodedStr.charAt(i));
            Character symbol = decodecMap.get(pending.toString());
            if (symbol != null) {
                out.append(symbol.charValue());
                pending.setLength(0);
            }
        }
        if (pending.length() > 0) {
            // Leftover bits that match no code: keep the lenient placeholder behaviour.
            out.append(UNKNOWN_SYMBOL);
        }
        return out.toString();
    }

    /**
     * Rebuilds the Huffman tree and both code tables for {@code raw}, discarding whatever a
     * previous call left behind so that stale codes can never shadow the new ones.
     */
    private void buildTree(String raw) {
        codecMap.clear();
        decodecMap.clear();
        root = null;

        Map<Character, Integer> frequencies = new HashMap<>();
        for (int i = 0; i < raw.length(); i++) {
            frequencies.merge(raw.charAt(i), 1, Integer::sum);
        }

        // Min-heap on frequency: the two rarest subtrees are always merged first.
        PriorityQueue<Node> pq = new PriorityQueue<>();
        for (Map.Entry<Character, Integer> entry : frequencies.entrySet()) {
            pq.add(new Node(entry.getKey(), entry.getValue()));
        }

        while (pq.size() > 1) {
            Node first = pq.poll();
            Node second = pq.poll();
            pq.offer(new Node(INTERNAL_NODE_CHAR, first.frequency + second.frequency, first, second));
        }
        root = pq.poll(); // null when raw is empty

        buildCodecMap();
    }

    /** Fills {@link #codecMap} and {@link #decodecMap} from the current tree. */
    private void buildCodecMap() {
        if (root == null) {
            return;
        }
        if (root.isLeaf()) {
            // Only one distinct symbol: the path to it is empty, so give it a one-bit code.
            registerCode(root.ch, SINGLE_SYMBOL_CODE);
            return;
        }
        assignCodes(root, new StringBuilder());
    }

    /**
     * Walks the tree depth-first, appending {@code '0'} for a left edge and {@code '1'} for a
     * right edge, and registers the accumulated path at every leaf.
     */
    private void assignCodes(Node node, StringBuilder path) {
        if (node.isLeaf()) {
            registerCode(node.ch, path.toString());
            return;
        }
        // Internal nodes are only created by buildTree and always have both children.
        path.append('0');
        assignCodes(node.left, path);
        path.setLength(path.length() - 1);

        path.append('1');
        assignCodes(node.right, path);
        path.setLength(path.length() - 1);
    }

    /** Records {@code code} for {@code symbol} in both lookup directions. */
    private void registerCode(char symbol, String code) {
        codecMap.put(symbol, code);
        decodecMap.put(code, symbol);
    }

    /** Debugging aid: prints the tree one level per line. Does nothing for an empty tree. */
    @SuppressWarnings("unused")
    private void printTreeByLevel(Node root) {
        if (root == null) {
            return;
        }
        Queue<Node> queue = new ArrayDeque<>();
        queue.add(root);

        while (!queue.isEmpty()) {
            int n = queue.size();
            StringBuilder line = new StringBuilder();
            for (int i = 0; i < n; i++) {
                Node node = queue.poll();
                line.append(node).append(" | ");

                if (node.left != null) {
                    queue.offer(node.left);
                }
                if (node.right != null) {
                    queue.offer(node.right);
                }
            }
            System.out.println(line);
        }
    }
}
0 commit comments