# Applied Algorithms - Huffman Coding

# Encoding

In [1]:
#Class that helps us build a huffman tree
class h_tree:
    def __init__(self, val, freq , left = None, right = None):

        self.left = left
        self.right = right
        self.freq = freq
        self.val = val
        
    #Helper function that helps us compare the frequency and sort them
    def __lt__(self, other):
        return self.freq < other.freq

In [2]:
#Function to create huffman tree
def huffman(d):
    huffman_tree = []
    
    #Create a priority queue using min heap from the dictionary
    for i,j in d.items():
        huffman_tree.append(h_tree(i, j))
    heapq.heapify(huffman_tree)
    
    #Iterate until there is just one element in the heap
    for i in range(0, len(huffman_tree)-1):
        #Get the least 2 values from the min heap
        zero = heappop(huffman_tree)
        one = heappop(huffman_tree)
        #Push the combined element as the root into the huffman tree along with its left and right node
        heappush(huffman_tree, h_tree(None, zero.freq + zero.freq, zero, one))
    return(huffman_tree)

In [3]:
from collections import Counter
import heapq
from heapq import heappush, heappop

#Open and read the file
file = open("input.txt", "rt")
ip_data = file.read().strip()
print(ip_data)

#Store each character and its frequency in a dictionary
d = dict(Counter(ip_data))
huffman_tree = huffman(d)

Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.


In [4]:
#Function to encode the given data 
def encode(root, output, huffmanCode):
 
    if root is None:
        return
    
    #If the current node is the leaf, then output the value (handling if there is just one character)
    if root.left is None and root.right is None:
        huffmanCode[root.val] = output if len(output) > 0 else '0'
        
    #Recursively call the left and right child
    encode(root.left, output + '0', huffmanCode)
    encode(root.right, output + '1', huffmanCode)

#Calling the function to get the encoded data
def get_encoded_data(huffman_tree, ip_data):
    root = huffman_tree[0]
    huffmanCode = {}
    encode(root, '', huffmanCode)
    coded_data = ''
    
    for i in ip_data:
        coded_data += huffmanCode[i]
        
    return coded_data, root
    

In [5]:
coded_data, root = get_encoded_data(huffman_tree, ip_data)
print('Encoded data:' , coded_data)

Encoded data: 10111000110000100000111001100010101011110011111100110111011000111111000010011011110001101011010011110000010100101111110101101000011011110000101101010000101001110100110100111101001010100011111010110001011001011101100001111100110100101111110111100001110111011101100011000000101111111011100100011101110101000011100010101000010011000101101011000111101001111010111011010101100111101011011111100110111111100001000001100001010110111011000111111000010000011011100100101011100110100111010011111100110111100111100110111011101011100101010110000011000111100110100111101110111000010110001111001101011111000001100011001111000101111110101111001110011111011001101000111101010010001111110111000001011010000100101100011010100110100011000011011001111111111111100111100101101000110111111001101111111000010000111110110011000111110001110011110101101001111110011011110011100101010110000010110111000010011101011010001110011100100011101100011010110100001101111000010111100111100110101011101110101110000011100111

In [6]:
#Write the encoded data into the output file
with open('output.txt', 'w') as f:
    f.write(coded_data)

# Decoding

In [7]:
#Open and read the file
file = open("output.txt", "rt")
op_data = file.read()
op_data

'101110001100001000001110011000101010111100111111001101110110001111110000100110111100011010110100111100000101001011111101011010000110111100001011010100001010011101001101001111010010101000111110101100010110010111011000011111001101001011111101111000011101110111011000110000001011111110111001000111011101010000111000101010000100110001011010110001111010011110101110110101011001111010110111111001101111111000010000011000010101101110110001111110000100000110111001001010111001101001110100111111001101111001111001101110111010111001010101100000110001111001101001111011101110000101100011110011010111110000011000110011110001011111101011110011100111110110011010001111010100100011111101110000010110100001001011000110101001101000110000110110011111111111111001111001011010001101111110011011111110000100001111101100110001111100011100111101011010011111100110111100111001010101100000101101110000100111010110100011100111001000111011000110101101000011011110000101111001111001101010111011101011100000111001111101101001011

In [8]:
#Function to decode the data
def decode(root, index, code, decoded):
 
    if root is None:
        return index
    
    # If the root is a leaf node, we have reached the end of tree
    if root.left is None and root.right is None:
        decoded = root.val
        print(decoded, end = '')
        return index
 
    #increment the index and traverse in the direction of 0 or 1
    index = index + 1
    root = root.left if code[index] == '0' else root.right
    return decode(root, index, code, decoded)

def get_decoded_data(root, op_data):
    #The text has just one unique character
    if root.left is None and root.right is None:
        while root.freq > 0:
            print(root.val, end = '')
            root.freq -=1

    #Else recursively call the right and left child until leaf node to get the decoded data
    else:       
        index = -1
        decoded = ''
        while index < len(op_data) - 1:
            index = decode(root, index, op_data, decoded)

In [9]:
print('Decoded data:')
get_decoded_data(root, op_data)

Decoded data:
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.