In [84]:
class BinaryTreeNode:
    """Node of a Huffman tree.

    Nodes are ordered and compared by ``freq`` only, which is what lets them
    be stored directly in a ``heapq`` min-heap.

    Attributes:
        char:  the symbol for a leaf (for merged nodes, whatever label the
               builder assigned).
        freq:  occurrence count used as the heap priority.
        left:  left child, or None.
        right: right child, or None.

    Note: because ``__eq__`` is defined without ``__hash__``, instances are
    unhashable (they cannot be used as dict keys or set members).
    """

    def __init__(self,char,freq):
        self.char = char
        self.freq = freq
        self.left = None
        self.right = None

    def __lt__(self,other):
        # Anything without a ``freq`` is not comparable; let Python decide
        # (it raises TypeError) rather than failing with AttributeError.
        if not hasattr(other, "freq"):
            return NotImplemented
        return self.freq < other.freq

    def __gt__(self,other):
        if not hasattr(other, "freq"):
            return NotImplemented
        return self.freq > other.freq

    def __eq__(self,other):
        # Returning NotImplemented makes ``node == None`` / ``node == "x"``
        # evaluate to False instead of raising.
        if not hasattr(other, "freq"):
            return NotImplemented
        return self.freq == other.freq
    

In [85]:
import heapq

In [86]:
def createHeap(st):
    """Return a min-heap of BinaryTreeNode leaves, one per distinct character.

    Each leaf carries the character and the number of times it occurs in
    ``st``; the list is heapified so the least frequent node is at index 0.
    """
    counts = {}
    for symbol in st:
        if symbol in counts:
            counts[symbol] += 1
        else:
            counts[symbol] = 1

    # Leaves are created in first-occurrence order of the characters.
    heap = [BinaryTreeNode(symbol, count) for symbol, count in counts.items()]
    heapq.heapify(heap)
    return heap

In [87]:
def printCodes(root,st):
    """Print ``<char> :  <code>`` for every leaf below ``root``.

    ``st`` is the bit prefix accumulated so far; going left appends "0" and
    going right appends "1". Leaves are printed left to right.
    """
    # Explicit stack instead of recursion; the right child is pushed first so
    # that the left subtree is still visited (and printed) first.
    pending = [(root, st)]
    while pending:
        node, prefix = pending.pop()
        if node is None:
            continue
        if node.left is None and node.right is None:
            print(node.char,": ",prefix)
            continue
        pending.append((node.right, prefix + "1"))
        pending.append((node.left, prefix + "0"))

In [88]:
def huffmanCoding(st):
    """Build the Huffman tree for ``st`` and print the code of every character.

    The two least frequent nodes are repeatedly merged into a parent whose
    frequency is their sum, until a single root remains; the codes are then
    printed with ``printCodes``.

    Edge cases:
        * empty ``st``: nothing is printed (there are no symbols to encode);
        * only one distinct character: it is given the one-bit code "0",
          since an empty code could not represent any occurrences.
    """
    heap = createHeap(st)
    if not heap:
        return

    while len(heap) > 1:
        # heappop already returns the smallest node, so no need to peek first.
        leftChild = heapq.heappop(heap)
        rightChild = heapq.heappop(heap)
        newNode = BinaryTreeNode(leftChild.char + rightChild.char,
                                 leftChild.freq + rightChild.freq)
        newNode.left = leftChild
        newNode.right = rightChild
        heapq.heappush(heap,newNode)

    root = heap[0]
    # A root that is itself a leaf means the input had a single distinct symbol.
    is_single_symbol = root.left is None and root.right is None
    printCodes(root, "0" if is_single_symbol else "")

In [89]:
# Interactive demo: read one line from stdin and print the Huffman code
# assigned to each distinct character in it.
print("Enter the string: ")
string =input()
huffmanCoding(string)

Enter the string: 
aabbaaacdccde
d :  00
c :  01
e :  100
b :  101
a :  11


In [90]:
# Fixed sample input (b x1, c x6, a x5, d x3): the most frequent character,
# 'c', ends up with the shortest code in the output below.
string = "bccccccaaaaaddd"
huffmanCoding(string)

c :  0
b :  100
d :  101
a :  11


In [91]:
import os

In [92]:
class HuffmanCoding:
    """Huffman compressor / decompressor for one text file.

    ``compress`` reads the file at ``path``, derives a Huffman code from its
    character frequencies and writes ``<name>.bin``. ``decompress`` turns such
    a file back into ``<name>_decompressed.txt``.

    The code table lives only in memory (it is not stored in the ``.bin``
    file), so ``decompress`` has to be called on the same instance that
    produced the file with ``compress``.

    ``.bin`` layout: one header byte holding the number of padding bits,
    followed by the encoded bit stream, zero-padded at the end to a whole
    number of bytes.
    """

    def __init__(self,path):
        self.path = path
        self.__heap = []
        self.__codes = {}
        self.__reverse_Codes = {}

    def __createHeap(self,st):
        """Fill the heap with one leaf node per distinct character of ``st``."""
        d = {}
        for char in st:
            d[char] = d.get(char,0) + 1

        # Rebuild from scratch so nodes from an earlier call cannot leak in.
        self.__heap = [BinaryTreeNode(char, freq) for char, freq in d.items()]
        heapq.heapify(self.__heap)
        return

    def __createCodesHelper(self,root,st):
        """Walk the tree, recording code <-> character for every leaf.

        ``st`` is the bit prefix so far; left appends "0", right appends "1".
        """
        if root is None:
            return
        if root.left is None and root.right is None:
            self.__codes[root.char] = st
            self.__reverse_Codes[st] = root.char
            return
        self.__createCodesHelper(root.left,st + "0")
        self.__createCodesHelper(root.right,st + "1")

    def __createCodes(self):
        """Pop the tree root off the heap and populate both code tables."""
        if not self.__heap:
            # Empty input: no symbols, hence no codes.
            return
        root = heapq.heappop(self.__heap)
        if root.left is None and root.right is None:
            # Only one distinct symbol: give it a real one-bit code, otherwise
            # it would get "" and every occurrence would encode to nothing.
            self.__codes[root.char] = "0"
            self.__reverse_Codes["0"] = root.char
            return
        self.__createCodesHelper(root,"")
        return

    def __createEncodedText(self,text):
        """Return ``text`` as a string of '0'/'1' using the code table."""
        return "".join(self.__codes[char] for char in text)

    def __getPaddedEncodedText(self,encodedText):
        """Zero-pad the bit string to a multiple of 8 and prepend the header.

        The header is the padding length as 8 binary digits. A bit string
        that is already byte-aligned gets no padding (header ``00000000``).
        """
        padded_amount = (8 - len(encodedText) % 8) % 8
        encodedText += "0" * padded_amount

        padded_info = "{0:08b}".format(padded_amount)
        return padded_info + encodedText

    def __getBytesList(self,paddedEncodedText):
        """Split the bit string into 8-bit groups and return them as ints."""
        return [int(paddedEncodedText[i:i+8],2)
                for i in range(0,len(paddedEncodedText),8)]

    def __createTree(self):
        """Merge the two least frequent nodes until one root is left.

        Returns the root (which stays on the heap), or None if the heap is
        empty.
        """
        while len(self.__heap) > 1:
            # heappop returns the smallest node directly.
            leftChild = heapq.heappop(self.__heap)
            rightChild = heapq.heappop(self.__heap)
            freq = leftChild.freq + rightChild.freq
            char = leftChild.char + rightChild.char
            newNode = BinaryTreeNode(char,freq)
            newNode.left = leftChild
            newNode.right = rightChild
            heapq.heappush(self.__heap,newNode)

        return self.__heap[0] if self.__heap else None

    def compress(self):
        """Compress the file at ``self.path`` and return the ``.bin`` path.

        The whole text is encoded, including trailing whitespace, so that
        ``decompress`` reproduces the input exactly. Prints "File Compressed"
        when done.
        """
        file_name,file_extension = os.path.splitext(self.path)
        output_path = file_name + ".bin"

        # Start from a clean slate so a repeated call (e.g. after the file
        # changed) cannot mix in codes from a previous run.
        self.__heap = []
        self.__codes = {}
        self.__reverse_Codes = {}

        # Read-only access is enough for the input file.
        with open(self.path,'r') as file:
            text = file.read()

        #create the heap from the file
        self.__createHeap(text)

        #create the tree from the heap
        self.__createTree()

        #create the codes from the tree
        self.__createCodes()

        #create the encoded text from the codes
        encodedText = self.__createEncodedText(text)

        #create the padded coded text from encoded text
        paddedEncodedText = self.__getPaddedEncodedText(encodedText)

        #create the byte values of the padded encoded text
        bytes_list = self.__getBytesList(paddedEncodedText)

        #write them out as the binary file
        with open(output_path,'wb') as output:
            output.write(bytes(bytes_list))

        print("File Compressed")
        return output_path

    def __removePadding(self,text):
        """Strip the 8-bit header and the trailing padding bits it announces.

        Raises:
            ValueError: if ``text`` is too short to contain the header.
        """
        if len(text) < 8:
            raise ValueError("compressed data is missing the padding header")
        padded_info = text[:8]
        extra_padding = int(padded_info,2)
        text = text[8:]
        if extra_padding == 0:
            # text[:-0] would be the empty string, so handle "no padding"
            # explicitly.
            return text
        return text[:-extra_padding]

    def __decodeText(self,text):
        """Decode a '0'/'1' string back to characters via the reverse table."""
        pieces = []
        curr_bits = ""
        for bit in text:
            curr_bits += bit
            if curr_bits in self.__reverse_Codes:
                pieces.append(self.__reverse_Codes[curr_bits])
                curr_bits = ""

        return "".join(pieces)

    def decompress(self,input_path):
        """Decode ``input_path`` into ``<name>_decompressed.txt``.

        ``<name>`` is derived from ``self.path``, not from ``input_path``.
        Must be called on the instance whose ``compress`` produced the file,
        because the code table is only held in memory.
        """
        filename,file_extension = os.path.splitext(self.path)
        output_path = filename + "_decompressed" + ".txt"

        with open(input_path,'rb') as file:
            data = file.read()

        # Iterating over bytes yields ints; render each as 8 binary digits.
        bit_string = "".join("{0:08b}".format(byte) for byte in data)

        actual_text = self.__removePadding(bit_string)
        decompressed_text = self.__decodeText(actual_text)

        with open(output_path,'w') as output:
            output.write(decompressed_text)

        return
        
        
# Round-trip demo: compress() writes Sample.bin next to the input file and
# decompress() writes Sample_decompressed.txt in the same folder.
# NOTE(review): absolute, machine-specific path — point this at a local
# sample file before running the cell on another machine.
path = r"C:\Users\Vijay Raikar\Desktop\ A Python Programs\Sample.txt"
h = HuffmanCoding(path)
output_path = h.compress()
h.decompress(output_path)



File Compressed
