In [1]:
class BinaryTreeNode:
    
    def __init__(self,value,priority):
        self.value=value
        self.priority=priority
        self.left=None
        self.right=None
        
    def __lt__(self,other):
        return self.priority<other.priority
    
    def __eq__(self,other):
        return self.priority==other.priority

In [2]:
import heapq
import os
class HuffmanCoding:
    
    def __init__(self,path):
        self.path=path
        self.__heap=[]
        self.__codeBits={}
        self.__reverseCodeBits={}
        
    def __readFile(self):
        fileObject = open(self.path,"r")
        fileTxt = fileObject.read()
        fileObject.close()
        return fileTxt.rstrip()
    
    def __makeFreqDict(self,txt):
        dic={}
        for ele in txt:
            dic[ele]=dic.get(ele,0)+1
        return dic
        
    def __makeHeap(self,FreqDict):
    
        for key,value in FreqDict.items():
            binary_tree_node=BinaryTreeNode(key,value)
            heapq.heappush(self.__heap,binary_tree_node)
            
    def __builTree(self):
    
        while len(self.__heap)>1:
            min_node_1=heapq.heappop(self.__heap)
            min_node_2=heapq.heappop(self.__heap)
            new_node=BinaryTreeNode(None,min_node_1.priority+min_node_2.priority)
            new_node.left=min_node_1
            new_node.right=min_node_2
            heapq.heappush(self.__heap,new_node)
    def __buildCodesHelper(self,root,code_bits):
        if root is None:
            return
        if root.value is not None:
            self.__codeBits[root.value]=code_bits
            self.__reverseCodeBits[code_bits]=root.value
            return
        self.__buildCodesHelper(root.left,code_bits+'0')
        self.__buildCodesHelper(root.right,code_bits+'1')
        
            
    def __buildCodes(self):
        root=heapq.heappop(self.__heap)
        self.__buildCodesHelper(root,'')
    
    def __encodeTxt(self,txt):
        encodedTxt=''
        for char in txt:
            encodedTxt+=self.__codeBits[char]
        return encodedTxt
    
    def __paddedEncodeTxt(self,encodedTxt):
        padded_bit_size=8-(len(encodedTxt)%8)
        
        for i in range(padded_bit_size):
            encodedTxt+='0'
        print(padded_bit_size)    
        padded_bit_info='{0:08b}'.format(padded_bit_size)
        print(padded_bit_info)
        padded_encoded_text=padded_bit_info+encodedTxt
        return padded_encoded_text
    
    def __getBytesArrayFromEncodedTxt(self,txt):
        arr=[]
        for i in range(0,len(txt),8):
            bytes_str=txt[i:i+8]
            arr.append(int(bytes_str,2))
        return arr
    
    def compress(self):
        file_name,file_extension=os.path.splitext(self.path)
        output_filepath=file_name+'.bin'
        fileTxt=self.__readFile()
#         print(fileTxt)
        freqDict=self.__makeFreqDict(fileTxt)
#         print(freqDict)
        self.__makeHeap(freqDict)
#         print(self.__heap)
        self.__builTree()
#         print(self.__heap)
        self.__buildCodes()
        encoded_text=self.__encodeTxt(fileTxt)
        padded_encoded_text=self.__paddedEncodeTxt(encoded_text)
        bytes_array=self.__getBytesArrayFromEncodedTxt(padded_encoded_text)
        final_bytes=bytes(bytes_array)
        out_file=open(output_filepath,'wb')
        out_file.write(final_bytes)
        out_file.close()
        print('compressed')
        return output_filepath
    
    def __removePadding(self,txt):
        padding_info=txt[:8]
        padded_digits=int(padding_info,2)
        txt=txt[8:]
        actual_txt=txt[:-1*padded_digits]
        return actual_txt
    
    def __decodeTxt(self,txt):
        decoded_txt=''
        current_txt=''
        
        for bit in txt:
            current_txt+=bit
            if current_txt in self.__reverseCodeBits:
                character=self.__reverseCodeBits[current_txt]
                decoded_txt+=character
                current_txt=''
        return decoded_txt
    
    def decompress(self,filepath):
        filename,file_extension=os.path.splitext(self.path)
        out_filepath=filename+'_compress'+'.txt'
        input_file=open(filepath,'rb')
        byte=input_file.read(1)
        bytes_str=''
        while byte:
            byte=ord(byte)
            bits=bin(byte)[2:].rjust(8,'0')
            bytes_str+=bits
            byte=input_file.read(1)
        actual_text=self.__removePadding(bytes_str)
        decoded_txt=self.__decodeTxt(actual_text)
        output_file=open(out_filepath,'w')
        output_file.write(decoded_txt)
        output_file.close()
        print('decompressed')
        

In [3]:
path='Test.txt'
h=HuffmanCoding(path)
compress_path=h.compress()
h.decompress(compress_path)

8
00001000
compressed
decompressed
