<a href="https://colab.research.google.com/github/varshney007/Huffman-File-Compressor/blob/main/huffman.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import heapq
import pickle
from collections import Counter

# Node class to represent each character and its frequency in the Huffman Tree
class Node:
    """A vertex of the Huffman tree.

    ``char`` holds the symbol for a leaf and is ``None`` for nodes that carry
    no symbol; ``freq`` is the weight used for ordering. ``left`` and
    ``right`` start out unset and are linked by whoever builds the tree.
    """

    def __init__(self, char, freq):
        self.char, self.freq = char, freq
        # Children are attached after construction.
        self.left = self.right = None

    def __lt__(self, other):
        """Order nodes by frequency alone so ``heapq`` can compare them."""
        return self.freq < other.freq

# 1. Read the input file
def read_file(file_path):
    """Return the whole content of ``file_path`` as one string.

    The file is opened in text mode without an explicit encoding, so the
    platform default encoding applies.
    """
    with open(file_path, 'r') as source:
        contents = source.read()
    return contents

# 2. Calculate character frequencies
def calculate_frequencies(text):
    """Count how often each character occurs in ``text``.

    Returns a ``Counter`` mapping character -> number of occurrences, with
    keys in order of first appearance.
    """
    tally = Counter()
    tally.update(text)
    return tally

# 3. Build the Huffman Tree using a priority queue (min-heap)
def build_huffman_tree(frequencies):
    """Build a Huffman tree from a symbol -> frequency mapping.

    The two lowest-frequency nodes are repeatedly popped from a min-heap and
    merged under a new internal node (``char=None``) whose frequency is their
    sum, until a single root remains.

    Args:
        frequencies: Mapping of symbol to occurrence count.

    Returns:
        The root ``Node`` of the tree, or ``None`` when ``frequencies`` is
        empty (previously this case raised ``IndexError``).
    """
    heap = [Node(char, freq) for char, freq in frequencies.items()]

    if not heap:
        # Nothing to encode, so there is no tree to return.
        return None

    if len(heap) == 1:
        # A lone leaf used as the root would receive the empty code, so the
        # text would encode to zero bits. Hang it under an internal root so
        # the single symbol gets the one-bit code '0'.
        only_leaf = heap[0]
        root = Node(None, only_leaf.freq)
        root.left = only_leaf
        return root

    heapq.heapify(heap)

    while len(heap) > 1:
        left = heapq.heappop(heap)
        right = heapq.heappop(heap)
        new_node = Node(None, left.freq + right.freq)
        new_node.left = left
        new_node.right = right
        heapq.heappush(heap, new_node)

    return heap[0]  # Root of the Huffman Tree

# 4. Generate the Huffman Codes from the Huffman Tree
def generate_huffman_codes(node, code="", code_table=None):
    """Walk the tree below ``node`` and collect the bit string of every symbol.

    Going to a left child appends '0' to the code, going right appends '1'.
    Every node whose ``char`` is not ``None`` is recorded with the code
    accumulated on the path to it.

    Args:
        node: Root of the (sub)tree to walk; ``None`` is accepted and adds
            nothing.
        code: Bit string accumulated so far on the path to ``node``.
        code_table: Dict to fill in. A fresh dict is created when omitted;
            the previous ``{}`` default was shared between calls, so codes
            from an earlier tree leaked into later results.

    Returns:
        The dict mapping symbol -> bit string.
    """
    if code_table is None:
        code_table = {}

    if node is None:
        return code_table

    if node.char is not None:
        code_table[node.char] = code

    generate_huffman_codes(node.left, code + '0', code_table)
    generate_huffman_codes(node.right, code + '1', code_table)

    return code_table

# 5. Compress the text by replacing each character with its Huffman code
def compress_text(text, huffman_codes):
    """Encode ``text`` as one string of '0'/'1' characters.

    Each character is looked up in ``huffman_codes`` and the resulting bit
    strings are concatenated in order. A character missing from the table
    raises ``KeyError``.
    """
    encoded_pieces = [huffman_codes[symbol] for symbol in text]
    return ''.join(encoded_pieces)

# 6. Write the compressed binary data to a file
def write_compressed_file(compressed_data, output_file):
    """Pack a string of '0'/'1' characters into bytes and write them to a file.

    The bit string is cut into 8-bit groups, most significant bit first. If
    its length is not a multiple of 8, the last group is padded on the right
    with '0' bits so the real bits keep their positions. Previously a short
    final group was converted as-is, which padded it on the left and
    corrupted the tail when the file was read back as 8-bit groups.

    The number of padding bits is not stored in the file. A reader has to
    know how many symbols to expect (for example from the frequency table)
    to discard whatever the padding decodes to.

    Args:
        compressed_data: String consisting of '0' and '1' characters.
        output_file: Path of the binary file to create or overwrite.
    """
    padding = -len(compressed_data) % 8
    padded_bits = compressed_data + '0' * padding

    byte_array = bytearray(
        int(padded_bits[i:i + 8], 2) for i in range(0, len(padded_bits), 8)
    )

    with open(output_file, 'wb') as file:
        file.write(byte_array)

# 7. Store the frequency table in a file for decompression
def store_frequency_table(frequencies, output_file):
    """Pickle ``frequencies`` into ``output_file``, overwriting any existing file.

    The stored table is what decompression later loads to rebuild the tree.
    """
    with open(output_file, 'wb') as sink:
        pickler = pickle.Pickler(sink)
        pickler.dump(frequencies)

# 8. Read the compressed file as binary
def read_compressed_file(file_path):
    """Read a binary file and return its content as a string of bits.

    Every byte becomes exactly eight '0'/'1' characters, most significant
    bit first, so the result is always a multiple of 8 characters long.
    """
    with open(file_path, 'rb') as source:
        raw_bytes = source.read()

    bit_groups = [f'{value:08b}' for value in raw_bytes]
    return ''.join(bit_groups)

# 9. Decompress the binary data using the Huffman Tree
def decompress_data(binary_data, huffman_tree):
    """Decode a string of bits by walking ``huffman_tree``.

    Starting at the root, a '0' moves to the left child and any other
    character moves to the right child. Reaching a node whose ``char`` is not
    ``None`` emits that symbol and restarts at the root. Bits left over in an
    unfinished walk at the end of the input are dropped.

    Returns:
        The decoded symbols joined into one string.
    """
    output = []
    current = huffman_tree

    for bit in binary_data:
        if bit == '0':
            current = current.left
        else:
            current = current.right

        symbol = current.char
        if symbol is None:
            # Still on an internal node; keep descending.
            continue

        output.append(symbol)
        current = huffman_tree

    return ''.join(output)

# 10. Load the frequency table from the stored file
def load_frequency_table(file_path):
    """Unpickle and return the object stored in ``file_path``.

    NOTE(review): unpickling can execute arbitrary code, so this should only
    be pointed at frequency files this program wrote itself.
    """
    with open(file_path, 'rb') as source:
        table = pickle.Unpickler(source).load()
    return table

# Compression Workflow
def compress_file(input_file, compressed_file, frequency_file):
    """Huffman-compress ``input_file``.

    Writes two outputs: the packed bit stream to ``compressed_file`` and the
    pickled character-frequency table to ``frequency_file``. Both are needed
    to decompress, because the tree is rebuilt from the frequency table.

    Args:
        input_file: Path of the text file to compress.
        compressed_file: Path of the binary output file.
        frequency_file: Path of the pickled frequency table output.
    """
    text = read_file(input_file)
    frequencies = calculate_frequencies(text)

    if text:
        huffman_tree = build_huffman_tree(frequencies)
        # Pass a fresh table explicitly so codes from an earlier call can
        # never leak in through a shared default argument.
        huffman_codes = generate_huffman_codes(huffman_tree, "", {})
        compressed_data = compress_text(text, huffman_codes)
    else:
        # An empty input has no symbols and no tree; emit an empty bit
        # stream instead of failing while building the tree.
        compressed_data = ""

    # Write compressed data and store frequency table
    write_compressed_file(compressed_data, compressed_file)
    store_frequency_table(frequencies, frequency_file)

# Decompression Workflow
def decompress_file(compressed_file, frequency_file, output_file):
    """Restore the original text from a compressed file and its frequency table.

    The Huffman tree is rebuilt from the stored frequencies, the compressed
    bytes are expanded to bits and decoded, and the text is written to
    ``output_file``.

    The compressed file always holds whole bytes, so its last byte may carry
    bits that are not part of the message. The frequency table gives the
    exact number of symbols in the original text, so the decoded output is
    cut to that length to drop anything decoded from such bits.

    Args:
        compressed_file: Path of the binary file holding the bit stream.
        frequency_file: Path of the pickled frequency table.
        output_file: Path of the text file to create or overwrite.
    """
    frequencies = load_frequency_table(frequency_file)
    symbol_count = sum(frequencies.values())

    if len(frequencies) <= 1:
        # With zero or one distinct symbol the table alone determines the
        # text, so no tree or bit stream is needed. This also covers inputs
        # whose single symbol was encoded with an empty code.
        original_text = ''.join(
            symbol * count for symbol, count in frequencies.items()
        )
    else:
        huffman_tree = build_huffman_tree(frequencies)
        binary_data = read_compressed_file(compressed_file)
        decoded = decompress_data(binary_data, huffman_tree)
        # Discard symbols produced by the filler bits of the last byte.
        original_text = decoded[:symbol_count]

    with open(output_file, 'w') as file:
        file.write(original_text)

# Example usage
if __name__ == "__main__":
    # Fixed file names used by this demo round trip
    input_file = 'huffman.txt'
    compressed_file = 'compressed.bin'
    frequency_file = 'frequency.pkl'
    decompressed_file = 'decompressed.txt'

    # Encode the input, producing the bit stream and the frequency table
    compress_file(
        input_file=input_file,
        compressed_file=compressed_file,
        frequency_file=frequency_file,
    )

    # Decode those two files back into text
    decompress_file(
        compressed_file=compressed_file,
        frequency_file=frequency_file,
        output_file=decompressed_file,
    )
# Hand the three files produced by the round trip above to `files.download`.
# NOTE(review): `google.colab` is presumably only importable inside a Colab
# runtime, so this cell would fail elsewhere — confirm it is meant to stay
# Colab-only.
from google.colab import files
files.download('compressed.bin')
files.download('frequency.pkl')
files.download('decompressed.txt')
import os

# Paths of the two files being compared
original_file = 'huffman.txt'
compressed_file = 'compressed.bin'

# On-disk sizes in bytes
original_size_bytes = os.stat(original_file).st_size
compressed_size_bytes = os.stat(compressed_file).st_size

# The same sizes expressed in kilobytes (1 KB = 1024 bytes)
original_size_kb = original_size_bytes / 1024
compressed_size_kb = compressed_size_bytes / 1024

# Show both sizes with two decimals
print(f"Original File Size: {original_size_kb:.2f} KB")
print(f"Compressed File Size: {compressed_size_kb:.2f} KB")

# Compression counts as successful only if the output is strictly smaller.
# NOTE(review): only compressed.bin is measured; the frequency table file
# that decompression also needs is not included in this comparison.
if original_size_bytes <= compressed_size_bytes:
    print("The compression was not effective.")
else:
    print("The file was successfully compressed.")
