<a href="https://colab.research.google.com/github/wanfuse123/Airship.jl/blob/master/__02D_Oligarcy_Compression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%writefile encoder.py
# --- DVD Hierarchical Encoder ---
# This script downloads a DVD ISO and applies the hierarchical compression technique

import os
import sys
import hashlib
import hmac
import struct
import numpy as np
import concurrent.futures
import time
import json
import argparse
import requests
from pathlib import Path

# Default tuning values. HierarchicalCompressor takes them as constructor
# defaults, and --chunk-size / --stages use the first two as CLI defaults.
CHUNK_SIZE = 64 * 1024  # Bytes per chunk (64 KiB)
NUM_STAGES = 37         # Number of transformation stages run on every chunk
NUM_WORKERS = os.cpu_count() or 4  # One worker per CPU; 4 when the count is unavailable

class HierarchicalCompressor:
    """
    Class to manage the hierarchical compression process for DVD data.

    Every chunk passes through ``num_stages`` stages. For each
    (chunk, stage) pair a seed is derived from the master seed with
    HMAC-SHA256; one bit of a SHA-256 hash of that seed decides whether the
    stage XORs the chunk with an HMAC-SHA256 keystream keyed by the stage
    seed. The 0/1 decisions are recorded per chunk so the applied stages can
    be undone later.
    """

    def __init__(self, master_seed=None, num_stages=NUM_STAGES,
                 chunk_size=CHUNK_SIZE, num_workers=NUM_WORKERS):
        """
        Set up the compressor.

        Args:
            master_seed: Key material for all derived seeds. ``None`` draws
                32 random bytes from ``os.urandom``; a string is hashed with
                SHA-256; ``bytes``/``bytearray`` are used unchanged.
            num_stages: Number of stages each chunk goes through (>= 0).
            chunk_size: Chunk length in bytes (> 0).
            num_workers: Thread count handed to the worker pool.

        Raises:
            TypeError: If ``master_seed`` is none of the accepted types.
            ValueError: If ``chunk_size`` is not positive or ``num_stages``
                is negative.
        """
        # Initialize with random seed if none provided
        if master_seed is None:
            master_seed = os.urandom(32)
        elif isinstance(master_seed, str):
            # Hash the string to get a fixed-length seed
            master_seed = hashlib.sha256(master_seed.encode()).digest()
        elif not isinstance(master_seed, (bytes, bytearray)):
            # hmac.new() only accepts bytes/bytearray keys; fail here instead
            # of once per chunk inside the worker threads.
            raise TypeError(
                "master_seed must be None, str, bytes or bytearray, "
                f"not {type(master_seed).__name__}"
            )

        # A step of 0 makes range() raise and a negative step yields no
        # chunks at all, which would silently produce an empty output.
        if chunk_size <= 0:
            raise ValueError(f"chunk_size must be positive, got {chunk_size}")
        if num_stages < 0:
            raise ValueError(f"num_stages must not be negative, got {num_stages}")

        self.master_seed = master_seed
        self.num_stages = num_stages
        self.chunk_size = chunk_size
        self.num_workers = num_workers
        self.debug_mode = False

    def log(self, message):
        """Log debug messages if debug mode is enabled."""
        if self.debug_mode:
            print(f"DEBUG: {message}")

    # --- Core PRNG and Transformation Functions ---

    def derive_stage_seed(self, chunk_index, stage_index):
        """
        Derive a stage-specific seed.

        Returns the HMAC-SHA256 digest of ``"<chunk_index>_<stage_index>"``
        keyed with the master seed.
        """
        data = f"{chunk_index}_{stage_index}".encode()
        return hmac.new(self.master_seed, data, hashlib.sha256).digest()

    def custom_prng_decision(self, stage_seed):
        """
        Generate a binary decision (0 or 1) from the stage seed.

        The decision is the lowest bit of the first byte of SHA-256(stage_seed).
        """
        hash_val = hashlib.sha256(stage_seed).digest()
        return hash_val[0] & 1

    def generate_mask_block(self, stage_seed, start_counter, block_size):
        """
        Generate a portion of the mask for XOR transformation.

        Args:
            stage_seed: HMAC key for this stage.
            start_counter: Value of the first counter; each following digest
                uses the next integer.
            block_size: Number of mask bytes to return.

        Returns:
            A ``uint8`` NumPy array of ``block_size`` bytes built from
            consecutive HMAC-SHA256 digests of the big-endian 32-bit counter.
        """
        digest_size = 32  # SHA-256 produces 32 bytes
        num_digests = (block_size + digest_size - 1) // digest_size

        # Pre-allocate buffer
        mask = bytearray(num_digests * digest_size)

        # Generate the mask block one digest at a time
        for i in range(num_digests):
            digest = hmac.new(stage_seed, struct.pack('>I', start_counter + i),
                              hashlib.sha256).digest()
            offset = i * digest_size
            mask[offset:offset + digest_size] = digest

        # Convert to NumPy array and trim the last digest to the block size
        return np.frombuffer(mask[:block_size], dtype=np.uint8)

    def apply_xor_transformation(self, data, mask):
        """Apply XOR transformation to data using the given mask."""
        return np.bitwise_xor(data, mask)

    # --- Chunk Processing ---

    def process_chunk(self, chunk_index, original_chunk):
        """
        Process a single chunk through all stages.

        Args:
            chunk_index: Position of the chunk; part of every stage seed.
            original_chunk: Bytes-like chunk contents.

        Returns:
            ``(transformed_bytes, decisions)`` where ``decisions`` holds one
            0/1 entry per stage. If anything fails, the error is printed and
            ``(original_chunk, [])`` is returned instead of raising.
        """
        try:
            # Convert input to NumPy array for efficient processing; the copy
            # is needed because frombuffer on bytes yields a read-only view.
            chunk_data = np.frombuffer(original_chunk, dtype=np.uint8).copy()
            recorded_decisions = []

            # Process each stage sequentially
            for stage in range(self.num_stages):
                stage_seed = self.derive_stage_seed(chunk_index, stage)

                # Make binary decision and record it even when the stage is skipped
                decision = self.custom_prng_decision(stage_seed)
                recorded_decisions.append(decision)

                # Skip transformation if decision is 0
                if decision == 0:
                    continue

                # Process the chunk in blocks to save memory. Chunks produced
                # by split_dvd_into_chunks are never longer than chunk_size,
                # so for those this loop runs at most once.
                for offset in range(0, len(chunk_data), self.chunk_size):
                    # Adjust block size for the last block
                    current_block_size = min(self.chunk_size, len(chunk_data) - offset)

                    # Generate mask block
                    counter_start = offset // 32  # Each SHA-256 digest is 32 bytes
                    mask_block = self.generate_mask_block(stage_seed, counter_start,
                                                          current_block_size)

                    # Apply transformation to this block
                    chunk_data[offset:offset + current_block_size] = self.apply_xor_transformation(
                        chunk_data[offset:offset + current_block_size],
                        mask_block
                    )

            # Convert back to bytes
            return chunk_data.tobytes(), recorded_decisions

        except Exception as e:
            print(f"Error in process_chunk: {str(e)}")
            # Return original data and empty decisions on error
            return original_chunk, []

    # --- Parallel Processing ---

    def process_multiple_chunks(self, chunks):
        """
        Process multiple chunks in parallel.

        Args:
            chunks: Sequence of bytes-like chunks; the position in the
                sequence is used as the chunk index.

        Returns:
            ``(transformed_chunks, all_decisions)``, both lists in the same
            order as ``chunks``. A chunk whose worker failed is passed
            through unchanged with an empty decision list.
        """
        transformed_chunks = [None] * len(chunks)
        all_decisions = [None] * len(chunks)

        # Define the worker function
        def process_chunk_worker(args):
            chunk_idx, chunk_data = args
            transformed, decisions = self.process_chunk(chunk_idx, chunk_data)
            return chunk_idx, transformed, decisions

        # Use ThreadPoolExecutor for parallel processing
        with concurrent.futures.ThreadPoolExecutor(max_workers=self.num_workers) as executor:
            # Submit tasks
            futures = [executor.submit(process_chunk_worker, (i, chunk))
                       for i, chunk in enumerate(chunks)]

            total = len(futures)
            completed = 0
            last_percentage = 0

            # Collect results; they arrive in completion order, so each one
            # carries its own index.
            for future in concurrent.futures.as_completed(futures):
                try:
                    chunk_idx, transformed, decisions = future.result()
                    transformed_chunks[chunk_idx] = transformed
                    all_decisions[chunk_idx] = decisions

                    # Update completion percentage (only at 10% intervals)
                    completed += 1
                    percentage = int((completed / total) * 100)
                    if percentage >= last_percentage + 10:
                        print(f"Processing: {percentage}% complete ({completed}/{total} chunks)")
                        last_percentage = percentage

                except Exception as e:
                    print(f"Error in worker thread: {str(e)}")

        # Check for unprocessed chunks
        for i, chunk in enumerate(chunks):
            if transformed_chunks[i] is None:
                print(f"Warning: Chunk {i} was not processed, using original")
                transformed_chunks[i] = chunk
                all_decisions[i] = []

        return transformed_chunks, all_decisions

    # --- DVD Processing ---

    def split_dvd_into_chunks(self, dvd_data):
        """
        Split a DVD into fixed-size chunks.

        The last chunk is shorter when the data length is not a multiple of
        ``chunk_size``; empty input gives an empty list.
        """
        return [dvd_data[i:i + self.chunk_size]
                for i in range(0, len(dvd_data), self.chunk_size)]

    def process_dvd(self, dvd_data):
        """
        Process an entire DVD.

        Returns:
            ``(transformed_chunks, all_decisions, original_size)`` where
            ``original_size`` is ``len(dvd_data)``.
        """
        original_size = len(dvd_data)
        print(f"Processing DVD of size {original_size / (1024*1024):.2f} MB")

        # Split into chunks
        chunks = self.split_dvd_into_chunks(dvd_data)
        print(f"Split into {len(chunks)} chunks of {self.chunk_size / 1024:.1f} KB each")

        # Process all chunks in parallel
        start_time = time.time()
        transformed_chunks, all_decisions = self.process_multiple_chunks(chunks)
        process_time = time.time() - start_time
        print(f"Processing completed in {process_time:.2f} seconds")

        return transformed_chunks, all_decisions, original_size

    # --- Metadata Handling ---

    def save_reconstruction_metadata(self, all_decisions, original_size, output_file):
        """
        Save the reconstruction metadata to a file.

        Writes a JSON object with the hex-encoded master seed, the stage
        count, the chunk size, the original size, the chunk count and the
        per-chunk decision lists, then prints the file size and a ratio.

        NOTE(review): the printed ratio is ``original_size`` divided by the
        size of the metadata file alone; it does not account for the
        transformed data.
        """
        metadata = {
            'master_seed': self.master_seed.hex(),
            'num_stages': self.num_stages,
            'chunk_size': self.chunk_size,
            'original_size': original_size,
            'num_chunks': len(all_decisions),
            'decisions': all_decisions
        }

        with open(output_file, 'w') as f:
            json.dump(metadata, f)

        print(f"Saved reconstruction metadata to {output_file}")

        # Calculate and print metadata size
        metadata_size = os.path.getsize(output_file)
        compression_ratio = original_size / metadata_size
        print(f"Metadata size: {metadata_size / 1024:.2f} KB")
        print(f"Compression ratio: {compression_ratio:.1f}:1")

# --- Helper Functions ---

def download_file(url, output_path, timeout=(10, 60)):
    """
    Download a file from URL with progress updates at 10% intervals.

    Args:
        url: Address to fetch.
        output_path: Where to write the body; its parent directory is
            created when the path has one.
        timeout: Passed to ``requests.get``. As a tuple it is
            (connect timeout, read timeout) in seconds; the read timeout
            limits the wait between received bytes, not the whole download.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
        OSError: If the output file cannot be created or written.
    """
    print(f"Downloading {url} to {output_path}")
    start_time = time.time()

    # A bare file name has an empty dirname and os.makedirs('') raises,
    # so only create the directory when there is one.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()

        # Get file size; 0 means the server did not announce one
        total_size = int(response.headers.get('content-length', 0))
        if total_size == 0:
            print("Warning: Content length not available, download progress cannot be tracked")

        bytes_downloaded = 0
        last_percentage = 0
        with open(output_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                if not chunk:
                    continue
                f.write(chunk)

                if total_size == 0:
                    continue

                # Update progress at 10% intervals
                bytes_downloaded += len(chunk)
                percentage = int((bytes_downloaded / total_size) * 100)
                if percentage >= last_percentage + 10:
                    print(f"Download: {percentage}% complete ({bytes_downloaded/1024/1024:.1f}MB/{total_size/1024/1024:.1f}MB)")
                    last_percentage = percentage

    elapsed = time.time() - start_time
    print(f"Download completed in {elapsed:.1f} seconds")

def get_alpine_iso_url():
    """Give the download address of the Alpine Linux 3.18.0 standard x86_64 ISO."""
    url = (
        "https://dl-cdn.alpinelinux.org/alpine/v3.18/releases/x86_64/"
        "alpine-standard-3.18.0-x86_64.iso"
    )
    return url

def get_ubuntu_iso_url():
    """Give the download address of the Ubuntu 22.04.3 desktop amd64 ISO."""
    url = (
        "https://releases.ubuntu.com/22.04.3/"
        "ubuntu-22.04.3-desktop-amd64.iso"
    )
    return url

def calculate_md5(file_path):
    """
    Hash a file with MD5 and return the hex digest.

    The file is consumed in 64 KiB reads. A progress line is printed each
    time the completed percentage has advanced by at least 10 points, and the
    final digest is printed before it is returned.
    """
    print(f"Calculating MD5 of {file_path}...")

    hasher = hashlib.md5()
    total_bytes = os.path.getsize(file_path)
    read_size = 64 * 1024
    done = 0
    reported = 0

    with open(file_path, "rb") as stream:
        while True:
            block = stream.read(read_size)
            if block == b"":
                break
            hasher.update(block)

            # Report only when another 10% has been covered
            done += len(block)
            percent = int((done / total_bytes) * 100)
            if percent - reported >= 10:
                print(f"MD5 calculation: {percent}% complete")
                reported = percent

    digest = hasher.hexdigest()
    print(f"MD5: {digest}")
    return digest

# --- Main Function ---

def main():
    """
    Command-line entry point of the encoder.

    Obtains an ISO (download, local file, or a file under ``models/``),
    prints its MD5, runs it through HierarchicalCompressor and writes
    ``<name>_transformed.bin`` and ``<name>_metadata.json`` to the output
    directory.

    Exits with status 2 on invalid arguments (via ``parser.error``) and with
    status 1 when a local input file does not exist.
    """
    parser = argparse.ArgumentParser(description="DVD Hierarchical Encoder")

    source_group = parser.add_mutually_exclusive_group(required=True)
    source_group.add_argument("--url", help="URL to download DVD ISO")
    source_group.add_argument("--alpine", action="store_true", help="Download Alpine Linux ISO")
    source_group.add_argument("--ubuntu", action="store_true", help="Download Ubuntu ISO")
    source_group.add_argument("--file", help="Path to local DVD ISO file")
    source_group.add_argument("--image", help="Path to image file in models/ directory")

    parser.add_argument("--output-dir", default="output", help="Output directory")
    parser.add_argument("--seed", default=None, help="Master seed (string)")
    parser.add_argument("--chunk-size", type=int, default=CHUNK_SIZE,
                        help=f"Chunk size in bytes (default: {CHUNK_SIZE})")
    parser.add_argument("--stages", type=int, default=NUM_STAGES,
                        help=f"Number of transformation stages (default: {NUM_STAGES})")

    args = parser.parse_args()

    # Reject values the chunking loop cannot handle before any download
    # starts: a chunk size of 0 makes range() raise, a negative one yields
    # no chunks and therefore an empty output file.
    if args.chunk_size <= 0:
        parser.error("--chunk-size must be a positive integer")
    if args.stages < 0:
        parser.error("--stages must not be negative")

    # Create output directory
    os.makedirs(args.output_dir, exist_ok=True)

    # Get DVD ISO file
    iso_path = None

    if args.url:
        iso_path = os.path.join(args.output_dir, "downloaded.iso")
        download_file(args.url, iso_path)

    elif args.alpine:
        iso_path = os.path.join(args.output_dir, "alpine.iso")
        download_file(get_alpine_iso_url(), iso_path)

    elif args.ubuntu:
        iso_path = os.path.join(args.output_dir, "ubuntu.iso")
        download_file(get_ubuntu_iso_url(), iso_path)

    elif args.file:
        iso_path = args.file
        # Same early, readable failure as the --image branch below
        if not os.path.exists(iso_path):
            print(f"Error: File {iso_path} does not exist")
            sys.exit(1)
        print(f"Using local file: {iso_path}")

    elif args.image:
        # Ensure models directory exists
        models_dir = "models"
        os.makedirs(models_dir, exist_ok=True)

        # Get full path within models directory
        iso_path = os.path.join(models_dir, args.image)

        if not os.path.exists(iso_path):
            print(f"Error: Image file {iso_path} does not exist")
            sys.exit(1)

    # An empty string such as --url "" satisfies argparse but selects no
    # branch above; stop here rather than hashing a path of None.
    if iso_path is None:
        parser.error("the selected source must not be empty")

    # Calculate MD5 of original
    original_md5 = calculate_md5(iso_path)
    print(f"Original MD5: {original_md5}")

    # Initialize compressor
    compressor = HierarchicalCompressor(
        master_seed=args.seed,
        num_stages=args.stages,
        chunk_size=args.chunk_size
    )

    # Read DVD data (the whole file is held in memory)
    print(f"Reading DVD data from {iso_path}...")
    with open(iso_path, 'rb') as f:
        dvd_data = f.read()

    # Process DVD
    transformed_chunks, all_decisions, original_size = compressor.process_dvd(dvd_data)

    # Save transformed data; output names are derived from the input's base name
    base_name = os.path.splitext(os.path.basename(iso_path))[0]
    transformed_file = os.path.join(args.output_dir, f"{base_name}_transformed.bin")
    print(f"Saving transformed data to {transformed_file}...")
    with open(transformed_file, 'wb') as f:
        f.writelines(transformed_chunks)

    # Save metadata
    metadata_file = os.path.join(args.output_dir, f"{base_name}_metadata.json")
    compressor.save_reconstruction_metadata(all_decisions, original_size, metadata_file)

    print("Encoding process completed!")
    print(f"- Original file: {iso_path}")
    print(f"- Transformed file: {transformed_file}")
    print(f"- Metadata file: {metadata_file}")


if __name__ == "__main__":
    main()


In [None]:
%%writefile decoder.py
#!/usr/bin/env python3
# --- DVD Hierarchical Decoder ---
# This script restores the original DVD ISO from the compressed metadata

import os
import sys
import hashlib
import hmac
import struct
import numpy as np
import concurrent.futures
import time
import json
import argparse
from pathlib import Path

# Default chunk size in bytes (64 KiB).
# NOTE(review): nothing in the visible part of this script reads this value;
# main() takes the chunk size from the metadata file instead.
CHUNK_SIZE = 64 * 1024  # 64KB chunks

class HierarchicalDecoder:
    """
    Undo the staged XOR transformation using recorded per-stage decisions.

    For each chunk the stages are walked from last to first; every stage whose
    recorded decision is non-zero is reverted by XORing the chunk with the
    HMAC-SHA256 keystream derived from the master seed, the chunk index and
    the stage index.
    """

    def __init__(self, master_seed, num_stages, chunk_size, num_workers=None):
        # Anything that is not already bytes is treated as a hex string
        if not isinstance(master_seed, bytes):
            master_seed = bytes.fromhex(master_seed)
        # A falsy worker count falls back to the CPU count, or 4 if unknown
        if not num_workers:
            num_workers = os.cpu_count() or 4

        self.master_seed = master_seed
        self.num_stages = num_stages
        self.chunk_size = chunk_size
        self.num_workers = num_workers
        self.debug_mode = False

    def log(self, message):
        """Print a debug line, but only while debug mode is switched on."""
        if not self.debug_mode:
            return
        print(f"DEBUG: {message}")

    # --- Core PRNG and Transformation Functions ---

    def derive_stage_seed(self, chunk_index, stage_index):
        """Return HMAC-SHA256 of "<chunk_index>_<stage_index>" keyed by the master seed."""
        label = "{}_{}".format(chunk_index, stage_index).encode()
        return hmac.new(self.master_seed, label, hashlib.sha256).digest()

    def generate_mask_block(self, stage_seed, start_counter, block_size):
        """
        Build ``block_size`` keystream bytes as a uint8 NumPy array.

        Consecutive HMAC-SHA256 digests of a big-endian 32-bit counter,
        starting at ``start_counter``, are laid end to end and cut to length.
        """
        digest_len = 32  # size of one SHA-256 digest
        count = (block_size + digest_len - 1) // digest_len

        keystream = bytearray(count * digest_len)
        for slot in range(count):
            packed_counter = struct.pack('>I', start_counter + slot)
            block = hmac.new(stage_seed, packed_counter, hashlib.sha256).digest()
            begin = slot * digest_len
            keystream[begin:begin + len(block)] = block

        trimmed = keystream[:block_size]
        return np.frombuffer(trimmed, dtype=np.uint8)

    def apply_xor_transformation(self, data, mask):
        """Return the element-wise XOR of ``data`` and ``mask``."""
        result = np.bitwise_xor(data, mask)
        return result

    # --- Chunk Processing ---

    def reverse_process_chunk(self, chunk_index, transformed_chunk, recorded_decisions):
        """
        Revert one chunk and return the recovered bytes.

        Stages are visited from the highest index down. If anything fails the
        error is printed and the untouched ``transformed_chunk`` is returned.
        """
        try:
            # frombuffer gives a read-only view of bytes, hence the copy
            working = np.frombuffer(transformed_chunk, dtype=np.uint8).copy()
            length = len(working)

            for stage in range(self.num_stages - 1, -1, -1):
                # A recorded 0 means the stage was never applied
                if recorded_decisions[stage] == 0:
                    continue

                stage_seed = self.derive_stage_seed(chunk_index, stage)

                # Walk the chunk in windows of at most chunk_size bytes
                for start in range(0, length, self.chunk_size):
                    stop = min(start + self.chunk_size, length)

                    # Counter of the first digest covering this window
                    # (one digest per 32 bytes), as in the forward pass
                    mask = self.generate_mask_block(stage_seed, start // 32, stop - start)

                    # XOR is its own inverse, so the same mask undoes the stage
                    working[start:stop] = self.apply_xor_transformation(working[start:stop], mask)

            return working.tobytes()

        except Exception as e:
            print(f"Error in reverse_process_chunk: {str(e)}")
            # Hand back the input unchanged on error
            return transformed_chunk

    # --- Parallel Processing ---

    def reverse_process_multiple_chunks(self, transformed_chunks, all_decisions):
        """
        Revert many chunks on a thread pool, keeping the input order.

        Chunks are paired with decision lists positionally; a chunk with no
        result is passed through unchanged with a warning.
        """
        recovered_chunks = [None] * len(transformed_chunks)

        with concurrent.futures.ThreadPoolExecutor(max_workers=self.num_workers) as pool:
            # Remember which chunk index each future belongs to
            index_of = {
                pool.submit(self.reverse_process_chunk, idx, chunk, decisions): idx
                for idx, (chunk, decisions) in enumerate(zip(transformed_chunks, all_decisions))
            }

            total = len(index_of)
            done = 0
            reported = 0

            for finished in concurrent.futures.as_completed(index_of):
                try:
                    recovered = finished.result()
                    recovered_chunks[index_of[finished]] = recovered

                    # Progress is printed whenever another 10% has completed
                    done += 1
                    percent = int((done / total) * 100)
                    if percent - reported >= 10:
                        print(f"Recovering: {percent}% complete ({done}/{total} chunks)")
                        reported = percent

                except Exception as e:
                    print(f"Error in reverse worker thread: {str(e)}")

        # Fill any gaps with the still-transformed input
        for idx, chunk in enumerate(transformed_chunks):
            if recovered_chunks[idx] is None:
                print(f"Warning: Chunk {idx} was not recovered, using transformed")
                recovered_chunks[idx] = chunk

        return recovered_chunks

    # --- DVD Processing ---

    def reassemble_chunks_into_dvd(self, chunks, original_size):
        """Join the chunks in order and cut the result to ``original_size`` bytes."""
        return b''.join(chunks)[:original_size]

    def reverse_process_dvd(self, transformed_chunks, all_decisions, original_size):
        """Revert all chunks, report the elapsed time, and return the joined data."""
        print(f"Recovering DVD of original size {original_size / (1024*1024):.2f} MB")

        started = time.time()
        recovered_chunks = self.reverse_process_multiple_chunks(transformed_chunks, all_decisions)
        elapsed = time.time() - started
        print(f"Recovery completed in {elapsed:.2f} seconds")

        return self.reassemble_chunks_into_dvd(recovered_chunks, original_size)

# --- Helper Functions ---

def load_metadata(metadata_file):
    """Announce, then parse the JSON file at ``metadata_file`` and return its content."""
    print(f"Loading metadata from {metadata_file}...")
    with open(metadata_file, 'r') as handle:
        return json.load(handle)

def read_chunks(file_path, chunk_size):
    """
    Load a binary file as a list of pieces of at most ``chunk_size`` bytes.

    Prints a progress line whenever another 10% of the file has been read and
    a final line with the number of pieces.
    """
    print(f"Reading transformed data from {file_path}...")

    # Total size is only needed for the progress percentage
    total_bytes = os.path.getsize(file_path)
    mib = 1024 * 1024
    consumed = 0
    reported = 0

    pieces = []
    with open(file_path, 'rb') as source:
        # read() returns b"" at end of file, which stops the iteration
        for piece in iter(lambda: source.read(chunk_size), b""):
            pieces.append(piece)

            consumed += len(piece)
            percent = int((consumed / total_bytes) * 100)
            if percent - reported >= 10:
                print(f"Reading: {percent}% complete ({consumed/mib:.1f}MB/{total_bytes/mib:.1f}MB)")
                reported = percent

    print(f"Read {len(pieces)} chunks")
    return pieces

def calculate_md5(file_path):
    """
    Compute the MD5 hex digest of a file, reading it 64 KiB at a time.

    Progress is printed each time the completed share has grown by at least
    10 percentage points; the digest is printed and then returned.
    """
    print(f"Calculating MD5 of {file_path}...")

    hasher = hashlib.md5()
    # Size is used solely to turn the byte count into a percentage
    total_bytes = os.path.getsize(file_path)
    read_size = 64 * 1024
    done = 0
    reported = 0

    with open(file_path, "rb") as stream:
        block = stream.read(read_size)
        while block != b"":
            hasher.update(block)

            done += len(block)
            percent = int((done / total_bytes) * 100)
            if percent - reported >= 10:
                print(f"MD5 calculation: {percent}% complete")
                reported = percent

            block = stream.read(read_size)

    digest = hasher.hexdigest()
    print(f"MD5: {digest}")
    return digest

# --- Main Function ---

def main():
    """
    Command-line entry point of the decoder.

    Reads the metadata JSON and the transformed file, reverts the
    transformation with HierarchicalDecoder, writes the result to --output
    and prints its MD5 and size.
    """
    cli = argparse.ArgumentParser(description="DVD Hierarchical Decoder")
    required_options = (
        ("--transformed", "Path to transformed DVD file"),
        ("--metadata", "Path to metadata JSON file"),
        ("--output", "Path to save restored DVD"),
    )
    for flag, help_text in required_options:
        cli.add_argument(flag, required=True, help=help_text)

    args = cli.parse_args()

    # Only create a directory when the output path actually names one
    target_dir = os.path.dirname(args.output)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    metadata = load_metadata(args.metadata)

    # Pull the decoding parameters out of the metadata
    master_seed = metadata['master_seed']
    num_stages = metadata['num_stages']
    chunk_size = metadata['chunk_size']
    original_size = metadata['original_size']
    all_decisions = metadata['decisions']

    print(f"Metadata loaded: {len(all_decisions)} chunks, {num_stages} stages per chunk")

    decoder = HierarchicalDecoder(master_seed=master_seed,
                                  num_stages=num_stages,
                                  chunk_size=chunk_size)

    transformed_chunks = read_chunks(args.transformed, chunk_size)

    # A count mismatch is reported but does not stop the run
    chunk_count = len(transformed_chunks)
    decision_count = len(all_decisions)
    if chunk_count != decision_count:
        print(f"Warning: Number of chunks ({chunk_count}) does not match "
              f"metadata ({decision_count})")

    recovered_dvd = decoder.reverse_process_dvd(transformed_chunks, all_decisions, original_size)

    print(f"Saving restored DVD to {args.output}...")
    with open(args.output, 'wb') as out:
        out.write(recovered_dvd)

    # Hash what was written so it can be compared with the original's MD5
    restored_md5 = calculate_md5(args.output)
    print(f"Restored MD5: {restored_md5}")

    restored_mb = os.path.getsize(args.output) / (1024*1024)
    print("Decoding process completed!")
    print(f"- Transformed file: {args.transformed}")
    print(f"- Metadata file: {args.metadata}")
    print(f"- Restored file: {args.output} ({restored_mb:.2f} MB)")


if __name__ == "__main__":
    main()

In [None]:
%%writefile validator.py
#!/usr/bin/env python3
# --- DVD Verification Script ---
# This script compares the original and restored DVD files

import os
import sys
import hashlib
import argparse
import time

def calculate_md5(file_path):
    """
    Return the MD5 hex digest of a file, read in 1 MiB pieces.

    Prints the file's base name and size up front, a progress line each time
    another 10% has been hashed, and the elapsed time at the end.
    """
    total_bytes = os.path.getsize(file_path)
    hasher = hashlib.md5()

    mib = 1024 * 1024
    read_size = mib  # 1MB per read

    done = 0
    reported = 0
    started = time.time()

    print(f"Calculating MD5 for {os.path.basename(file_path)} ({total_bytes/mib:.2f} MB)")

    with open(file_path, "rb") as stream:
        # read() yields b"" at end of file, which ends the iteration
        for block in iter(lambda: stream.read(read_size), b""):
            hasher.update(block)
            done += len(block)

            percent = int((done / total_bytes) * 100)
            if percent - reported >= 10:
                print(f"MD5 calculation: {percent}% complete ({done/mib:.1f} MB processed)")
                reported = percent

    elapsed = time.time() - started
    print(f"MD5 calculation completed in {elapsed:.1f} seconds")

    return hasher.hexdigest()

def compare_files(file1, file2):
    """
    Compare two files byte by byte to find differences.
    Returns: (match_status, first_diff_position, total_diffs)
    """
    file1_size = os.path.getsize(file1)
    file2_size = os.path.getsize(file2)

    if file1_size != file2_size:
        print(f"File sizes differ: {file1_size} vs {file2_size} bytes")
        min_size = min(

In [None]:
# DVD Hierarchical Compression Runner for Google Colab
# This script runs the encoder, decoder, and validator in sequence
#old
# Install required dependencies
!pip install numpy requests tqdm -q

# Import necessary libraries
import os
import sys
import time
import subprocess
from google.colab import files

# Create directories
!mkdir -p output
!mkdir -p models

# Function to run a Python script and capture its output
def run_script(script_name, args):
    """
    Run a Python script in a child interpreter and echo its output live.

    Args:
        script_name: Script path, passed as the first argument to the
            current interpreter (``sys.executable``).
        args: Sequence of string arguments for the script.

    Returns:
        True if the child exited with status 0, otherwise False.
    """
    print(f"\n{'='*50}")
    print(f"RUNNING: {script_name} {' '.join(args)}")
    print(f"{'='*50}\n")

    # Execute the script; stderr is merged into stdout so both are shown in order
    start_time = time.time()
    command = [sys.executable, script_name] + list(args)
    with subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True
    ) as process:
        # Stream the output in real-time
        for line in process.stdout:
            print(line, end='')

    # Leaving the with-block closes the pipe and waits for the child,
    # so returncode is set from here on.
    elapsed = time.time() - start_time
    print(f"\nCompleted in {elapsed:.2f} seconds with exit code {process.returncode}")

    return process.returncode == 0

# Function to print section header
def print_header(text):
    """Print ``text`` as a banner between two 60-character lines of '#'."""
    rule = '#' * 60
    # Two blank lines before the banner, one after
    print(f"\n\n{rule}", f"# {text}", f"{rule}\n", sep="\n")

# Main function
def main():
    """
    Interactive Colab driver: encode, decode, then validate one ISO.

    Asks for the source (Alpine download, Ubuntu download, or an uploaded
    file), an optional seed phrase and an optional block size, then runs
    encoder.py, decoder.py and validator.py in sequence via run_script and
    finally offers the restored file for download. Stops early if the
    encoder or the decoder fails.
    """
    print_header("DVD HIERARCHICAL COMPRESSION SYSTEM - COLAB RUNNER")

    # Ask what source to use
    print("Select a source for the DVD ISO:")
    print("1. Download Alpine Linux ISO (smaller, ~130MB)")
    print("2. Download Ubuntu ISO (larger, ~3.5GB)")
    print("3. Upload a file")

    choice = input("\nEnter your choice (1-3): ").strip()

    # Arguments are kept as lists from the start. Building one string and
    # splitting it on whitespace would break file names or seed phrases
    # that contain spaces.
    if choice == '1':
        print_header("DOWNLOADING ALPINE LINUX ISO")
        source_args = ["--alpine"]
        iso_name = "alpine"
        original_path = f"output/{iso_name}.iso"
    elif choice == '2':
        print_header("DOWNLOADING UBUNTU ISO")
        source_args = ["--ubuntu"]
        iso_name = "ubuntu"
        original_path = f"output/{iso_name}.iso"
    elif choice == '3':
        print_header("UPLOADING FILE")
        print("Please upload a file...")
        uploaded = files.upload()

        if not uploaded:
            print("No file was uploaded. Exiting.")
            return

        # Get the uploaded file name (first one if several were uploaded)
        upload_filename = next(iter(uploaded))
        iso_name = os.path.splitext(upload_filename)[0]

        # Move the file to the output directory
        original_path = f"output/{upload_filename}"
        os.rename(upload_filename, original_path)
        source_args = ["--file", original_path]
    else:
        print("Invalid choice. Exiting.")
        return

    # Optional seed
    use_seed = input("\nUse a custom seed for reproducibility? (y/n): ").strip().lower()
    seed_args = []
    if use_seed == 'y':
        seed = input("Enter a seed phrase: ").strip()
        if seed:
            # The --seed=VALUE form keeps a phrase that starts with '-'
            # from being read as another option.
            seed_args = [f"--seed={seed}"]
        else:
            print("Empty seed phrase. Using a random seed.")

    # Optional block size
    custom_block = input("\nUse a custom block size? (y/n, default is 64KB): ").strip().lower()
    block_args = []
    if custom_block == 'y':
        block_size = input("Enter block size in KB (16, 32, 64, 128, etc.): ").strip()
        try:
            block_bytes = int(block_size) * 1024
        except ValueError:
            block_bytes = 0
        if block_bytes > 0:
            block_args = ["--chunk-size", str(block_bytes)]
        else:
            # Covers non-numeric input as well as zero and negative sizes
            print("Invalid block size. Using default.")

    # Construct paths; the encoder names its outputs after the input's base name
    transformed_path = f"output/{iso_name}_transformed.bin"
    metadata_path = f"output/{iso_name}_metadata.json"
    restored_path = f"output/{iso_name}_restored.iso"

    # Step 1: Run the encoder
    print_header("STEP 1: RUNNING ENCODER")

    encoder_args = source_args + ["--output-dir", "output"] + seed_args + block_args

    if not run_script("encoder.py", encoder_args):
        print("Encoder failed. Exiting.")
        return

    # Step 2: Run the decoder
    print_header("STEP 2: RUNNING DECODER")

    decoder_args = [
        "--transformed", transformed_path,
        "--metadata", metadata_path,
        "--output", restored_path
    ]

    if not run_script("decoder.py", decoder_args):
        print("Decoder failed. Exiting.")
        return

    # Step 3: Run the validator (its result does not stop the summary below)
    print_header("STEP 3: RUNNING VALIDATOR")

    validator_args = [
        "--original", original_path,
        "--restored", restored_path
    ]

    run_script("validator.py", validator_args)

    # Completion
    print_header("PROCESS COMPLETE")
    print("Files created:")
    print(f"1. Transformed data: {transformed_path}")
    print(f"2. Metadata: {metadata_path}")
    print(f"3. Restored ISO: {restored_path}")

    # Option to download restored file
    download = input("\nDownload the restored ISO? (y/n): ").strip().lower()
    if download == 'y':
        print("Starting download...")
        files.download(restored_path)


# Run the main function
if __name__ == "__main__":
    main()

KeyboardInterrupt: Interrupted by user