### Load the dataset

In [None]:
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
import math
from skimage.metrics import structural_similarity as ssim
import random

## Dataset Initialization and Visualization

### Purpose
This part of the code initializes the dataset by loading file paths of distorted and ground truth images for different categories. It also visualizes a sample image from each category along with its dimensions.

### Steps
1. **Define Dataset Path**  
   - The dataset path is set to `/Users/demir/Desktop/Assignment1/WarpDoc`, which should be updated as needed.
  
2. **Load Image Paths**  
   - The dataset consists of six categories: `curved`, `fold`, `incomplete`, `perspective`, `random`, and `rotate`.
   - Image paths for each category are loaded separately for distorted and ground truth images.
   - If the corresponding folder does not exist, a warning message is displayed.

3. **Visualizing Sample Images**
   - A function `show_sample_images()` is defined to:
     - Load and display a distorted and its corresponding ground truth image for each category.
     - Convert images from BGR to RGB (since OpenCV loads images in BGR format).
     - Extract and display the image dimensions in the plot title.
   - The function is called at the end to visualize the images.

### Key Libraries Used
- `os`: For accessing file paths.
- `cv2`: For image reading and processing.
- `matplotlib.pyplot`: For visualizing images.

### Expected Output
- A figure displaying two rows:
  - **Top row:** Distorted images from each category.
  - **Bottom row:** Corresponding ground truth images.
- Titles include category names and image dimensions.

This step ensures that the dataset is correctly loaded and provides an initial visual inspection of distortions.


In [None]:
# Define dataset path
dataset_path = '/Users/demir/Desktop/Assignment1/WarpDoc'  # Update this with your own file path

# Define categories
categories = ['curved', 'fold', 'incomplete', 'perspective', 'random', 'rotate']
image_paths = {}


def _list_image_files(folder):
    """
    Return the full paths of the regular, non-hidden files in `folder`, sorted by name.

    os.listdir() returns entries in arbitrary order, so the listing is sorted to make
    indexing (e.g. "the first image") reproducible. Hidden entries such as macOS
    '.DS_Store' and sub-directories are skipped because they are not images.
    """
    return [
        os.path.join(folder, name)
        for name in sorted(os.listdir(folder))
        if not name.startswith('.') and os.path.isfile(os.path.join(folder, name))
    ]


# Load image file paths for each category (without resizing).
# NOTE: index k of 'distorted' and index k of 'ground_truth' refer to the same
# document only if both folders use matching file names -- verify for your dataset.
for category in categories:
    distorted_path = os.path.join(dataset_path, 'distorted', category)
    ground_truth_path = os.path.join(dataset_path, 'digital', category)

    if os.path.isdir(distorted_path) and os.path.isdir(ground_truth_path):
        image_paths[category] = {
            'distorted': _list_image_files(distorted_path),
            'ground_truth': _list_image_files(ground_truth_path),
        }
    else:
        print(f"Folder not found for category: {category}")

# Function to visualize sample images from each category and display their dimensions
def show_sample_images(image_paths, categories):
    """
    Plot the first distorted and the first ground-truth image of every category.

    The figure has two rows (top: distorted, bottom: ground truth) and one
    column per category. Each title shows the category and the image size as
    WIDTHxHEIGHT.

    Parameters:
        image_paths (dict): Maps a category name to a dict with the keys
            'distorted' and 'ground_truth', each holding a list of file paths.
        categories (list): Category names, in column order.

    Categories that are missing from `image_paths` or have an empty list are
    skipped. A file that OpenCV cannot decode is reported and its cell is left
    empty instead of crashing the whole figure.
    """
    n_cols = len(categories)
    plt.figure(figsize=(12, 8))

    for col, category in enumerate(categories):
        entry = image_paths.get(category)
        if not entry or not entry['distorted'] or not entry['ground_truth']:
            continue

        # (title label, path) for the top and the bottom row of this column
        samples = (
            ('Distorted', entry['distorted'][0]),
            ('Ground Truth', entry['ground_truth'][0]),
        )
        for row, (label, path) in enumerate(samples):
            img = cv2.imread(path, cv2.IMREAD_COLOR)
            if img is None:
                # imread signals failure by returning None rather than raising
                print(f"Could not read image: {path}")
                continue

            height, width = img.shape[:2]
            plt.subplot(2, n_cols, col + 1 + row * n_cols)
            # OpenCV loads BGR; matplotlib expects RGB
            plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            plt.title(f'{label}: {category}\n{width}x{height}')
            plt.axis('off')

    plt.tight_layout()
    plt.show()

# Render one distorted / ground-truth pair per category, with sizes in the titles
show_sample_images(image_paths=image_paths, categories=categories)


## Image Resizing for Dataset Preprocessing

To keep image sizes bounded, this step downsizes large images while preserving their aspect ratio. If an image's longest side exceeds `max_dim` (1500 pixels), the image is scaled down so that its longest side is at most `max_dim`; otherwise, it is left unchanged. The function `resize_image_if_needed()` handles the resizing.

Another function, `resize_and_save_images()`, creates a new dataset folder structure (`distorted` and `digital`), replicating the original hierarchy. It resizes images only when necessary and skips files that already exist in the destination, avoiding redundant processing. This step caps input sizes (images are not forced to a single common size), improving efficiency in later stages like edge detection and geometric transformations.


In [None]:
def resize_image_if_needed(img, max_dim=1500):
    """
    Resize the image if its longest side is greater than max_dim.
    Otherwise, return the original image object unchanged.

    Parameters:
        img (numpy.ndarray): The original image; only `img.shape[:2]`
            (height, width) is inspected here.
        max_dim (int): Maximum size for the longest side of the image.
            Must be at least 1.

    Returns:
        numpy.ndarray: Aspect-preserving downscaled image (INTER_AREA) if
        needed, otherwise the very same `img` object.

    Raises:
        ValueError: If max_dim is smaller than 1.
    """
    if max_dim < 1:
        raise ValueError(f"max_dim must be >= 1, got {max_dim}")

    h, w = img.shape[:2]
    longest_side = max(h, w)
    if longest_side <= max_dim:
        return img

    scale = max_dim / float(longest_side)
    # int() truncates, so an extremely thin image could end up with a
    # zero-pixel side, which cv2.resize rejects; keep at least one pixel.
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))
    return cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)

def resize_and_save_images(original_dataset_path, resized_dataset_path, categories, max_dim=1500):
    """
    Create a resized version of the dataset in a new folder structure
    mirroring the original ('distorted/<category>' and 'digital/<category>').

    Every readable image is written to the destination; it is downscaled first
    only when its longest side exceeds max_dim. A file whose destination path
    already exists is skipped without being re-read, so the function can be
    re-run cheaply (the existing file is assumed to be correct).

    Parameters:
        original_dataset_path (str): Path to the original dataset.
        resized_dataset_path (str): Path to store the resized dataset.
        categories (list): List of categories (e.g., ['curved', 'fold', ...]).
        max_dim (int): Maximum size for the longest side of the image.

    Returns:
        None. Progress and problems (missing folders, unreadable or
        unwritable files) are reported via print().
    """
    # We will replicate the "distorted" and "digital" structure
    for folder_name in ['distorted', 'digital']:
        for category in categories:
            src_folder = os.path.join(original_dataset_path, folder_name, category)
            dst_folder = os.path.join(resized_dataset_path, folder_name, category)

            if not os.path.exists(src_folder):
                print(f"Source folder not found: {src_folder}")
                continue

            # Create destination folder if it doesn't exist
            os.makedirs(dst_folder, exist_ok=True)

            for file_name in os.listdir(src_folder):
                src_img_path = os.path.join(src_folder, file_name)
                dst_img_path = os.path.join(dst_folder, file_name)

                # Already produced by an earlier run: skip reprocessing.
                if os.path.exists(dst_img_path):
                    continue

                # imread returns None (no exception) for anything it cannot decode
                img = cv2.imread(src_img_path, cv2.IMREAD_COLOR)
                if img is None:
                    print(f"Could not read image: {src_img_path}")
                    continue

                resized_img = resize_image_if_needed(img, max_dim=max_dim)

                # imwrite reports failure through its boolean return value;
                # surface it instead of silently leaving a hole in the dataset.
                if not cv2.imwrite(dst_img_path, resized_img):
                    print(f"Could not write image: {dst_img_path}")

    print("Resizing completed. Check your resized dataset at:", resized_dataset_path)


## Comparing Original and Resized Images

This function visually compares original and resized images to ensure proper scaling. It selects the first image from each category in the `distorted` folder, loads both versions, and displays them side by side.

Original images might have varying dimensions, while resized ones are adjusted to fit within `max_dim`. Image dimensions are shown in titles for verification. This step helps confirm that resizing maintains structural integrity before further processing.

In [None]:
def compare_original_and_resized(original_dataset_path, resized_dataset_path, categories):
    """
    Display one original image and its resized counterpart per category
    for a quick visual comparison.

    For each category, the 'distorted' folder of both datasets is inspected and
    the first file (in sorted name order) that can be decoded from BOTH
    locations is shown: original in the top row, resized copy in the bottom
    row, each titled with its WIDTHxHEIGHT. Sorting makes the choice
    reproducible (os.listdir order is arbitrary) and scanning past unreadable
    entries avoids an empty column when the first entry is not an image.

    Parameters:
        original_dataset_path (str): Path to the original dataset.
        resized_dataset_path (str): Path to the resized dataset.
        categories (list): List of categories.
    """
    n_cols = len(categories)
    plt.figure(figsize=(12, 8))

    for col, category in enumerate(categories):
        src_folder = os.path.join(original_dataset_path, 'distorted', category)
        dst_folder = os.path.join(resized_dataset_path, 'distorted', category)

        if not os.path.isdir(src_folder) or not os.path.isdir(dst_folder):
            continue

        # Find the first file that exists and decodes in both datasets.
        pair = None
        for file_name in sorted(os.listdir(src_folder)):
            dst_img_path = os.path.join(dst_folder, file_name)
            if not os.path.isfile(dst_img_path):
                continue  # cheap check before decoding the (large) original
            original_img = cv2.imread(os.path.join(src_folder, file_name), cv2.IMREAD_COLOR)
            if original_img is None:
                continue
            resized_img = cv2.imread(dst_img_path, cv2.IMREAD_COLOR)
            if resized_img is None:
                continue
            pair = (original_img, resized_img)
            break

        if pair is None:
            continue

        for row, (label, img) in enumerate(zip(("Original", "Resized"), pair)):
            height, width = img.shape[:2]
            plt.subplot(2, n_cols, col + 1 + row * n_cols)
            # OpenCV loads BGR; matplotlib expects RGB
            plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            plt.title(f"{label}: {category}\n{width}x{height}")
            plt.axis('off')

    plt.tight_layout()
    plt.show()


In [None]:
# Source and destination roots for the resized copy of the dataset
original_dataset_path = dataset_path
resized_dataset_path = '/Users/demir/Desktop/Assignment1/WarpDoc_resized'
categories = ['curved', 'fold', 'incomplete', 'perspective', 'random', 'rotate']

# Write the resized copy; the longest image edge is capped at 1500 pixels
resize_and_save_images(original_dataset_path, resized_dataset_path, categories, max_dim=1500)

In [None]:
# Side-by-side check of one original and one resized picture per category
compare_original_and_resized(original_dataset_path, resized_dataset_path, categories)


## Preliminary Hough Transform Testing

In this step, we **use OpenCV's built-in Hough Transform implementation** to analyze its efficiency and optimize parameters before implementing our own version. Although using external functions violates the assignment's requirement to write our own methods, **this test is crucial for understanding how an optimal Hough Transform performs** and tuning hyperparameters effectively.

The function detects text-aligned lines by:
1. Converting the image to grayscale and applying Gaussian blur.
2. Performing Canny edge detection, followed by dilation to enhance edges.
3. Using **Probabilistic Hough Transform** to detect potential lines.
4. Filtering lines based on **angle and length** to keep only near-horizontal segments.
5. Displaying the detected edges and filtered lines alongside the original image.

This step allows us to **experiment with different threshold values efficiently** and ensures our custom implementation will be well-calibrated for real-world document distortions.

In [None]:
def detect_lines_for_text(image_path,
                          canny_thresh1=80,
                          canny_thresh2=200,
                          pht_threshold=60,
                          min_line_length=30,
                          max_line_gap=5,
                          angle_tol=20,
                          length_tol=30):
    """
    Detect near-horizontal line segments in an image with OpenCV's HoughLinesP.

    Pipeline: grayscale -> 5x5 Gaussian blur -> Canny -> 2x2 dilation ->
    probabilistic Hough (1 px / 1 degree resolution) -> length/angle filter.

    Parameters:
        image_path (str): Image file to load.
        canny_thresh1, canny_thresh2: Canny hysteresis thresholds.
        pht_threshold (int): Hough accumulator threshold.
        min_line_length, max_line_gap: Forwarded to cv2.HoughLinesP.
        angle_tol (float): A segment is kept when its angle is within this many
            degrees of 0 or 180 (i.e. of horizontal).
        length_tol (float): Segments shorter than this (pixels) are dropped.

    Returns:
        tuple: (original BGR image, dilated edge map,
                list of (x1, y1, x2, y2) tuples that passed the filter).

    Raises:
        ValueError: If the image cannot be read.
    """
    original = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if original is None:
        raise ValueError(f"Cannot read image: {image_path}")

    # Preprocess: grayscale, smooth, edge map
    smoothed = cv2.GaussianBlur(cv2.cvtColor(original, cv2.COLOR_BGR2GRAY), (5, 5), 0)
    edges = cv2.Canny(smoothed, canny_thresh1, canny_thresh2)

    # Small dilation to connect broken text edges
    edges = cv2.dilate(edges, np.ones((2, 2), np.uint8), iterations=1)

    candidates = cv2.HoughLinesP(edges, 1, np.pi/180, pht_threshold,
                                 minLineLength=min_line_length, maxLineGap=max_line_gap)

    filtered_segments = []
    # HoughLinesP yields None (not an empty array) when nothing is found
    for candidate in ([] if candidates is None else candidates):
        x1, y1, x2, y2 = candidate[0]
        dx, dy = x2 - x1, y2 - y1

        if math.hypot(dx, dy) < length_tol:
            continue

        # abs() folds the direction into [0, 180]; horizontal is near 0 or near 180
        angle_deg = abs(math.degrees(math.atan2(dy, dx)))
        near_horizontal = (angle_deg < angle_tol) or (abs(angle_deg - 180) < angle_tol)
        if near_horizontal:
            filtered_segments.append((x1, y1, x2, y2))

    return original, edges, filtered_segments

def draw_segments(image, segments):
    """
    Return a copy of `image` with each (x1, y1, x2, y2) segment drawn as a
    2-pixel-wide line in colour (0, 255, 0). The input image is not modified.
    """
    canvas = image.copy()
    for segment in segments:
        x1, y1, x2, y2 = segment
        cv2.line(canvas, (x1, y1), (x2, y2), (0, 255, 0), 2)
    return canvas

def show_text_lines(resized_dataset_path, category='perspective', sample_index=11):
    """
    Run detect_lines_for_text() on one sample image and plot the result.

    Shows three panels: the dilated edge map, the image with the kept segments
    drawn on it, and the untouched original.

    Parameters:
        resized_dataset_path (str): Root of the resized dataset.
        category (str): Sub-folder of 'distorted' to take the sample from.
        sample_index (int): Position of the sample in the name-sorted folder
            listing. Values past the end fall back to the last file instead of
            raising IndexError.
    """
    # 1) Select a sample (sorted: os.listdir order is arbitrary)
    folder = os.path.join(resized_dataset_path, 'distorted', category)
    files = sorted(os.listdir(folder))
    if not files:
        print(f"No images in {folder}")
        return
    image_path = os.path.join(folder, files[min(sample_index, len(files) - 1)])

    # 2) Detect lines
    original, edges, segments = detect_lines_for_text(
        image_path,
        canny_thresh1=100,
        canny_thresh2=300,
        pht_threshold=100,    # accumulator votes required by HoughLinesP
        min_line_length=30,   # shortest segment HoughLinesP may return
        max_line_gap=30,      # largest gap bridged within one segment
        angle_tol=360,        # angles lie in [0, 180], so this disables the angle filter
        length_tol=100        # drop segments shorter than 100 px
    )

    # 3) Draw lines
    lines_img = draw_segments(original, segments)

    # 4) Visualize
    panels = [
        ("Edges (with dilation)", edges, 'gray'),
        (f"Filtered Lines: {len(segments)} segments", cv2.cvtColor(lines_img, cv2.COLOR_BGR2RGB), None),
        ("Original", cv2.cvtColor(original, cv2.COLOR_BGR2RGB), None),
    ]
    plt.figure(figsize=(15, 5))
    for slot, (title, img, cmap) in enumerate(panels, start=1):
        plt.subplot(1, 3, slot)
        plt.imshow(img, cmap=cmap)
        plt.title(title)
        plt.axis('off')

    plt.tight_layout()
    plt.show()

# Usage: preview the OpenCV-based detector on one 'perspective' sample
resized_dataset_path = '/Users/demir/Desktop/Assignment1/WarpDoc_resized'
show_text_lines(resized_dataset_path=resized_dataset_path, category='perspective')

## Custom Hough Transform and Region-based Line Detection

This step **implements a custom Hough-based line-segment detector** that mirrors the interface of OpenCV's `HoughLinesP` but is written from scratch. Unlike the probabilistic variant, every edge pixel votes: we manually compute a full **accumulator matrix**, keep the strongest peaks, and extract line segments based on their spatial continuity.

A **document mask** is created to isolate the paper region and ignore background noise. The process follows:
1. Convert to grayscale and apply thresholding to detect bright document regions.
2. Use morphological closing to clean noise.
3. Extract the largest contour and create a mask.
4. Apply **Canny edge detection** and mask edges to focus only on the document.
5. Use our **custom Hough Transform implementation** to detect line segments.
6. Draw and visualize detected lines over the masked region.

This method ensures **better performance in real-world distortions**, especially for curved and folded documents, improving the accuracy of document rectification.

In [None]:
def myHoughLinesP_standard_topK(edges,
                                rho=1,
                                theta=np.pi/180,
                                threshold=50,
                                minLineLength=50,
                                maxLineGap=10,
                                top_k=50):
    """
    Detect line segments in a binary edge map with a standard Hough accumulator.

    Steps:
    1) Every non-zero pixel votes into a (rho, theta) accumulator.
    2) All cells with at least `threshold` votes are collected and ordered by
       vote count (highest first).
    3) Only the strongest `top_k` cells are turned into segments.
    4) For each such line, the edge pixels within 1 px of it are ordered
       ALONG the line, split wherever two neighbours are more than
       `maxLineGap` apart, and every run at least `minLineLength` long is kept.

    Parameters:
        edges (numpy.ndarray): 2-D edge image; non-zero pixels are edge points.
        rho (float): Distance resolution of the accumulator in pixels (> 0).
        theta (float): Angle resolution of the accumulator in radians.
        threshold (int): Minimum number of votes for a cell to count as a line.
        minLineLength (float): Minimum length of a returned segment.
        maxLineGap (float): Maximum gap along the line inside one segment.
        top_k (int): Number of strongest accumulator cells to process.

    Returns:
        numpy.ndarray of shape (M, 4) with rows (x1, y1, x2, y2),
        or None when there are no edge pixels, no peaks, or no segments.
    """
    y_idxs, x_idxs = np.nonzero(edges)
    if len(x_idxs) == 0:
        return None

    height, width = edges.shape
    diag_len = int(np.ceil(np.sqrt(height**2 + width**2)))  # upper bound on |rho|

    # 1) theta / rho axes. rho bin i represents the value -diag_len + i*rho,
    #    which is exactly what the index formula below assumes; ceil() keeps
    #    the largest rounded index inside the array for any positive rho.
    thetas = np.arange(0, np.pi, theta)
    cos_thetas = np.cos(thetas)
    sin_thetas = np.sin(thetas)
    num_thetas = len(thetas)

    num_rhos = int(np.ceil(2 * diag_len / rho)) + 1
    rhos_arr = -diag_len + rho * np.arange(num_rhos)

    # 2) accumulator: one row per edge pixel, one column per theta
    rho_mat = x_idxs.reshape(-1, 1) * cos_thetas + y_idxs.reshape(-1, 1) * sin_thetas
    rho_idx_mat = np.round((rho_mat + diag_len) / rho).astype(np.int32)

    theta_idx_flat = np.tile(np.arange(num_thetas), len(x_idxs))
    acc_1d = np.ravel_multi_index((rho_idx_mat.ravel(), theta_idx_flat),
                                  (num_rhos, num_thetas))
    accumulator = np.bincount(acc_1d, minlength=num_rhos * num_thetas)
    accumulator = accumulator.reshape((num_rhos, num_thetas))

    # 3) cells at or above the threshold (row-major order, like a nested loop)
    peak_r, peak_t = np.nonzero(accumulator >= threshold)
    if peak_r.size == 0:
        return None

    # 4) strongest first; the stable sort keeps row-major order among ties
    strongest = np.argsort(-accumulator[peak_r, peak_t], kind="stable")[:top_k]

    lines = []
    for r_i, t_i in zip(peak_r[strongest], peak_t[strongest]):
        rho_val = rhos_arr[r_i]
        c = math.cos(thetas[t_i])
        s = math.sin(thetas[t_i])

        # Edge pixels within ~1 px of the line x*c + y*s = rho
        near = np.abs(x_idxs * c + y_idxs * s - rho_val) < 1.0
        if np.count_nonzero(near) < 2:
            continue
        pts = np.column_stack((x_idxs[near], y_idxs[near]))

        # Position ALONG the line: projection on its direction (-s, c).
        # (Projecting on the normal (c, s) would give ~rho for every point,
        # so gaps between neighbours could never be detected.)
        along = -pts[:, 0] * s + pts[:, 1] * c
        by_position = np.argsort(along, kind="stable")
        pts = pts[by_position]
        along = along[by_position]

        # Split into runs wherever consecutive points are too far apart
        breaks = np.nonzero(np.diff(along) > maxLineGap)[0] + 1
        for run in np.split(pts, breaks):
            seg = _create_line_segment(run, c, s, minLineLength)
            if seg is not None:
                lines.append(seg)

    if not lines:
        return None
    return np.array(lines)

def _create_line_segment(points, cos_t, sin_t, minLineLength):
    """
    Turn a set of (x, y) points lying on one Hough line into a segment.

    The end points are the two points that are farthest apart along the line
    direction (-sin_t, cos_t), where (cos_t, sin_t) is the line normal. Taking
    real points (rather than the corners of the bounding box) keeps lines with
    a negative slope on the correct diagonal.

    Returns:
        (x1, y1, x2, y2) as Python ints with (x1, y1) <= (x2, y2), or None if
        there are fewer than two points or the segment is shorter than
        minLineLength.
    """
    if len(points) < 2:
        return None
    pts = np.asarray(points)
    along = -pts[:, 0] * sin_t + pts[:, 1] * cos_t
    first = pts[int(np.argmin(along))]
    last = pts[int(np.argmax(along))]

    # Report the end with the smaller x (then smaller y) first
    (x1, y1), (x2, y2) = sorted(((int(first[0]), int(first[1])),
                                 (int(last[0]), int(last[1]))))
    if math.hypot(x2 - x1, y2 - y1) < minLineLength:
        return None
    return (x1, y1, x2, y2)


def find_document_mask(gray_image):
    """
    Build a filled mask of the largest bright region, taken to be the document.

    Steps:
    1) Binarise with a fixed threshold of 30 (everything brighter is foreground).
    2) Apply a 7x7 morphological closing (2 iterations) to remove small holes.
    3) Pick the external contour with the largest area and simplify it with
       approxPolyDP (epsilon = 2% of its perimeter). The number of corners of
       the simplified polygon is not checked.
    4) Draw that polygon filled (255) on a black uint8 image.

    Returns:
        numpy.ndarray: uint8 mask with the shape of `gray_image`, or None when
        no contour with a positive area exists.
    """
    # 1) Fixed threshold
    _, binary = cv2.threshold(gray_image, 30, 255, cv2.THRESH_BINARY)

    # 2) Closing operation
    binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, np.ones((7, 7), np.uint8), iterations=2)

    # 3) Largest external contour (the first one wins on equal areas)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None

    largest = max(contours, key=cv2.contourArea)
    if cv2.contourArea(largest) <= 0:
        return None

    outline = cv2.approxPolyDP(largest, 0.02 * cv2.arcLength(largest, True), True)

    # 4) Filled polygon as mask
    mask = np.zeros_like(gray_image, dtype=np.uint8)
    cv2.drawContours(mask, [outline], -1, 255, -1)
    return mask

def detect_lines_for_text_with_mask(image_path,
                                    canny_thresh1=100,
                                    canny_thresh2=200,
                                    pht_threshold=80,
                                    min_line_length=30,
                                    max_line_gap=30):
    """
    Detect line segments inside the document region of an image.

    1) Load the image and convert it to grayscale.
    2) Build the document mask (ROI) with find_document_mask(); if none is
       found, the whole image is used.
    3) Blur (7x7 Gaussian), run Canny and keep only edges inside the mask.
    4) Run myHoughLinesP_standard_topK() (1 px / 1 degree, top 50 peaks).

    Returns:
        tuple: (original BGR image, unmasked edges, document mask,
                masked edges, lines as returned by the Hough function).

    Raises:
        ValueError: If the image cannot be read.
    """
    original = cv2.imread(image_path)
    if original is None:
        raise ValueError(f"Cannot read image: {image_path}")

    gray = cv2.cvtColor(original, cv2.COLOR_BGR2GRAY)

    # Region of interest: the document, or everything as a fallback
    doc_mask = find_document_mask(gray)
    if doc_mask is None:
        print("No contour found. Using entire image as ROI.")
        doc_mask = np.full_like(gray, 255, dtype=np.uint8)

    # Edges of the smoothed image, then restricted to the ROI
    edges = cv2.Canny(cv2.GaussianBlur(gray, (7, 7), 0), canny_thresh1, canny_thresh2)
    masked_edges = cv2.bitwise_and(edges, edges, mask=doc_mask)

    hough_settings = dict(
        rho=1,
        theta=np.pi/180,
        threshold=pht_threshold,
        minLineLength=min_line_length,
        maxLineGap=max_line_gap,
        top_k=50,
    )
    lines = myHoughLinesP_standard_topK(masked_edges, **hough_settings)
    return original, edges, doc_mask, masked_edges, lines

def draw_segments(image, segments, color=(0,255,0), thickness=2):
    """
    Return a copy of `image` with every (x1, y1, x2, y2) segment drawn on it.

    `segments` may be None, in which case the plain copy is returned.
    The input image is never modified.
    """
    canvas = image.copy()
    if segments is not None:
        for segment in segments:
            x1, y1, x2, y2 = segment
            cv2.line(canvas, (x1, y1), (x2, y2), color, thickness)
    return canvas


def show_text_lines_roi(resized_dataset_path, category='perspective', sample_index=11):
    """
    Run detect_lines_for_text_with_mask() on one sample image and plot the
    intermediate results: original, document mask, unmasked Canny edges,
    masked edges and the detected segments drawn on the image.

    Parameters:
        resized_dataset_path (str): Root of the resized dataset.
        category (str): Sub-folder of 'distorted' to take the sample from.
        sample_index (int): Position of the sample in the name-sorted folder
            listing. Values past the end fall back to the last file instead of
            raising IndexError.
    """
    folder = os.path.join(resized_dataset_path, 'distorted', category)
    files = sorted(os.listdir(folder))  # os.listdir order is arbitrary
    if not files:
        print("No images in", folder)
        return

    image_path = os.path.join(folder, files[min(sample_index, len(files) - 1)])

    original, edges, doc_mask, masked_edges, lines = detect_lines_for_text_with_mask(
        image_path,
        canny_thresh1=100,
        canny_thresh2=200,
        pht_threshold=80,
        min_line_length=100,
        max_line_gap=10
    )

    lines_img = draw_segments(original, lines)
    seg_count = len(lines) if lines is not None else 0  # the detector returns None for "no lines"

    # (title, image, colormap) in subplot order on a 2x3 grid; the last cell stays empty
    panels = [
        ("Original", cv2.cvtColor(original, cv2.COLOR_BGR2RGB), None),
        ("Document Mask (ROI)", doc_mask, "gray"),
        ("Canny (unmasked)", edges, "gray"),
        ("Masked Edges", masked_edges, "gray"),
        (f"Hough Lines: {seg_count}", cv2.cvtColor(lines_img, cv2.COLOR_BGR2RGB), None),
    ]

    plt.figure(figsize=(14,6))
    for slot, (title, img, cmap) in enumerate(panels, start=1):
        plt.subplot(2, 3, slot)
        plt.imshow(img, cmap=cmap)
        plt.title(title)
        plt.axis("off")

    plt.tight_layout()
    plt.show()

# Usage: run the mask-restricted custom Hough detector on one 'perspective' sample
resized_dataset_path = '/Users/demir/Desktop/Assignment1/WarpDoc_resized'
show_text_lines_roi(resized_dataset_path=resized_dataset_path, category='perspective')

## RANSAC-based Line Refinement

This section applies **RANSAC (Random Sample Consensus)** to refine the line segments detected by the Hough Transform. Hough-detected lines often contain noise and outliers, so **RANSAC helps find the most consistent line by filtering inliers**.

1. **`ransac_line_fitting()`**  
   - Selects two random points from the edge image and computes a candidate line.
   - Counts how many points (inliers) fit the line within a distance threshold.
   - Repeats for multiple iterations, keeping the best line with the most inliers.

2. **`refine_lines_with_ransac()`**  
   - Uses RANSAC to re-fit each detected Hough line.
   - Collects nearby edge points and removes weak/noisy lines.
   - Converts detected lines into a more robust **(a, b, c) format**, improving document rectification.

By using **RANSAC**, this step ensures that only the most reliable line segments are used, reducing distortion and improving perspective correction.

## Line Filtering, Grouping, and Refinement

To improve the robustness of the detected lines, several filtering and refinement steps were implemented.

1. **Filtering Short Lines**  
   - The function `filter_lines_by_length()` removes lines shorter than a given ratio of the image size. This helps eliminate small, irrelevant segments.

2. **Grouping Lines by Angle**  
   - `group_lines_by_angle()` clusters lines that have similar orientations. This ensures that vertical and horizontal edges are grouped separately.

3. **Merging Collinear Lines**  
   - `merge_collinear_lines()` combines nearby, nearly parallel lines into a single segment. It considers both **angle similarity** and **spatial proximity**.

4. **Filtering by Document Mask**  
   - `filter_lines_by_mask_coverage()` removes lines that do not sufficiently overlap with the document region, ensuring that detected lines belong to the document.

5. **RANSAC-based Refinement**  
   - `ransac_line_fitting()` refits line segments to **eliminate outliers** and find the most consistent line model.
   - `refine_lines_with_ransac()` selects candidate edge points near each line and applies **RANSAC** for better fitting.

6. **Fast Line Refinement Pipeline**  
   - `fast_ransac_pipeline()` integrates all the above steps, including line length filtering, angle-based grouping, collinear merging, document mask filtering, and RANSAC-based refinement.
   - The pipeline ensures that only **meaningful document boundaries** are considered for further perspective correction.

These steps significantly **improve the reliability of document boundary detection** and make the perspective transformation more robust.


In [None]:
def filter_lines_by_length(lines, img_width, img_height, min_length_ratio=0.1):
    """
    Keep only the segments that are long relative to the image.

    A segment (x1, y1, x2, y2) survives when its Euclidean length is at least
    min_length_ratio * max(img_width, img_height).

    Returns:
        list of (x1, y1, x2, y2) tuples; an empty list when `lines` is None.
    """
    if lines is None:
        return []

    shortest_allowed = min_length_ratio * max(img_width, img_height)
    return [
        (x1, y1, x2, y2)
        for (x1, y1, x2, y2) in lines
        if math.hypot(x2 - x1, y2 - y1) >= shortest_allowed
    ]

def compute_line_angle_deg(x1, y1, x2, y2):
    """
    Returns the angle of the line in the range [0, 180) degrees.

    The direction of travel is ignored: (x1, y1)->(x2, y2) and the reversed
    segment give the same angle.
    """
    return math.degrees(math.atan2(y2 - y1, x2 - x1)) % 180

def group_lines_by_angle(lines, angle_thresh=12.0):
    """
    Groups lines with similar angles.

    Each line joins the first existing group whose average angle differs from
    the line's angle by less than angle_thresh degrees; otherwise it starts a
    new group. Angles are compared on the 180-degree circle, so a line at 179
    degrees and one at 1 degree are 2 degrees apart (both nearly horizontal)
    rather than 178.

    Parameters:
        lines: iterable of (x1, y1, x2, y2).
        angle_thresh (float): Maximum angle difference (degrees) to a group's
            average angle.

    Returns: [ [line1, line2, ...], [line3, line4, ...], ... ]
    """
    groups = []  # each entry: {'lines': [...], 'avg_angle': float}
    for line in lines:
        angle = compute_line_angle_deg(*line)

        for g in groups:
            # Signed shortest difference on the 180-periodic circle, in [-90, 90)
            delta = (angle - g['avg_angle'] + 90.0) % 180.0 - 90.0
            if abs(delta) < angle_thresh:
                g['lines'].append(line)
                # Running mean: move the average towards the new angle by 1/n.
                # Equal to the plain mean of the member angles when no
                # wrap-around is involved, without recomputing every angle.
                g['avg_angle'] = (g['avg_angle'] + delta / len(g['lines'])) % 180.0
                break
        else:
            groups.append({'lines': [line], 'avg_angle': angle})

    return [g['lines'] for g in groups]

def lines_are_close(x1i, y1i, x2i, y2i, x1j, y1j, x2j, y2j, dist_thresh):
    """
    Checks if two line segments are close to each other.
    A simple approach: the distance between the midpoints of the two segments
    must be < dist_thresh. Orientation is not considered here.
    """
    mxi = (x1i + x2i) / 2
    myi = (y1i + y2i) / 2
    mxj = (x1j + x2j) / 2
    myj = (y1j + y2j) / 2
    return math.hypot(mxj - mxi, myj - myi) < dist_thresh

def _acute_angle_deg(dx1, dy1, dx2, dy2):
    """Angle in [0, 90] degrees between two undirected lines given by direction vectors."""
    cross = dx1 * dy2 - dy1 * dx2
    dot = dx1 * dx2 + dy1 * dy2
    return math.degrees(math.atan2(abs(cross), abs(dot)))

def _span_of_segments(members):
    """
    Collapse a list of (x1, y1, x2, y2) segments into a single segment whose
    end points are the two member end points farthest apart along the
    direction of the longest member.
    """
    longest = max(members, key=lambda m: math.hypot(m[2] - m[0], m[3] - m[1]))
    dx = float(longest[2] - longest[0])
    dy = float(longest[3] - longest[1])
    if dx == 0.0 and dy == 0.0:
        dx = 1.0  # every member is a single point; any direction will do

    endpoints = [(m[0], m[1]) for m in members] + [(m[2], m[3]) for m in members]
    start = min(endpoints, key=lambda p: p[0] * dx + p[1] * dy)
    end = max(endpoints, key=lambda p: p[0] * dx + p[1] * dy)

    # Report the end with the smaller x (then smaller y) first
    (ax, ay), (bx, by) = sorted(((int(start[0]), int(start[1])),
                                 (int(end[0]), int(end[1]))))
    return (ax, ay, bx, by)

def merge_collinear_lines(lines, dist_thresh=8.0, angle_thresh=8.0):
    """
    Merges similarly oriented, closely spaced lines within a group.

    Each not-yet-used line acts as a reference and absorbs every later line
    whose orientation differs by less than angle_thresh degrees and whose
    midpoint is closer than dist_thresh to the reference midpoint. The merged
    segment runs between the two extreme end points along the line direction,
    so lines with a negative slope keep their orientation (a bounding-box
    diagonal would flip them).

    dist_thresh: Maximum midpoint distance between lines to be considered close.
    angle_thresh: Maximum angle difference (in degrees) to be considered similar.

    Returns:
        list of (x1, y1, x2, y2) int tuples with (x1, y1) <= (x2, y2).
    """
    merged = []
    used = [False] * len(lines)

    for i in range(len(lines)):
        if used[i]:
            continue
        used[i] = True

        x1i, y1i, x2i, y2i = lines[i]
        ref_dx, ref_dy = float(x2i - x1i), float(y2i - y1i)
        members = [(x1i, y1i, x2i, y2i)]

        for j in range(i + 1, len(lines)):
            if used[j]:
                continue
            x1j, y1j, x2j, y2j = lines[j]

            angle_diff = _acute_angle_deg(ref_dx, ref_dy,
                                          float(x2j - x1j), float(y2j - y1j))
            if angle_diff >= angle_thresh:
                continue
            if lines_are_close(x1i, y1i, x2i, y2i, x1j, y1j, x2j, y2j, dist_thresh):
                members.append((x1j, y1j, x2j, y2j))
                used[j] = True

        merged.append(_span_of_segments(members))

    return merged

def filter_lines_by_mask_coverage(lines, doc_mask, coverage_thresh=0.3, num_samples=20):
    """
    Keep only the segments that lie sufficiently inside the document mask.

    num_samples points are taken at equal steps from the first to the last
    endpoint of each segment (a single sample is taken at the midpoint). A
    sample counts as covered when it falls inside the mask bounds and the
    mask value there is > 0. Samples outside the image count as not covered.

    Args:
        lines: Iterable of (x1, y1, x2, y2) segments.
        doc_mask: Mask image indexed as doc_mask[y, x], or None.
            Assumes a single-channel mask - TODO confirm against the caller.
        coverage_thresh: Minimum covered fraction (0.3 => at least 30% of the
            samples must be inside the mask).
        num_samples: Number of samples per segment; must be >= 1.

    Returns:
        `lines` unchanged when doc_mask is None; otherwise a list of the
        (x1, y1, x2, y2) tuples that meet the coverage threshold. Zero-length
        segments are dropped.

    Raises:
        ValueError: If num_samples is smaller than 1.
    """
    if doc_mask is None:
        return lines  # If no mask is provided, return lines unchanged.
    if num_samples < 1:
        raise ValueError("num_samples must be at least 1")

    h, w = doc_mask.shape[:2]

    # Interpolation parameters shared by every segment. The single-sample
    # case is special-cased because (num_samples - 1) would be zero.
    if num_samples == 1:
        t = np.array([0.5])
    else:
        t = np.arange(num_samples) / (num_samples - 1)

    good_lines = []
    for (x1, y1, x2, y2) in lines:
        if math.hypot(x2 - x1, y2 - y1) < 1e-6:
            continue

        # astype(int) truncates toward zero, like int() on a scalar.
        xs = (x1 + (x2 - x1) * t).astype(int)
        ys = (y1 + (y2 - y1) * t).astype(int)

        in_bounds = (xs >= 0) & (xs < w) & (ys >= 0) & (ys < h)
        covered = np.count_nonzero(doc_mask[ys[in_bounds], xs[in_bounds]] > 0)

        if covered / num_samples >= coverage_thresh:
            good_lines.append((x1, y1, x2, y2))

    return good_lines

def ransac_line_fitting(points, dist_thresh=3.0, max_iters=1000, min_inliers=20):
    """
    Fit a line to 2D points with RANSAC.

    Each iteration draws two points, builds the unit-normal line
    a*x + b*y + c = 0 through them, and counts the points whose
    perpendicular distance |a*x + b*y + c| is below dist_thresh. The first
    line that reaches the highest inlier count wins.

    Args:
        points: Sequence of (x, y) pairs (list of tuples or an (N, 2) array).
        dist_thresh: Exclusive inlier distance threshold.
        max_iters: Number of random two-point samples to try.
        min_inliers: Minimum number of inliers (and of input points) needed
            for a fit to be reported.

    Returns:
        (line, inliers): line is an (a, b, c) tuple with hypot(a, b) == 1 and
        inliers is the list of (x, y) inlier points in input order.
        (None, []) is returned when there are too few points (fewer than
        min_inliers, or fewer than the two needed to define a line) or when
        no sampled line reaches min_inliers.
    """
    pts = list(points)
    # Two samples are required to define a line regardless of min_inliers;
    # without this guard random.sample would raise on 0 or 1 points.
    if len(pts) < max(min_inliers, 2):
        return None, []

    # Coordinates as arrays so each hypothesis is scored in one vectorized pass.
    coords = np.asarray(pts, dtype=float).reshape(-1, 2)
    xs = coords[:, 0]
    ys = coords[:, 1]

    best_line = None
    best_mask = None
    best_count = 0

    for _ in range(max_iters):
        (x1, y1), (x2, y2) = random.sample(pts, 2)

        # Line through the two samples in a*x + b*y + c = 0 form.
        a = float(y2 - y1)
        b = float(x1 - x2)
        norm = math.hypot(a, b)
        if norm < 1e-6:
            # Coincident samples do not define a direction.
            continue
        c = float(x2 * y1 - x1 * y2)
        a, b, c = a / norm, b / norm, c / norm

        inlier_mask = np.abs(a * xs + b * ys + c) < dist_thresh
        count = int(np.count_nonzero(inlier_mask))

        if count > best_count:
            best_count = count
            best_mask = inlier_mask
            best_line = (a, b, c)

    if best_line is None or best_count < min_inliers:
        return None, []

    # Materialize the inlier list once, from the original point objects.
    best_inliers = [(pts[k][0], pts[k][1]) for k in np.flatnonzero(best_mask)]
    return best_line, best_inliers

def refine_lines_with_ransac(lines, edge_image, candidate_dist=8.0, ransac_dist=3.0, max_iters=1000, min_inliers=20):
    """
    Refine each segment by re-fitting a line to the edge pixels around it.

    For every input segment, the non-zero pixels of edge_image that lie
    within candidate_dist of the segment's infinite supporting line are
    collected and passed to ransac_line_fitting. Segments that are
    zero-length, have fewer than min_inliers candidates, or for which RANSAC
    reports no line are dropped.

    Args:
        lines: List of (x1, y1, x2, y2) segments.
        edge_image: 2D image whose non-zero pixels are edge points.
        candidate_dist: Exclusive distance threshold for a pixel to be a
            candidate for a given line.
        ransac_dist: Inlier distance threshold forwarded to RANSAC.
        max_iters: RANSAC iteration count.
        min_inliers: Minimum candidate and inlier count.

    Returns:
        List of refined lines in (a, b, c) form, a*x + b*y + c = 0.
    """
    if not lines:
        return []

    pts_y, pts_x = np.nonzero(edge_image)
    all_points = np.column_stack((pts_x, pts_y))

    refined = []
    for (x1, y1, x2, y2) in lines:
        dx = x2 - x1
        dy = y2 - y1
        length = math.hypot(dx, dy)
        if length < 1e-6:
            continue

        # Unit-normal form of the initial line; the normal (dy, -dx) has the
        # same length as the segment, so `length` is the normalizer.
        a_0 = dy / length
        b_0 = -dx / length
        c_0 = (dx * y1 - dy * x1) / length

        # Distance of every edge pixel to the line, computed in one pass
        # instead of a Python loop over all pixels for each line.
        dists = np.abs(a_0 * pts_x + b_0 * pts_y + c_0)
        candidate_pts = all_points[dists < candidate_dist]

        if len(candidate_pts) < min_inliers:
            continue

        best_line, _inliers = ransac_line_fitting(candidate_pts, dist_thresh=ransac_dist,
                                                  max_iters=max_iters, min_inliers=min_inliers)
        if best_line is not None:
            refined.append(best_line)
    return refined

def fast_ransac_pipeline(lines, edge_image, img_width, img_height, doc_mask=None, top_n=35):
    """
    Filter, merge and RANSAC-refine raw line segments.

    Steps:
    1) Filter out short lines (filter_lines_by_length, min_length_ratio=0.1).
    2) Group lines by angle (group_lines_by_angle, angle_thresh=12.0).
    3) Merge collinear lines inside each group (dist_thresh=8.0, angle_thresh=8.0).
    4) (Optional) Keep only lines with at least 30% coverage by doc_mask.
    5) Filter short lines again after merging.
    6) Keep the top_n longest lines.
    7) Refine the survivors with RANSAC (candidate_dist=8.0, ransac_dist=3.0).

    Args:
        lines: Raw line segments as produced by the Hough step.
        edge_image: Edge image whose non-zero pixels feed the RANSAC step.
        img_width, img_height: Image size passed to the length filter.
        doc_mask: Optional document mask; step 4 is skipped when None.
        top_n: How many of the longest merged lines are refined (default 35).

    Returns:
        List of refined lines in (a, b, c) form; an empty list when no line
        survives the filtering stages.
    """
    # 1) Filter short lines
    lines_filt = filter_lines_by_length(lines, img_width, img_height, min_length_ratio=0.1)
    if not lines_filt:
        print("No lines after length filtering.")
        return []

    # 2) Group lines by angle
    angle_groups = group_lines_by_angle(lines_filt, angle_thresh=12.0)

    # 3) Merge collinear lines
    merged_lines = []
    for grp in angle_groups:
        merged_lines.extend(merge_collinear_lines(grp, dist_thresh=8.0, angle_thresh=8.0))

    # 4) (Optional) Apply document mask filtering
    if doc_mask is not None:
        merged_lines = filter_lines_by_mask_coverage(merged_lines, doc_mask, coverage_thresh=0.3, num_samples=20)

    # 5) Filter short lines again
    merged_lines = filter_lines_by_length(merged_lines, img_width, img_height, min_length_ratio=0.1)

    if not merged_lines:
        print("No lines after merging & coverage filtering.")
        return []

    # 6) Select the top_n longest lines
    merged_lines = sorted(merged_lines, key=lambda ln: math.hypot(ln[2]-ln[0], ln[3]-ln[1]), reverse=True)
    final_lines = merged_lines[:top_n]

    # 7) RANSAC-based refinement
    refined = refine_lines_with_ransac(final_lines, edge_image,
                                       candidate_dist=8.0,  # Candidate points selection threshold
                                       ransac_dist=3.0,     # Inlier distance threshold for RANSAC
                                       max_iters=1000,
                                       min_inliers=20)

    return refined


## Visualization of Refined RANSAC Lines

This function applies the **RANSAC pipeline** to refine document edges and visualize the results.

1. **Line Detection using Hough Transform**  
   - `detect_lines_for_text_with_mask()` extracts raw lines from the edge image.
   - If no lines are found, the process stops.

2. **Applying RANSAC Refinement**  
   - `fast_ransac_pipeline()` is used to refine detected lines, ensuring that only meaningful segments remain.

3. **Conversion from (a, b, c) to (x1, y1, x2, y2)**  
   - Since refined lines are returned in **(a, b, c) format**, they must be converted back to **(x1, y1, x2, y2)** for visualization.
   - `draw_refined_lines()` computes the intersection of each line with the image borders.

4. **Visualization**  
   - Displays:
     - **Original Image**
     - **Masked Edge Detection**
     - **Final Refined Lines from RANSAC**

This method effectively **removes outliers and enhances document boundary detection**, leading to a more accurate perspective correction.

In [None]:
def show_text_lines_roi_ransac(resized_dataset_path, category='perspective', file_index=11):
    """
    Run line detection plus the RANSAC pipeline on one image and plot the result.

    Shows three panels: the original image, the masked edge image, and the
    original with the refined lines drawn on it.

    Args:
        resized_dataset_path: Dataset root containing 'distorted/<category>'.
        category: Sub-folder of 'distorted' to read from.
        file_index: Position of the image in os.listdir(folder). Defaults to
            11, the index this demo previously had hard-coded. Note that
            os.listdir order is not sorted.

    Returns:
        None. Prints a message and returns early when the folder is empty,
        file_index is out of range, or no Hough lines are found.
    """
    folder = os.path.join(resized_dataset_path, 'distorted', category)
    files = os.listdir(folder)
    if not files:
        print("No images in", folder)
        return
    try:
        file_name = files[file_index]
    except IndexError:
        print(f"file_index {file_index} is out of range ({len(files)} images in {folder})")
        return

    image_path = os.path.join(folder, file_name)

    # 1) Normal workflow: Detect initial lines
    original, edges, doc_mask, masked_edges, lines = detect_lines_for_text_with_mask(
        image_path,
        canny_thresh1=100,
        canny_thresh2=200,
        pht_threshold=120,
        min_line_length=100,
        max_line_gap=10
    )

    # 2) Stop if Hough returned nothing (None or an empty collection)
    if lines is None or len(lines) == 0:
        print("No lines found by Hough.")
        return

    h, w = masked_edges.shape

    # 3) Run the RANSAC-based refinement pipeline
    refined_lines = fast_ransac_pipeline(lines, masked_edges, w, h)

    print("Refined lines (a, b, c) count:", len(refined_lines))

    # 4) Convert refined lines (a, b, c) into drawable segments and draw them
    lines_img = draw_refined_lines(original, refined_lines)

    # 5) Visualization
    plt.figure(figsize=(14,6))

    plt.subplot(1,3,1)
    plt.imshow(cv2.cvtColor(original, cv2.COLOR_BGR2RGB))
    plt.title("Original")
    plt.axis("off")

    plt.subplot(1,3,2)
    plt.imshow(masked_edges, cmap="gray")
    plt.title("Masked Edges")
    plt.axis("off")

    plt.subplot(1,3,3)
    plt.imshow(cv2.cvtColor(lines_img, cv2.COLOR_BGR2RGB))
    plt.title(f"Refined RANSAC Lines: {len(refined_lines)}")
    plt.axis("off")

    plt.tight_layout()
    plt.show()

def draw_refined_lines(image, refined_lines, color=(0,255,0)):
    """
    Draw lines given in (a, b, c) form onto a copy of the image.

    a*x + b*y + c = 0 describes an infinite line, so each line is intersected
    with the four image borders (x=0, x=w, y=0, y=h) and the visible chord
    between the two outermost intersections is drawn.

    A line through an image corner hits two borders at the same point. The
    chord is therefore taken between the smallest and largest intersection
    (ordered by x, then y) instead of the first two found, which could be the
    same corner twice and produce a single dot.

    Args:
        image: Image to draw on; it is copied, not modified.
        refined_lines: Iterable of (a, b, c) line coefficients.
        color: Line color passed to cv2.line.

    Returns:
        A copy of `image` with the visible part of each line drawn, 2 px wide.
        Lines that do not cross the image are skipped.
    """
    out = image.copy()
    h, w = out.shape[:2]
    for (a, b, c) in refined_lines:
        pts = []
        if abs(b) > 1e-6:
            # Left and right borders: solve for y at x = 0 and x = w.
            for x_edge in (0, w):
                y_hit = -(c + a * x_edge) / b
                if 0 <= y_hit <= h:
                    pts.append((x_edge, int(y_hit)))
        if abs(a) > 1e-6:
            # Top and bottom borders: solve for x at y = 0 and y = h.
            for y_edge in (0, h):
                x_hit = -(c + b * y_edge) / a
                if 0 <= x_hit <= w:
                    pts.append((int(x_hit), y_edge))

        if len(pts) < 2:
            continue

        # All hits lie on one straight line, so lexicographic min/max are the
        # two ends of the visible chord.
        start, end = min(pts), max(pts)
        if start != end:
            cv2.line(out, start, end, color, 2)
    return out

In [None]:
# Demo run: visualize the RANSAC-refined lines for one image of the 'perspective' category.
# NOTE: the path below is machine-specific; point it at your own resized WarpDoc copy.
show_text_lines_roi_ransac('/Users/demir/Desktop/Assignment1/WarpDoc_resized', 'perspective')

## Perspective Correction and Quality Evaluation

This section aims to refine document boundary detection and apply perspective correction to obtain a properly aligned document view. Additionally, the quality of the corrected document is evaluated using the **Structural Similarity Index (SSIM).**

### **Key Steps:**
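1. **Finding the Four Document Edges**
   - `find_biggest_rectangle()` examines every combination of four RANSAC-refined lines.
   - A combination is kept only if it contains two disjoint pairs of nearly parallel lines (within `angle_tol` degrees), with line angles computed by `line_angle_degrees_abc()`.
   - Among the combinations whose four corners all fall inside the image, the one enclosing the largest area is selected as the document boundary.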

2. **Finding Corner Points**
   - Line intersections are computed using `intersect_abc()` to determine document corners.
   - If any of the four intersections does not exist (the two lines are parallel), that combination of lines is discarded.

3. **Sorting Corners for Homography**
   - The detected corners are sorted into the required **(top-left, top-right, bottom-right, bottom-left)** order using `sort_corners()`.
   - This ensures correct transformation.

4. **Applying Perspective Warping**
   - `warp_document()` computes a perspective transformation using `cv2.getPerspectiveTransform()`.
   - The document is rectified into a standard output size.

5. **Comparing with Ground Truth (SSIM)**
   - The ground-truth image is resized to match the dimensions of the processed (warped) image.
   - SSIM is computed to measure similarity.

### **Challenges & Limitations:**
- The method **does not always detect all four document edges correctly**, leading to incomplete warping.
- Line detection is **still affected by unwanted background elements**, which sometimes interfere with boundary estimation.
- **Highly distorted or curved documents** require further refinement, as straight-line approximations may not be sufficient.
- While SSIM provides a numerical similarity measure, **perceptual differences** (e.g., brightness or minor shifts) may still exist.

### **Future Work:**
- Improve line filtering to ensure that only **document edges** are detected.
- Implement a **more adaptive method** for handling non-rectangular documents.


In [None]:
from itertools import combinations
from skimage.metrics import structural_similarity as ssim

def line_angle_degrees_abc(a, b, c):
    """
    Return the inclination of the line a*x + b*y + c = 0 in degrees.

    The line's normal vector is (a, b), so its direction vector is (b, -a);
    the angle of that direction is atan2(-a, b), folded into [0, 360).
    The offset c does not affect the angle and is accepted only so that an
    (a, b, c) tuple can be unpacked straight into the call.

    Note: (a, b, c) and (-a, -b, -c) describe the same line but yield angles
    180 degrees apart, so callers comparing undirected lines should reduce
    the result modulo 180.
    """
    full_turn = 360
    return math.degrees(math.atan2(-a, b)) % full_turn

def intersect_abc(line1, line2):
    """
    Intersect two lines given in (a, b, c) form, a*x + b*y + c = 0.

    Solves the 2x2 system with Cramer's rule.

    Returns:
        The intersection point (x, y), or None when the determinant
        a1*b2 - a2*b1 is smaller than 1e-8 in magnitude (parallel lines).
    """
    (a1, b1, c1), (a2, b2, c2) = line1, line2
    det = a1*b2 - a2*b1
    if abs(det) < 1e-8:
        return None
    return ((b1*c2 - b2*c1)/det, (c1*a2 - c2*a1)/det)

def sort_corners(corners):
    """
    Order four corner points as (top-left, top-right, bottom-right, bottom-left).

    Method:
    1) Sort all points by y; the first two form the top row, the rest the
       bottom row.
    2) Within each row, the point with the smaller x is the left one and the
       point with the larger x is the right one.

    Each row is sorted by x rather than queried with separate min() and
    max() calls: when both points of a row share the same x, min() and max()
    would return the same point and the result would contain a duplicated
    corner.

    Args:
        corners: Four (x, y) points (sequence of pairs or a (4, 2) array).

    Returns:
        float32 array of shape (4, 2) in the order TL, TR, BR, BL.
    """
    by_y = sorted(corners, key=lambda p: p[1])

    top_row = sorted(by_y[:2], key=lambda p: p[0])
    bottom_row = sorted(by_y[2:], key=lambda p: p[0])

    top_left, top_right = top_row[0], top_row[-1]
    bottom_left, bottom_right = bottom_row[0], bottom_row[-1]

    return np.array([top_left, top_right, bottom_right, bottom_left], dtype=np.float32)


def polygon_area(pts):
    """
    Compute the area of a 2D polygon with the shoelace formula.

    Args:
        pts: Polygon vertices as (x, y) pairs, listed in order around the
            polygon (either winding direction).

    Returns:
        The non-negative area; 0 for an empty vertex list.
    """
    count = len(pts)
    # Sum of cross products of each vertex with its successor (wrapping around).
    twice_signed = sum(
        pts[k][0]*pts[(k+1) % count][1] - pts[(k+1) % count][0]*pts[k][1]
        for k in range(count)
    )
    return abs(twice_signed/2)

def find_biggest_rectangle(lines_abc, w, h, angle_tol=10, min_area=1000):
    """
    Find the largest quadrilateral formed by two pairs of near-parallel lines.

    Every combination of four lines is examined. A combination qualifies when
    it contains two disjoint pairs of lines that are parallel within
    angle_tol degrees; the four intersections of the first pair with the
    second pair are the candidate corners. Candidates with a corner outside
    the image are rejected, and the candidate with the largest area wins.

    Line orientations are reduced modulo 180 before being compared: the same
    line can be written as (a, b, c) or (-a, -b, -c), and without the
    reduction two angles more than 180 degrees apart (for example 0 and 270,
    i.e. perpendicular lines) would yield a negative "difference" and be
    accepted as parallel.

    Args:
        lines_abc: Iterable of (a, b, c) lines, a*x + b*y + c = 0.
        w, h: Image width and height used for the inside-image test.
        angle_tol: Parallelism tolerance in degrees.
        min_area: Candidates must have an area strictly greater than this.

    Returns:
        float32 array of shape (4, 2) with the corners in cyclic order, or
        None when no qualifying combination exists.
    """
    lines_abc = list(lines_abc)
    if len(lines_abc) < 4:
        return None

    # Undirected orientation of every line, computed once rather than once
    # per four-line combination.
    orientations = [line_angle_degrees_abc(*line) % 180 for line in lines_abc]

    def nearly_parallel(i, j):
        diff = abs(orientations[i] - orientations[j])
        return min(diff, 180 - diff) < angle_tol

    best_area = 0
    best_corners = None

    # Examine all four-line combinations (by index into lines_abc).
    for quad in combinations(range(len(lines_abc)), 4):
        # All pairs inside the combination that are parallel within tolerance.
        parallel_pairs = [pair for pair in combinations(quad, 2) if nearly_parallel(*pair)]

        # Two disjoint parallel pairs are needed: one per rectangle direction.
        found_set = next(
            ((p1, p2)
             for p1 in parallel_pairs
             for p2 in parallel_pairs
             if set(p1).isdisjoint(p2)),
            None,
        )
        if found_set is None:
            continue

        (i1, j1), (i2, j2) = found_set
        # Corners are the intersections of each line of pair 1 with each line
        # of pair 2. The names only fix a cyclic order around the
        # quadrilateral; they are not actual image positions.
        p_tl = intersect_abc(lines_abc[i1], lines_abc[i2])
        p_tr = intersect_abc(lines_abc[i1], lines_abc[j2])
        p_bl = intersect_abc(lines_abc[j1], lines_abc[i2])
        p_br = intersect_abc(lines_abc[j1], lines_abc[j2])
        if any(p is None for p in (p_tl, p_tr, p_bl, p_br)):
            continue

        corners = np.array([p_tl, p_tr, p_br, p_bl], dtype=np.float32)
        # Reject candidates with any corner outside the image.
        if any((x < 0 or x > w or y < 0 or y > h) for (x, y) in corners):
            continue

        area_val = polygon_area(corners)
        if area_val > best_area and area_val > min_area:
            best_area = area_val
            best_corners = corners

    return best_corners

def warp_document(original, corners, out_size=(600,800)):
    """
    Rectify the quadrilateral given by `corners` into an upright image.

    The corners are ordered with sort_corners (TL, TR, BR, BL) and mapped by
    a perspective transform onto the full output rectangle.

    Args:
        original: Source image.
        corners: Four (x, y) corner points, or None.
        out_size: (width, height) of the output image.

    Returns:
        The warped image of size out_size, or `original` unchanged when
        corners is None or has fewer than four points.
    """
    if corners is None or len(corners)<4:
        return original

    src_quad = sort_corners(corners)
    out_w, out_h = out_size
    right, bottom = out_w - 1, out_h - 1
    dst_quad = np.array(
        [[0, 0], [right, 0], [right, bottom], [0, bottom]],
        dtype=np.float32,
    )
    homography = cv2.getPerspectiveTransform(src_quad, dst_quad)
    return cv2.warpPerspective(original, homography, (out_w, out_h))

def load_ground_truth(dataset_path, category, file_name):
    """
    Load the ground-truth image stored at <dataset_path>/digital/<category>/<file_name>.

    Returns:
        The image read in color (BGR) by cv2.imread, or None, after printing
        a message, when the file does not exist.
    """
    gt_path = os.path.join(dataset_path, 'digital', category, file_name)
    if os.path.exists(gt_path):
        return cv2.imread(gt_path, cv2.IMREAD_COLOR)
    print("No ground-truth found:", gt_path)
    return None

def compute_ssim(imageA, imageB):
    """
    Compute the SSIM score between two BGR images on their grayscale versions.

    imageB is resized to imageA's size when the two differ, then both images
    are converted to grayscale and compared.

    data_range is fixed at 255, the span of possible values of the 8-bit
    grayscale images. scikit-image defines data_range in terms of possible
    values, not the values actually present. Deriving it from one image's
    observed min/max changes the SSIM stabilizing constants from image to
    image and collapses to 0 for a flat image.
    NOTE(review): assumes 8-bit inputs (as read by cv2.imread) - confirm if
    other dtypes are ever passed.

    Args:
        imageA: Reference-size BGR image (e.g. the warped document).
        imageB: BGR image to compare against (e.g. the ground truth).

    Returns:
        The SSIM score returned by skimage's structural_similarity.
    """
    hA, wA = imageA.shape[:2]
    hB, wB = imageB.shape[:2]
    if (hA != hB) or (wA != wB):
        # cv2.resize takes the target size as (width, height).
        imageB = cv2.resize(imageB, (wA, hA), interpolation=cv2.INTER_AREA)
    grayA = cv2.cvtColor(imageA, cv2.COLOR_BGR2GRAY)
    grayB = cv2.cvtColor(imageB, cv2.COLOR_BGR2GRAY)
    score = ssim(grayA, grayB, data_range=255)
    return score

## Full Document Processing and SSIM Evaluation

This function completes the document dewarping pipeline by performing the following steps:

### **Steps:**
1. **Loading the Distorted Image**  
   - Selects an image from the dataset based on the specified category.
   
2. **Edge Detection & Hough Transform**  
   - Detects edges and applies **Hough Transform** to extract initial line segments.

3. **RANSAC-Based Line Refinement**  
   - Filters the detected lines using **RANSAC** to improve accuracy.

4. **Finding Document Edges and Corners**  
   - Identifies the four boundary lines.
   - Computes the **intersections (corner points).**
   - Applies **perspective transformation** to obtain a frontal view.

5. **Loading Ground Truth & SSIM Computation**  
   - If a ground-truth image is available, **SSIM** (Structural Similarity Index) is computed to evaluate accuracy.

6. **Visualization of Results**  
   - Displays the original image, refined RANSAC lines, the warped document, and a side-by-side comparison with the ground truth.

### **Challenges & Limitations:**
- The method **does not always detect all four document edges correctly,** leading to imperfect warping.
- Some **non-document lines are still mistakenly selected**, affecting boundary extraction.
- **SSIM evaluation provides a numerical metric**, but it may not fully capture perceptual distortions.

In [None]:
def show_text_lines_roi_ransac_and_warp_with_ssim(dataset_path, category='perspective', file_index=0):
    """
    Run the full dewarping pipeline on one image, plot it and return its SSIM.

    Steps:
    1) Load the distorted image at position file_index of
       <dataset_path>/distorted/<category>.
    2) Edge detection + Hough line detection.
    3) RANSAC refinement of the detected lines.
    4) Find the largest rectangle among all four-line combinations
       (find_biggest_rectangle).
    5) Warp the document to 600x800; the unwarped original is used instead
       when no rectangle is found.
    6) Compute SSIM against the ground-truth image, if one exists.
    7) Plot the original, the refined lines, the warped result and a
       side-by-side comparison with the ground truth.

    Args:
        dataset_path: Dataset root with 'distorted' and 'digital' sub-folders.
        category: Category sub-folder name.
        file_index: Index into os.listdir() of the distorted folder.

    Returns:
        The SSIM score, or None when the folder is empty, file_index is out
        of range, no Hough lines are found, or no ground truth is available.
    """
    distorted_folder = os.path.join(dataset_path, 'distorted', category)
    files = os.listdir(distorted_folder)
    if not files:
        print("No images in", distorted_folder)
        return None
    try:
        file_name = files[file_index]
    except IndexError:
        print(f"file_index {file_index} is out of range ({len(files)} images in {distorted_folder})")
        return None
    distorted_path = os.path.join(distorted_folder, file_name)

    original, edges, doc_mask, masked_edges, lines = detect_lines_for_text_with_mask(
        distorted_path,
        canny_thresh1=100,
        canny_thresh2=200,
        pht_threshold=120,
        min_line_length=100,
        max_line_gap=10
    )
    if lines is None or len(lines) == 0:
        print("No lines found by Hough.")
        return None

    h, w = masked_edges.shape
    # RANSAC -> (a,b,c)
    refined_lines = fast_ransac_pipeline(lines, masked_edges, w, h)
    print("Refined lines (a,b,c) count:", len(refined_lines))

    # Find the largest rectangle
    best_corners = find_biggest_rectangle(refined_lines, w, h, angle_tol=10)
    if best_corners is None:
        print("No rectangle found.")
        warped = original
    else:
        warped = warp_document(original, best_corners, out_size=(600,800))

    # Ground truth
    gt_img = load_ground_truth(dataset_path, category, file_name)
    if gt_img is None:
        print("No ground-truth found. Skipping SSIM.")
        ssim_val = None
    else:
        ssim_val = compute_ssim(warped, gt_img)
        print("SSIM =", ssim_val)

    # Plot
    lines_img = draw_refined_lines(original, refined_lines)
    plt.figure(figsize=(16,6))

    plt.subplot(1,4,1)
    plt.imshow(cv2.cvtColor(original, cv2.COLOR_BGR2RGB))
    plt.title("Original Distorted")
    plt.axis("off")

    plt.subplot(1,4,2)
    plt.imshow(cv2.cvtColor(lines_img, cv2.COLOR_BGR2RGB))
    plt.title(f"Refined RANSAC Lines: {len(refined_lines)}")
    plt.axis("off")

    plt.subplot(1,4,3)
    plt.imshow(cv2.cvtColor(warped, cv2.COLOR_BGR2RGB))
    plt.title("Warped Document")
    plt.axis("off")

    plt.subplot(1,4,4)
    if gt_img is not None:
        gt_resized = cv2.resize(gt_img, (warped.shape[1], warped.shape[0]))
        side_by_side = np.hstack((warped, gt_resized))
        plt.imshow(cv2.cvtColor(side_by_side, cv2.COLOR_BGR2RGB))
        # Compare against None explicitly: a score of exactly 0.0 is a valid
        # result and must not be reported as "No SSIM".
        plt.title(f"Compare\nSSIM={ssim_val:.3f}" if ssim_val is not None else "No SSIM")
    else:
        plt.imshow(cv2.cvtColor(original, cv2.COLOR_BGR2RGB))
        plt.title("No GroundTruth")
    plt.axis("off")

    plt.tight_layout()
    plt.show()
    return ssim_val

In [None]:
# Single-image demo: run the full pipeline on image index 11 of the 'perspective' category.
# NOTE: machine-specific path; this also rebinds the notebook-level dataset_path
# to the resized dataset.
dataset_path = "/Users/demir/Desktop/Assignment1/WarpDoc_resized"
show_text_lines_roi_ransac_and_warp_with_ssim(dataset_path, category='perspective', file_index=11)

In [None]:
def process_images_with_ssim(dataset_path, categories, max_images=50, timeout=30):
    """
    Run the dewarping pipeline on up to max_images images per category and
    report the average SSIM of each category.

    Each image is processed by show_text_lines_roi_ransac_and_warp_with_ssim.
    The average is printed when a category finishes, and a summary table is
    printed after all categories.

    A per-image time limit is enforced with SIGALRM where the platform
    provides it; elsewhere images are processed without a limit. The SIGALRM
    handler that was active on entry is restored before returning, and any
    pending alarm is cancelled after every image, so the timeout cannot fire
    in later, unrelated code.

    Args:
        dataset_path: Dataset root with a 'distorted/<category>' folder per
            category.
        categories: Category names to process.
        max_images: Maximum number of images per category.
        timeout: Per-image limit in seconds, passed to signal.alarm.

    Returns:
        None; results are printed.
    """
    import signal

    # Custom exception for timeout
    class TimeoutException(Exception):
        pass

    def timeout_handler(signum, frame):
        raise TimeoutException

    # SIGALRM does not exist on every platform (e.g. Windows).
    use_alarm = hasattr(signal, "SIGALRM")
    previous_handler = None
    if use_alarm:
        previous_handler = signal.signal(signal.SIGALRM, timeout_handler)

    results = {}
    try:
        for cat in categories:
            distorted_folder = os.path.join(dataset_path, 'distorted', cat)
            if not os.path.exists(distorted_folder):
                print(f"Folder does not exist: {distorted_folder}")
                continue
            files = os.listdir(distorted_folder)
            if not files:
                print(f"No images in {distorted_folder}")
                continue
            n = min(len(files), max_images)
            print(f"\nProcessing category: {cat} (up to {n} images)")
            ssim_list = []
            for i in range(n):
                file_name = files[i]
                print(f"\n--- {cat} : Image {i+1}/{n} ({file_name}) ---")
                try:
                    if use_alarm:
                        signal.alarm(timeout)
                    ssim_val = show_text_lines_roi_ransac_and_warp_with_ssim(dataset_path, category=cat, file_index=i)
                except TimeoutException:
                    print(f"Image {i+1}/{n} ({file_name}) processing exceeded {timeout} seconds. Skipping.")
                except Exception as e:
                    # Best effort: one failing image must not abort the batch.
                    print(f"Error processing image {i+1}/{n} ({file_name}): {e}")
                else:
                    if ssim_val is not None:
                        ssim_list.append(ssim_val)
                finally:
                    # Always cancel the pending alarm, whatever the outcome.
                    if use_alarm:
                        signal.alarm(0)
            if ssim_list:
                avg_ssim = sum(ssim_list)/len(ssim_list)
                print(f"Category '{cat}': Average SSIM = {avg_ssim:.3f}")
                results[cat] = avg_ssim
            else:
                print(f"Category '{cat}': No SSIM computed.")
                results[cat] = None
    finally:
        if use_alarm:
            # signal.signal() returns None for handlers not installed from
            # Python; those cannot be re-installed, so use the default action.
            signal.signal(
                signal.SIGALRM,
                previous_handler if previous_handler is not None else signal.SIG_DFL,
            )

    # Creating tables for all categories
    print("\nFinal SSIM Results:")
    print("{:<15} {:<10}".format("Category", "Avg SSIM"))
    print("-"*25)
    for cat in categories:
        avg = results.get(cat, None)
        if avg is not None:
            print("{:<15} {:<10.3f}".format(cat, avg))
        else:
            print("{:<15} {:<10}".format(cat, "N/A"))


In [None]:
# Batch evaluation: up to 50 images per category with a 30-second limit per image.
# NOTE: machine-specific path; this rebinds the notebook-level dataset_path and
# categories (here in a different order than the list defined at the top).
dataset_path = "/Users/demir/Desktop/Assignment1/WarpDoc_resized"
categories = ["perspective", "curved", "fold", "incomplete", "random", "rotate"]
process_images_with_ssim(dataset_path, categories, max_images=50, timeout=30)