# Notebook 7: Results Visualization

This notebook creates comprehensive visualizations including side-by-side comparisons, metric charts, and analysis reports.


In [None]:
import sys
import os
from pathlib import Path
import numpy as np
import cv2
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from tqdm import tqdm

sys.path.append('..')
from utils import load_image, get_image_files, visualize_comparison

# Global plot appearance. The matplotlib style sheet is applied before the
# seaborn palette, in the same order as the notebook has always used.
PLOT_STYLE = 'seaborn-v0_8'
COLOR_PALETTE = "husl"

plt.style.use(PLOT_STYLE)
sns.set_palette(palette=COLOR_PALETTE)

print("Libraries imported successfully!")


## 1. Load Evaluation Results


In [None]:
# Image locations: ground-truth images, their damaged counterparts, and one
# results directory per restoration method (keyed by the method name that also
# prefixes the metric columns in results_df).
GROUND_TRUTH_DIR = Path('../data/ground_truth')
DAMAGED_DIR = Path('../data/damaged')
METHODS = dict(
    PDE=Path('../methods/PDE/results'),
    Patch=Path('../methods/Patch/results'),
    Deep=Path('../methods/Deep/results'),
    Hybrid=Path('../methods/Hybrid/results'),
)

# Read both evaluation CSVs: per-image results first, then the summary table.
results_df, summary_df = (
    pd.read_csv(csv_path)
    for csv_path in ('../evaluation_results.csv', '../evaluation_summary.csv')
)

print(f"Loaded results for {len(results_df)} images")
print("\nSummary Statistics:")
print(summary_df)


## 2. Metric Comparison Charts


In [None]:
# Mean of every metric over all evaluated images, one row per method.
# Columns of results_df are looked up as '<method>_<metric>'.
method_names = ['PDE', 'Patch', 'Deep', 'Hybrid']
metric_names = ['PSNR', 'SSIM', 'LPIPS', 'EdgeAcc']
metrics_df = pd.DataFrame([
    {
        'Method': name,
        **{metric: results_df[f'{name}_{metric}'].mean() for metric in metric_names},
    }
    for name in method_names
])

# One bar colour per method, shared by all four panels.
bar_colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']

# (metrics_df column, panel title, y-axis label) in row-major panel order:
# top-left, top-right, bottom-left, bottom-right.
panel_specs = [
    ('PSNR', 'PSNR Comparison (Higher is Better)', 'PSNR (dB)'),
    ('SSIM', 'SSIM Comparison (Higher is Better)', 'SSIM'),
    ('LPIPS', 'LPIPS Comparison (Lower is Better)', 'LPIPS'),
    ('EdgeAcc', 'Edge Accuracy Comparison (Higher is Better)', 'Edge Accuracy'),
]

# Draw the 2x2 grid of bar charts; axes.flat walks the grid row by row.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
for ax, (column, title, ylabel) in zip(axes.flat, panel_specs):
    ax.bar(metrics_df['Method'], metrics_df[column], color=bar_colors)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_ylabel(ylabel)
    ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.savefig('../metric_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

print("Metric comparison charts saved!")


## 3. Box Plots for Metric Distributions


In [None]:
# Reshape results_df into long form: one row per (method, metric, image value).
# Missing values are dropped per column, and rows are emitted method-major so
# the methods keep their PDE, Patch, Deep, Hybrid order of first appearance.
metrics_to_plot = ['PSNR', 'SSIM', 'LPIPS', 'EdgeAcc']
box_records = [
    {'Method': method_name, 'Metric': metric_name, 'Value': value}
    for method_name in ['PDE', 'Patch', 'Deep', 'Hybrid']
    for metric_name in metrics_to_plot
    for value in results_df[f'{method_name}_{metric_name}'].dropna()
]
box_df = pd.DataFrame(box_records)

# One box plot per metric on a 2x2 grid; axes.flat walks the grid row by row.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
for ax, metric in zip(axes.flat, metrics_to_plot):
    rows_for_metric = box_df[box_df['Metric'] == metric]
    sns.boxplot(data=rows_for_metric, x='Method', y='Value', ax=ax)
    ax.set_title(f'{metric} Distribution', fontsize=14, fontweight='bold')
    ax.set_ylabel(metric)
    ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.savefig('../metric_distributions.png', dpi=300, bbox_inches='tight')
plt.show()

print("Box plots saved!")


## 4. Side-by-Side Visual Comparisons

Create visual comparisons for three sample images: the best case and the worst case (ranked by PSNR averaged across all methods), plus one randomly chosen image.


In [None]:
# Pick three showcase images: the best and worst by PSNR averaged over all
# methods, plus one random image.
psnr_columns = ['PDE_PSNR', 'Patch_PSNR', 'Deep_PSNR', 'Hybrid_PSNR']
results_df['avg_PSNR'] = results_df[psnr_columns].mean(axis=1)

# idxmax()/idxmin() return index *labels*, not positions, so every row lookup
# below uses .loc. The random sample is drawn as a label as well, which keeps
# all three indices in the same "currency" even if results_df is ever filtered
# or re-indexed.
best_idx = results_df['avg_PSNR'].idxmax()
worst_idx = results_df['avg_PSNR'].idxmin()

# Seeded generator so "Random Sample" is the same image on every
# Restart & Run All; change the seed to look at a different image.
RANDOM_SAMPLE_SEED = 42
sample_rng = np.random.default_rng(RANDOM_SAMPLE_SEED)
random_idx = results_df.index[sample_rng.integers(len(results_df))]

sample_indices = [best_idx, worst_idx, random_idx]
sample_names = ['Best Case', 'Worst Case', 'Random Sample']

for sample_idx, sample_name in zip(sample_indices, sample_names):
    sample_row = results_df.loc[sample_idx]
    img_name = sample_row['image_name']

    # Load the reference and damaged images.
    # NOTE(review): load_image comes from utils; it is assumed to return an
    # array that imshow can display directly and that supports .astype —
    # confirm against utils.load_image.
    gt = load_image(GROUND_TRUTH_DIR / f"{img_name}.png")
    damaged = load_image(DAMAGED_DIR / f"{img_name}.png")

    # Load each method's restoration; methods with no output file for this
    # image are simply left out of the figure.
    method_results = {}
    for method_name, method_dir in METHODS.items():
        method_path = method_dir / f"{img_name}.png"
        if method_path.exists():
            method_results[method_name] = load_image(method_path)

    # Two fixed columns (ground truth, damaged) plus one column per method.
    # The grid used to be one column wider, leaving a permanently blank column.
    num_methods = len(method_results)
    num_cols = 2 + num_methods
    fig, axes = plt.subplots(2, num_cols, figsize=(20, 8))

    # First row: ground truth, damaged, then every available method.
    axes[0, 0].imshow(gt)
    axes[0, 0].set_title('Ground Truth', fontsize=12, fontweight='bold')
    axes[0, 0].axis('off')

    axes[0, 1].imshow(damaged)
    axes[0, 1].set_title('Damaged', fontsize=12, fontweight='bold')
    axes[0, 1].axis('off')

    for col, (method_name, method_img) in enumerate(method_results.items(), start=2):
        psnr_val = sample_row[f'{method_name}_PSNR']
        ssim_val = sample_row[f'{method_name}_SSIM']
        axes[0, col].imshow(method_img)
        axes[0, col].set_title(f'{method_name}\nPSNR: {psnr_val:.2f}, SSIM: {ssim_val:.3f}',
                               fontsize=10, fontweight='bold')
        axes[0, col].axis('off')

    # Second row: absolute-difference maps against the ground truth. The two
    # leftmost cells have nothing to compare, so they stay blank.
    axes[1, 0].axis('off')
    axes[1, 1].axis('off')

    for col, (method_name, method_img) in enumerate(method_results.items(), start=2):
        diff = np.abs(gt.astype(float) - method_img.astype(float))
        # Each map is stretched to its own maximum, so brightness is NOT
        # comparable between methods; an all-zero map is left as zeros to
        # avoid dividing by zero.
        peak = diff.max()
        diff_normalized = (diff / peak * 255).astype(np.uint8) if peak > 0 else diff.astype(np.uint8)
        axes[1, col].imshow(diff_normalized)
        axes[1, col].set_title(f'{method_name} Difference', fontsize=10)
        axes[1, col].axis('off')

    fig.suptitle(f'{sample_name}: {img_name}', fontsize=16, fontweight='bold', y=0.98)
    fig.tight_layout()
    fig.savefig(f'../comparison_{sample_name.lower().replace(" ", "_")}.png', dpi=300, bbox_inches='tight')
    plt.show()
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)

print("Visual comparisons saved!")


## 5. Summary Report

Generate a text summary of findings.


In [None]:
# Build the Markdown evaluation report, write it to disk, and echo it.
#
# Per-method mean and standard deviation are computed from results_df
# (columns named '<method>_<metric>'); the "best method" lines are read from
# summary_df, which the lookups below require to have a 'Method' column and
# '<metric>_mean' columns.
report = f"""
# Mural Restoration Evaluation Report

## Dataset
- Total images evaluated: {len(results_df)}
- Methods compared: PDE, Patch, Deep Learning (U-Net), Hybrid

## Overall Performance Summary
"""

# (results_df column suffix, section heading, unit appended after the std).
metric_sections = [
    ('PSNR', 'PSNR (Peak Signal-to-Noise Ratio) - Higher is Better', ' dB'),
    ('SSIM', 'SSIM (Structural Similarity Index) - Higher is Better', ''),
    ('LPIPS', 'LPIPS (Learned Perceptual Image Patch Similarity) - Lower is Better', ''),
    ('EdgeAcc', 'Edge Accuracy - Higher is Better', ''),
]

for metric, heading, unit in metric_sections:
    report += f"\n### {heading}\n"
    for method in ['PDE', 'Patch', 'Deep', 'Hybrid']:
        scores = results_df[f'{method}_{metric}']
        report += f"- {method}: {scores.mean():.4f} ± {scores.std():.4f}{unit}\n"

# Find best method for each metric (LPIPS is a distance, so lowest wins).
best_psnr = summary_df.loc[summary_df['PSNR_mean'].idxmax(), 'Method']
best_ssim = summary_df.loc[summary_df['SSIM_mean'].idxmax(), 'Method']
best_lpips = summary_df.loc[summary_df['LPIPS_mean'].idxmin(), 'Method']
best_edge = summary_df.loc[summary_df['EdgeAcc_mean'].idxmax(), 'Method']

report += f"""
## Best Methods by Metric
- Best PSNR: {best_psnr}
- Best SSIM: {best_ssim}
- Best LPIPS (lowest): {best_lpips}
- Best Edge Accuracy: {best_edge}

## Conclusions
The evaluation shows the relative performance of different restoration methods on the mural dataset.
Visual comparisons and detailed metrics are available in the generated charts and CSV files.
"""

# Save report. The text contains non-ASCII characters ('±'), so the encoding
# is pinned to UTF-8 instead of depending on the platform's locale default.
with open('../evaluation_report.md', 'w', encoding='utf-8') as f:
    f.write(report)

print(report)
print("\n✓ Report saved to evaluation_report.md")


## 6. Summary

Visualization complete! Generated outputs:
- Metric comparison charts (`metric_comparison.png`)
- Metric distribution box plots (`metric_distributions.png`)
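- Side-by-side visual comparisons for the best, worst, and random cases (`comparison_best_case.png`, `comparison_worst_case.png`, `comparison_random_sample.png`)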
- Evaluation report (`evaluation_report.md`)

All visualizations provide comprehensive analysis of the restoration methods' performance.
