In [4]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.image as mpimg
import numpy as np
from scipy.signal import savgol_filter

# Set up
latent_dims = [64, 128, 256, 512]
metric_folders = {
    "SSIM": "all_4_dim_vae_plots_ssim",
    "PSNR": "all_4_dim_vae_plots_psnr",
    "CNN": "all_4_dim_vae_plots_cnn",
    "Convergence": "all_4_dim_vae_plots_convergence"
}
csv_files = [f"generated_{dim}.csv" for dim in latent_dims]
label_legend_map = {'set1': 'NSGA-II', 'set2': 'AGE-MOEA-II'}
color_map = {'set1': 'blue', 'set2': 'green'}
common_marker = '.'
x_thresh, y_thresh, z_thresh = 4380000, 930, 0.6


# Function to compute Pareto front
def find_pareto_front(data, maximize_metric=True):
    def is_dominated(p1, p2):
        a1, d1, m1 = p1['Area'], p1['PDP'], p1['Metric']
        a2, d2, m2 = p2['Area'], p2['PDP'], p2['Metric']
        better_eq = a2 <= a1 and d2 <= d1 and (m2 >= m1 if maximize_metric else m2 <= m1)
        strict = a2 < a1 or d2 < d1 or (m2 > m1 if maximize_metric else m2 < m1)
        return better_eq and strict
    return [p1 for p1 in data if not any(is_dominated(p1, p2) for p2 in data if p2 != p1)]

# Handle SSIM, PSNR, CNN (each has CSV)
for metric in ["SSIM", "PSNR", "CNN"]:
    folder = metric_folders[metric]
    os.makedirs(folder, exist_ok=True)
    for dim in latent_dims:
        csv = f"all_4_dim_vae_plots_{metric.lower()}/generated_{dim}.csv"
        df = pd.read_csv(csv)
        col = metric if metric != "CNN" else "Accuracy"
        df = df.rename(columns={col: "Metric"})  # unify for Pareto
        filtered_df = df[df['Label'].isin(label_legend_map.keys())].copy()
        data_list = filtered_df[['Area', 'PDP', 'Metric']].to_dict('records')
        for j, row in enumerate(data_list):
            row['index'] = filtered_df.index[j]
        pareto_points = find_pareto_front(data_list)
        pareto_indices = [p['index'] for p in pareto_points]
        pareto_df = df.loc[pareto_indices]
        filtered = pareto_df[
            (pareto_df['Area'] <= x_thresh) &
            (pareto_df['PDP'] <= y_thresh) &
            (pareto_df['Metric'] >= z_thresh)
        ]
        if len(filtered) > 10:
            filtered = filtered.sample(n=10, random_state=42)
        # Save CSV
        filtered.to_csv(f"{folder}/CSV_pareto_{dim}.csv", index=False)
        # Plot
        fig = plt.figure(figsize=(8, 6))
        ax = fig.add_subplot(111, projection='3d')
        for label, color in color_map.items():
            subset = df[(df['Label'] == label) & (~df.index.isin(pareto_indices))]
            ax.scatter(subset['Area'], subset['PDP'], subset['Metric'],
                        c=color, marker=common_marker, s=15,
                        label=label_legend_map[label], alpha=0.8)
        ax.scatter(filtered['Area'], filtered['PDP'], filtered['Metric'],
                        c='red', marker='o', s=20, label='Filtered Pareto')
        ax.set_xlim(4300000, 4445000)
        ax.set_ylim(830, 950)
        ax.set_zlim(0, 1)
        ax.set_xlabel('Area')
        ax.set_ylabel('PDP')
        ax.set_zlabel(metric)
        ax.legend()
        plt.tight_layout()
        plt.subplots_adjust(left=0.05, right=0.98)
        plt.savefig(f"{folder}/plot_{dim}_{metric.lower()}.png", dpi=300, bbox_inches='tight')
        plt.close()

# Handle Convergence (no CSVs)
generations = 80
x = np.arange(1, generations + 1)
results = {dim: {} for dim in latent_dims}
for dim in latent_dims:
    decay_factor = 0.06 - 0.0001 * dim
    noise_scale = 3000 if dim == 512 else 1500 if dim == 256 else 1000
    area = 4460000 + 100000 * np.exp(-decay_factor * x) + np.random.normal(0, noise_scale, generations)
    pdp = 860 + 90 * np.exp(-decay_factor * x) + np.random.normal(0, 1.2, generations)
    acc = 85 + (dim / 128) * 2 + 6 * (1 - np.exp(-decay_factor * x)) + np.random.normal(0, 0.2, generations)
    results[dim]['Area'] = savgol_filter(area, 11, 3)
    results[dim]['PDP'] = savgol_filter(pdp, 11, 3)
    results[dim]['Accuracy'] = savgol_filter(np.clip(acc, 0, 92), 11, 3)
    fig, ax = plt.subplots(figsize=(10, 6))
    ax2 = ax.twinx()
    ax3 = ax.twinx()
    ax3.spines['right'].set_position(('axes', 1.1))
    ax.plot(x, results[dim]['Area'], 'purple', linewidth=2)
    ax.set_ylabel('Area', color='purple')
    ax2.plot(x, results[dim]['PDP'], 'blue', linewidth=2)
    ax2.set_ylabel('PDP', color='blue')
    ax3.plot(x, results[dim]['Accuracy'], 'green', linewidth=2)
    ax3.set_ylabel('Accuracy', color='green')
    plt.tight_layout()
    plt.savefig(f"{metric_folders['Convergence']}/plot_{dim}_convergence.png", dpi=300, bbox_inches='tight')
    plt.close()

# Now combine into 2 composite plots (2x4 layout)
for i, group in enumerate([[64, 128], [256, 512]]):
    fig, axs = plt.subplots(2, 4, figsize=(16, 8))
    for row, dim in enumerate(group):
        for col, (metric, folder) in enumerate(metric_folders.items()):
            path = f"{folder}/plot_{dim}_{'cnn' if metric=='CNN' else metric.lower()}.png"
            if os.path.exists(path):
                img = mpimg.imread(path)
                axs[row, col].imshow(img)
                axs[row, col].axis('off')
            else:
                axs[row, col].text(0.5, 0.5, f"Missing: {dim} {metric}", ha='center', va='center')
                axs[row, col].axis('off')
    plt.tight_layout()
    plt.subplots_adjust(left=0.02, right=0.98)
    plt.savefig(f"final_composite_plot_{i+1}.png", dpi=300, bbox_inches='tight')
    plt.close()

"✅ All done — individual plots, CSVs, and final 2x4 composite plots saved."

KeyError: "['Metric'] not in index"

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
from scipy.signal import savgol_filter

# ======================== PARETO PLOT UTILITIES ========================
def filter_pareto_by_threshold(pareto_df, x_thresh, y_thresh, z_thresh):
    return pareto_df[
        (pareto_df['Area'] <= x_thresh) &
        (pareto_df['PDP'] <= y_thresh) &
        (pareto_df['Metric'] >= z_thresh)
    ]

def find_pareto_front(data, maximize_metric=True):
    def is_dominated(p1, p2):
        a1, d1, m1 = p1['Area'], p1['PDP'], p1['Metric']
        a2, d2, m2 = p2['Area'], p2['PDP'], p2['Metric']
        better_eq = (a2 <= a1) and (d2 <= d1) and ((m2 >= m1) if maximize_metric else (m2 <= m1))
        strict = (a2 < a1) or (d2 < d1) or ((m2 > m1) if maximize_metric else (m2 < m1))
        return better_eq and strict

    pareto_front = []
    for p1 in data:
        if not any(is_dominated(p1, p2) for p2 in data if p2 != p1):
            pareto_front.append(p1)
    return pareto_front

# ======================== PLOT GENERATION ========================
def generate_plots(metric_name, input_folder, output_folder):
    csv_files = [f"generated_{dim}.csv" for dim in [64, 128, 256, 512]]
    titles = [f"Dim {dim}" for dim in [64, 128, 256, 512]]
    label_legend_map = {'set1': 'NSGA-II', 'set2': 'AGE-MOEA-II'}
    color_map = {'set1': 'blue', 'set2': 'green'}

    # Thresholds (adjust per metric)
    thresholds = {
        'SSIM': (4380000, 930, 0.6),
        'PSNR': (4380000, 930, 20),
        'CNN': (4380000, 930, 80)
    }
    x_thresh, y_thresh, z_thresh = thresholds[metric_name]

    # Create output folder if not exists
    os.makedirs(output_folder, exist_ok=True)

    # Generate individual plots and CSVs
    for csv, title in zip(csv_files, titles):
        df = pd.read_csv(os.path.join(input_folder, csv))
        df['Metric'] = df[metric_name]  # Standardize column name
        filtered_df = df[df['Label'].isin(label_legend_map.keys())].copy()
        data_list = filtered_df[['Area', 'PDP', 'Metric']].to_dict('records')

        # Find Pareto front
        pareto_points = find_pareto_front(data_list, maximize_metric=(metric_name != 'CNN'))
        pareto_indices = [p['index'] for p in pareto_points if 'index' in p]
        pareto_df = df.loc[pareto_indices]

        # Save filtered Pareto CSV
        dim = csv.split('_')[-1].split('.')[0]
        filtered_pareto = filter_pareto_by_threshold(pareto_df, x_thresh, y_thresh, z_thresh)
        filtered_pareto.to_csv(os.path.join(output_folder, f'pareto_{dim}.csv'), index=False)

        # Plot
        fig = plt.figure(figsize=(8, 6))
        ax = fig.add_subplot(111, projection='3d')
        
        # Scatter non-Pareto points
        for label, color in color_map.items():
            subset = df[(df['Label'] == label) & (~df.index.isin(pareto_indices))]
            ax.scatter(subset['Area'], subset['PDP'], subset['Metric'],
                       c=color, marker='.', s=15, label=label_legend_map[label], alpha=0.6)
        
        # Highlight Pareto points
        ax.scatter(filtered_pareto['Area'], filtered_pareto['PDP'], filtered_pareto['Metric'],
                   c='red', marker='o', s=20, label='Pareto Front', alpha=1.0)
        
        # Axes settings
        ax.set_xlabel('Area')
        ax.set_ylabel('PDP')
        ax.set_zlabel(metric_name)
        ax.set_title(f'{metric_name} vs. Area/PDP: {title}')
        ax.legend()
        
        plt.savefig(os.path.join(output_folder, f'{metric_name}_{dim}.png'))
        plt.close()

    # Generate 2x4 subplot figure (64/128 vs. 256/512)
    fig, axes = plt.subplots(2, 4, figsize=(20, 10), subplot_kw={'projection': '3d'})
    for i, dim_pair in enumerate([(64, 128), (256, 512)]):
        for j, dim in enumerate(dim_pair):
            df = pd.read_csv(os.path.join(input_folder, f'generated_{dim}.csv'))
            df['Metric'] = df[metric_name]
            filtered_df = df[df['Label'].isin(label_legend_map.keys())].copy()
            data_list = filtered_df[['Area', 'PDP', 'Metric']].to_dict('records')
            pareto_points = find_pareto_front(data_list, maximize_metric=(metric_name != 'CNN'))
            pareto_indices = [p['index'] for p in pareto_points if 'index' in p]
            pareto_df = df.loc[pareto_indices]

            # Plot
            ax = axes[i, j]
            for label, color in color_map.items():
                subset = df[(df['Label'] == label) & (~df.index.isin(pareto_indices))]
                ax.scatter(subset['Area'], subset['PDP'], subset['Metric'],
                           c=color, marker='.', s=10, alpha=0.5)
            ax.scatter(pareto_df['Area'], pareto_df['PDP'], pareto_df['Metric'],
                       c='red', marker='o', s=15, label='Pareto')
            ax.set_title(f'Dim {dim}')
            ax.set_xlabel('Area')
            ax.set_ylabel('PDP')
            ax.set_zlabel(metric_name)

    plt.suptitle(f'{metric_name} Pareto Fronts by Latent Dimension', fontsize=16)
    plt.tight_layout()
    plt.savefig(os.path.join(output_folder, f'combined_{metric_name}.png'))
    plt.close()

# ======================== CONVERGENCE PLOTS ========================
def generate_convergence_plots(output_folder):
    os.makedirs(output_folder, exist_ok=True)
    generations = 80
    x = np.arange(1, generations + 1)
    latent_dims = [64, 128, 256, 512]
    metrics = ['Area', 'PDP', 'Accuracy']
    results = {dim: {} for dim in latent_dims}

    # Synthetic data generation
    for dim in latent_dims:
        decay_factor = 0.06 - 0.0001 * dim
        noise_scale = 3000 if dim == 512 else 1500 if dim == 256 else 1000
        area = 4460000 + 100000 * np.exp(-decay_factor * x) + np.random.normal(0, noise_scale, generations)
        pdp = 860 + 90 * np.exp(-decay_factor * x) + np.random.normal(0, 1.2, generations)
        base_acc = 85 + (dim / 128) * 2
        acc = base_acc + 6 * (1 - np.exp(-decay_factor * x)) + np.random.normal(0, 0.2, generations)
        acc = np.clip(acc, 0, 92)
        
        # Smoothing
        results[dim]['Area'] = savgol_filter(area, 11, 3)
        results[dim]['PDP'] = savgol_filter(pdp, 11, 3)
        results[dim]['Accuracy'] = savgol_filter(acc, 11, 3)

    # Individual convergence plots
    for dim in latent_dims:
        fig, ax = plt.subplots(figsize=(10, 6))
        ax2 = ax.twinx()
        ax3 = ax.twinx()
        ax3.spines['right'].set_position(('axes', 1.1))
        
        ax.plot(x, results[dim]['Area'], 'purple', label='Area', linewidth=2)
        ax2.plot(x, results[dim]['PDP'], 'blue', label='PDP', linewidth=2)
        ax3.plot(x, results[dim]['Accuracy'], 'green', label='Accuracy', linewidth=2)
        
        ax.set_xlabel("Generation")
        ax.set_ylabel('Area ($\mu m^2$)', color='purple')
        ax2.set_ylabel('PDP ($\mu m^2 \cdot ns$)', color='blue')
        ax3.set_ylabel('CNN Accuracy (%)', color='green')
        ax.set_title(f"Convergence (Dim {dim})")
        
        fig.tight_layout()
        plt.savefig(os.path.join(output_folder, f'convergence_{dim}.png'))
        plt.close()

    # Composite convergence plot (all dimensions)
    fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(12, 15))
    for dim in latent_dims:
        ax1.plot(x, results[dim]['Area'], label=f'Dim {dim}')
        ax2.plot(x, results[dim]['PDP'], label=f'Dim {dim}')
        ax3.plot(x, results[dim]['Accuracy'], label=f'Dim {dim}')
    
    ax1.set_ylabel('Area ($\mu m^2$)')
    ax2.set_ylabel('PDP ($\mu m^2 \cdot ns$)')
    ax3.set_ylabel('Accuracy (%)')
    ax3.set_xlabel('Generation')
    ax1.legend()
    ax2.legend()
    ax3.legend()
    plt.suptitle("Convergence Across Latent Dimensions")
    plt.savefig(os.path.join(output_folder, 'composite_convergence.png'))
    plt.close()

# ======================== MAIN EXECUTION ========================
if __name__ == "__main__":
    # Process SSIM, PSNR, CNN metrics
    for metric, folder in [('SSIM', 'all_4_dim_vae_plots_ssim'),
                          ('PSNR', 'all_4_dim_vae_plots_psnr'),
                          ('CNN', 'all_4_dim_vae_plots_cnn')]:
        generate_plots(metric, folder, folder)
    
    # Generate convergence plots
    generate_convergence_plots('all_4_dim_vae_plots_convergence')
    print("✅ All plots and CSVs generated successfully!")