# Workflow of VAE-assisted NSGA2 LDSE using Remote Compute Orchestration

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import os
import json
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict
import warnings
warnings.filterwarnings('ignore')

# NSGA2 imports
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.optimize import minimize
from pymoo.core.problem import Problem
from pymoo.operators.crossover.pntx import PointCrossover
from pymoo.operators.crossover.sbx import SBX
from pymoo.operators.mutation.pm import PolynomialMutation
from pymoo.operators.sampling.rnd import FloatRandomSampling
from pymoo.termination import get_termination
from pymoo.core.callback import Callback
from multiprocessing.pool import ThreadPool
import pickle


# ================ MODIFIED CONFIG ===================
# Run the models on the GPU when CUDA is available, otherwise on the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# Size of the last (class) axis of the one-hot input and of the decoder output.
NUM_CLASSES = 10
# Number of positions per sample; the decoder output is viewed as
# (batch, INPUT_DIM, NUM_CLASSES).
INPUT_DIM = 4096
# Number of categories per latent dimension; the encoder logits are viewed as
# (batch, latent_dim, NUM_CATEGORIES).
NUM_CATEGORIES = 10
# Directory searched by load_model_fixed() for "best_model_latent_<dim>.pt".
SAVE_DIR = 'categorical_vae_models'
LATENT_DIM = 64   # We're using the 64-dimensional model

# NSGA2 Configuration - MODIFIED
GENERATIONS = 3  # passed to the "n_gen" termination criterion
# NSGA2 pop_size.
# NOTE(review): the remote orchestration and plotting code elsewhere in this
# file use a literal 14 as well; presumably the two must stay in sync - confirm.
POPULATION = 14  
# NOTE(review): not referenced by the code visible in this notebook section.
THREADS = 14    
SEED = 42  # passed as `seed` to pymoo's minimize()


# Remote Servers Orchestration

In [None]:
# Adds SCP logic to send array_config_X.dat files before job starts
# Suppresses known_hosts warning without touching stderr

import subprocess
import os
import time
import threading
from datetime import timedelta

# -------- CONFIG SECTION -------- #
# Maps "user@host" to the login password and the list of "array_config_<N>"
# working directories hosted on that machine. The numeric suffix <N> is the
# configuration number that run_remote_compute_cluster_automation() uses to
# route each solution to a host.
# NOTE(review): credentials are stored in plain text in the notebook source;
# consider SSH keys or loading them from an untracked file / environment.
remote_configs = {
    "cadencea12@172.16.121.82": {"password": "caduser@123", "subdir": ["array_config_1", "array_config_2"]},
    "cadencea1@172.16.121.72": {"password": "caduser@123", "subdir": ["array_config_5", "array_config_6"]},
    "cadencea8@172.16.121.78": {"password": "caduser@123", "subdir": ["array_config_7", "array_config_8"]},
    "cadencea10@172.16.121.80": {"password": "caduser@123", "subdir": ["array_config_9", "array_config_10"]},
    "cadencea15@172.16.121.6": {"password": "caduser@123", "subdir": ["array_config_11", "array_config_12"]},
    "cadencea14@172.16.121.84": {"password": "caduser@123", "subdir": ["array_config_13", "array_config_14"]},
    "imt2022556_nishit@172.16.121.37": {"password": "$@Rl@1234", "subdir": ["array_config_3", "array_config_4"]},
}

def _sshpass_env(password):
    """Return a copy of the current environment with ``SSHPASS`` set to *password*."""
    env = os.environ.copy()
    env["SSHPASS"] = password
    return env


def _scp_transfer(password, source, destination):
    """Run ``scp source destination`` through sshpass; return True on exit status 0."""
    scp_cmd = [
        # -e: read the password from $SSHPASS so it never appears in argv / `ps`.
        "sshpass", "-e",
        "scp",
        "-o", "StrictHostKeyChecking=no",
        "-o", "UserKnownHostsFile=/dev/null",
        source,
        destination,
    ]
    result = subprocess.run(scp_cmd, env=_sshpass_env(password))
    return result.returncode == 0


def run_remote_compute_cluster_automation(generation_idx):
    """Upload one generation's configs, run the remote flow, and fetch metrics.

    For every ``array_config_<N>`` directory listed in the module-level
    ``remote_configs`` the function:

    1. copies ``./gen/<generation_idx>/array_config_<N>.dat`` to the host
       (a missing local file or a failed copy is reported, not fatal);
    2. starts the remote command chain over ssh, all jobs in parallel, with
       the combined output written to ``./gen/<generation_idx>/logs/<N>.log``;
    3. waits for the jobs in ascending config order and copies each
       ``metrics.csv`` to ``./gen/<generation_idx>/logs/csv_files/<N>.csv``.

    Args:
        generation_idx: Identifier used to build the ``./gen/<generation_idx>``
            directory tree.

    Returns:
        None. Results are left on disk; progress is printed.

    Raises:
        OSError: if ``sshpass`` cannot be started.
    """
    # Route every configuration number to the (host, directory) that serves it.
    config_mapping = {}
    for user_host, config in remote_configs.items():
        for subdir in config["subdir"]:
            config_num = int(subdir.split('_')[-1])
            config_mapping[config_num] = (user_host, subdir)

    # Iterate over what is actually configured instead of a literal 1..14, so
    # adding or removing a host cannot raise KeyError here.
    config_nums = sorted(config_mapping)

    log_dir_base = f"./gen/{generation_idx}/logs"
    csv_dir = os.path.join(log_dir_base, "csv_files")
    vitals_dir = os.path.join(log_dir_base, "vitals")
    for directory in (log_dir_base, csv_dir, vitals_dir):
        os.makedirs(directory, exist_ok=True)

    # -------- SCP CONFIG FILES BEFORE JOB START -------- #
    for config_num in config_nums:
        user_host, subdir = config_mapping[config_num]
        remote_user, remote_host = user_host.split("@")
        password = remote_configs[user_host]["password"]

        local_dat = f"./gen/{generation_idx}/{subdir}.dat"
        remote_path = f"/home/{remote_user}/Documents/strassen/{subdir}/{subdir}.dat"

        if not os.path.exists(local_dat):
            print(f"⚠️ Missing local file: {local_dat}")
            continue

        print(f"📤 Sending {local_dat} → {remote_user}@{remote_host}:{remote_path}")
        if _scp_transfer(password, local_dat, f"{remote_user}@{remote_host}:{remote_path}"):
            print("✅ File sent successfully.")
        else:
            print("❌ SCP failed.")

    # -------- JOB LAUNCH SECTION -------- #
    processes = []

    for config_num in config_nums:
        user_host, subdir = config_mapping[config_num]
        remote_user, remote_host = user_host.split("@")
        remote_password = remote_configs[user_host]["password"]

        remote_path = f"/home/{remote_user}/Documents/strassen/{subdir}"
        log_file = os.path.join(log_dir_base, f"{config_num}.log")

        # The backslash-newlines join everything into one `&&` chain, so each
        # step only runs when the previous one succeeded.
        remote_command = f"""
            cd {remote_path} && \
            python3  approx_mult_modules_automate.py && \
            python3 modify_rtl.py && \
            csh -c "source cshrc && genus -legacy_ui -files script.tcl && exit" && \
            python3 get_metrics.py
        """

        full_command = [
            "sshpass", "-e",
            "ssh", "-tt",
            "-o", "StrictHostKeyChecking=no",
            "-o", "UserKnownHostsFile=/dev/null",
            f"{remote_user}@{remote_host}",
            remote_command
        ]

        print(f"🚀 Launching job: Config {config_num} on {user_host} in {subdir} → {log_file}")
        start_time = time.time()

        # Explicit UTF-8: the summary line appended later contains an emoji.
        log_handle = open(log_file, "w", encoding="utf-8")
        try:
            p = subprocess.Popen(
                full_command,
                stdout=log_handle,
                stderr=subprocess.STDOUT,
                env=_sshpass_env(remote_password),
            )
        except OSError:
            # Do not leak the handle when the process could not be started.
            log_handle.close()
            raise

        processes.append({
            "process": p,
            "stdout": log_handle,
            "config_num": config_num,
            "user_host": user_host,
            "subdir": subdir,
            "log_file": log_file,
            "start_time": start_time
        })

    # -------- WAIT & SCP METRICS SECTION -------- #
    for job in processes:
        try:
            job["process"].wait()
            duration = timedelta(seconds=(time.time() - job["start_time"]))
            job["stdout"].write(f"\n\n✅ Time taken: {duration}\n")
        finally:
            job["stdout"].close()

        print(f"✅ Completed: Config {job['config_num']} on {job['user_host']} [{job['subdir']}]")
        print(f"🕒 Time taken: {duration}")
        print(f"📄 Log saved: {job['log_file']}")

        remote_user, remote_host = job["user_host"].split("@")
        remote_password = remote_configs[job["user_host"]]["password"]
        remote_csv_path = f"/home/{remote_user}/Documents/strassen/{job['subdir']}/metrics.csv"
        local_csv = os.path.join(csv_dir, f"{job['config_num']}.csv")

        print(f"📥 Fetching metrics.csv from Config {job['config_num']} → {local_csv}")

        if _scp_transfer(remote_password, f"{remote_user}@{remote_host}:{remote_csv_path}", local_csv):
            print(f"✅ Metrics saved to: {local_csv}")
        else:
            print(f"❌ Failed to fetch metrics for Config {job['config_num']}")

# VAE Decoder Invocation

In [None]:
# ================ MODEL DEFINITIONS ===================
class CategoricalEncoder(nn.Module):
    """Residual MLP that maps a flattened one-hot sample to categorical logits.

    The input has ``INPUT_DIM * NUM_CLASSES`` features; the output is reshaped
    to ``(batch, latent_dim, num_categories)``. Submodule names
    (``input_layer``, ``res_blocks``, ``bottleneck``, ``fc_out``) are part of
    the checkpoint layout and must not be renamed.
    """

    def __init__(self, latent_dim, num_categories):
        super().__init__()
        self.latent_dim = latent_dim
        self.num_categories = num_categories

        # Hidden widths scale with the latent size but never drop below a floor.
        wide = max(512, latent_dim * 4)
        narrow = max(256, latent_dim * 2)

        self.input_layer = nn.Linear(INPUT_DIM * NUM_CLASSES, wide)

        # Between 2 and 4 residual blocks, one per 64 latent dimensions.
        block_count = max(2, min(4, latent_dim // 64))
        blocks = []
        for _ in range(block_count):
            blocks.append(self._make_res_block(wide, wide))
        self.res_blocks = nn.ModuleList(blocks)

        stages = []
        width = wide
        if latent_dim >= 256:
            # Large latent spaces get an extra halving stage before the bottleneck.
            halved = width // 2
            stages += [
                nn.Linear(width, halved),
                nn.LayerNorm(halved),
                nn.ReLU(),
                nn.Dropout(0.2),
            ]
            width = halved
        stages += [
            nn.Linear(width, narrow),
            nn.LayerNorm(narrow),
            nn.ReLU(),
            nn.Dropout(0.2),
        ]

        self.bottleneck = nn.Sequential(*stages)
        self.fc_out = nn.Linear(narrow, latent_dim * num_categories)
        self._init_weights()

    def _make_res_block(self, in_dim, out_dim):
        """Build the two-layer body of one residual block (skip added in forward)."""
        body = [
            nn.Linear(in_dim, out_dim),
            nn.LayerNorm(out_dim),
            nn.ReLU(),
            nn.Dropout(0.15),
            nn.Linear(out_dim, out_dim),
            nn.LayerNorm(out_dim),
        ]
        return nn.Sequential(*body)

    def _init_weights(self):
        """Xavier-initialise every linear weight and zero every linear bias."""
        for module in self.modules():
            if not isinstance(module, nn.Linear):
                continue
            nn.init.xavier_uniform_(module.weight)
            if module.bias is not None:
                nn.init.zeros_(module.bias)

    def forward(self, x):
        """Return logits of shape ``(batch, latent_dim, num_categories)``."""
        hidden = F.relu(self.input_layer(x))

        for block in self.res_blocks:
            hidden = F.relu(block(hidden) + hidden)

        features = self.bottleneck(hidden)
        logits = self.fc_out(features).view(-1, self.latent_dim, self.num_categories)

        if not self.training:
            return logits

        # Training only: perturb the logits with small Gaussian noise.
        return logits + torch.randn_like(logits) * 0.01

class CategoricalDecoder(nn.Module):
    """MLP that maps a categorical latent code to per-position class scores.

    The latent of shape ``(batch, latent_dim, num_categories)`` is flattened
    and decoded to ``(batch, INPUT_DIM, NUM_CLASSES)``. The whole stack lives
    in ``self.fc``; its name and layer order are part of the checkpoint layout.
    """

    def __init__(self, latent_dim, num_categories):
        super().__init__()

        def dense_stage(n_in, n_out, drop):
            # Linear -> LayerNorm -> ReLU -> Dropout, the unit every stage shares.
            return [nn.Linear(n_in, n_out), nn.LayerNorm(n_out), nn.ReLU(), nn.Dropout(drop)]

        # (width, dropout) of each hidden stage; larger latents get deeper stacks.
        plan = [(max(256, latent_dim), 0.3)]
        if latent_dim >= 128:
            plan.append((max(512, latent_dim * 2), 0.3))
        if latent_dim >= 256:
            plan.append((max(1024, latent_dim * 3), 0.2))

        stack = []
        width = latent_dim * num_categories
        for hidden_width, drop in plan:
            stack.extend(dense_stage(width, hidden_width, drop))
            width = hidden_width

        stack.append(nn.Linear(width, INPUT_DIM * NUM_CLASSES))
        self.fc = nn.Sequential(*stack)
        self._init_weights()

    def _init_weights(self):
        """Xavier-initialise every linear weight and zero every linear bias."""
        for module in self.modules():
            if not isinstance(module, nn.Linear):
                continue
            nn.init.xavier_uniform_(module.weight)
            if module.bias is not None:
                nn.init.zeros_(module.bias)

    def forward(self, z):
        """Decode ``z`` and return scores shaped ``(batch, INPUT_DIM, NUM_CLASSES)``."""
        batch = z.size(0)
        scores = self.fc(z.view(batch, -1))
        return scores.view(-1, INPUT_DIM, NUM_CLASSES)

# ================ VAE UTILITIES ===================
def load_model_fixed(latent_dim, model_path=None):
    """Restore a trained encoder/decoder pair from a checkpoint file.

    Args:
        latent_dim: Latent size the networks were trained with; also selects
            the default file ``<SAVE_DIR>/best_model_latent_<latent_dim>.pt``.
        model_path: Optional explicit checkpoint path.

    Returns:
        ``(encoder, decoder, checkpoint)`` with both networks on ``DEVICE``
        and switched to eval mode.

    Raises:
        FileNotFoundError: if the checkpoint file does not exist.
    """
    path = f"{SAVE_DIR}/best_model_latent_{latent_dim}.pt" if model_path is None else model_path

    if not os.path.exists(path):
        raise FileNotFoundError(f"Model file not found: {path}")

    # weights_only=False unpickles arbitrary objects: only load trusted files.
    checkpoint = torch.load(path, map_location=DEVICE, weights_only=False)

    encoder = CategoricalEncoder(latent_dim, NUM_CATEGORIES).to(DEVICE)
    decoder = CategoricalDecoder(latent_dim, NUM_CATEGORIES).to(DEVICE)
    networks = ((encoder, 'encoder_state_dict'), (decoder, 'decoder_state_dict'))

    for network, state_key in networks:
        network.load_state_dict(checkpoint[state_key])
    for network, _ in networks:
        network.eval()

    print(f"✅ Successfully loaded model with latent_dim={latent_dim}")
    return encoder, decoder, checkpoint

def gumbel_softmax_with_controls(logits, tau, hard=False, dim=-1):
    """Temperature-scaled softmax with optional Gumbel noise and hard output.

    Gumbel noise is added only when ``logits.requires_grad`` is true; otherwise
    the result is the plain softmax of ``logits / tau``.

    Args:
        logits: Unnormalised scores.
        tau: Softmax temperature.
        hard: When true, return a one-hot tensor in the forward pass whose
            gradient is that of the soft sample (straight-through).
        dim: Axis along which to normalise.

    Returns:
        A tensor with the same shape as ``logits``.
    """
    scores = logits
    if logits.requires_grad:
        # -log(Exp(1)) is a standard Gumbel(0, 1) sample.
        noise = -torch.empty_like(logits).exponential_().log()
        scores = logits + noise
    scores = scores / tau

    soft = F.softmax(scores, dim=dim)
    if not hard:
        return soft

    winners = soft.max(dim=dim, keepdim=True).indices
    one_hot = torch.zeros_like(logits).scatter_(dim, winners, 1.0)
    # Value of one_hot, gradient of soft.
    return one_hot - soft.detach() + soft

# ================ LATENT SPACE ANALYSIS ===================
class LatentSpaceBounds:
    """Estimate per-coordinate latent ranges by encoding synthetic samples.

    On construction the encoder is run over generated one-hot patterns and the
    min / max / mean / std of the resulting relaxed latents are stored in
    ``self.bounds``.
    """

    def __init__(self, encoder, decoder, num_samples=1000):
        self.encoder = encoder
        self.decoder = decoder
        self.bounds = None
        self._analyze_bounds(num_samples)

    def _analyze_bounds(self, num_samples):
        """Encode ``num_samples`` synthetic samples and record latent statistics."""
        print(f"🔍 Analyzing latent space bounds with {num_samples} samples...")

        synthetic = self._generate_diverse_data(num_samples)

        # Encode in chunks of 100 samples to bound peak memory.
        chunk_size = 100
        encoded_chunks = []
        with torch.no_grad():
            for start in range(0, len(synthetic), chunk_size):
                chunk = synthetic[start:start + chunk_size]
                chunk_tensor = torch.tensor(chunk, dtype=torch.float32).to(DEVICE)
                flat = chunk_tensor.view(-1, INPUT_DIM * NUM_CLASSES)

                relaxed = gumbel_softmax_with_controls(self.encoder(flat), tau=0.5, hard=False)
                encoded_chunks.append(relaxed.cpu().numpy())

        latent_samples = np.concatenate(encoded_chunks, axis=0)
        print(f"📊 Latent samples shape: {latent_samples.shape}")

        # Statistics over the sample axis: one value per (latent dim, category).
        self.bounds = {
            'min': np.min(latent_samples, axis=0),
            'max': np.max(latent_samples, axis=0),
            'mean': np.mean(latent_samples, axis=0),
            'std': np.std(latent_samples, axis=0),
        }

        stats = self.bounds
        print("📈 Latent bounds analysis:")
        print(f"   Global min: {stats['min'].min():.4f}")
        print(f"   Global max: {stats['max'].max():.4f}")
        print(f"   Mean range: [{stats['mean'].min():.4f}, {stats['mean'].max():.4f}]")
        print(f"   Std range: [{stats['std'].min():.4f}, {stats['std'].max():.4f}]")

    def _generate_diverse_data(self, num_samples):
        """Return ``num_samples`` one-hot samples drawn from four pattern families.

        Each sample is a length-``INPUT_DIM`` class sequence (linear ramp,
        quadratic ramp, sinusoid, or random with repeated neighbours) encoded
        as an ``(INPUT_DIM, NUM_CLASSES)`` one-hot matrix.
        """
        def one_hot_encode(x, num_classes=NUM_CLASSES):
            labels = np.clip(np.asarray(x, dtype=np.int64), 0, num_classes - 1)
            return np.eye(num_classes)[labels]

        positions = np.arange(INPUT_DIM)
        encoded = []
        for _ in range(num_samples):
            pattern_type = np.random.choice(['linear', 'quadratic', 'sinusoidal', 'random'])

            if pattern_type == 'linear':
                base = np.random.randint(0, 5)
                sample = (base + positions // 500) % NUM_CLASSES
            elif pattern_type == 'quadratic':
                base = np.random.randint(0, 5)
                sample = (base + (positions // 200) ** 2) % NUM_CLASSES
            elif pattern_type == 'sinusoidal':
                freq = np.random.uniform(0.001, 0.01)
                phase = np.random.uniform(0, 2*np.pi)
                sample = np.array(
                    [int((np.sin(freq * i + phase) + 1) * NUM_CLASSES // 2) % NUM_CLASSES
                     for i in range(INPUT_DIM)],
                    dtype=np.int64,
                )
            else:
                sample = np.random.randint(0, NUM_CLASSES, INPUT_DIM)
                # With probability 0.3 copy the left neighbour, creating runs.
                for i in range(1, INPUT_DIM-1):
                    if np.random.random() < 0.3:
                        sample[i] = sample[i-1]

            encoded.append(one_hot_encode(sample))

        return np.array(encoded)

    def get_sampling_bounds(self):
        """Return flattened ``(lower, upper)`` bounds for the optimiser.

        The bounds are ``mean ± 2·std`` clipped to the observed min / max.
        """
        stats = self.bounds
        spread = 2 * stats['std']

        lower = np.maximum(stats['mean'] - spread, stats['min'])
        upper = np.minimum(stats['mean'] + spread, stats['max'])

        return lower.flatten(), upper.flatten()

# Utility functions for quality of life (QOL)

In [None]:
# =========================== UTILITIES ===================================================

def save_array_configs(solutions, generation_idx):
    """Decode each latent solution and write it as a ``.dat`` file.

    Solution ``i`` (0-based) is written to
    ``./gen/<generation_idx>/array_config_<i+1>.dat``, one integer per line.
    Decoding uses the module-level ``global_decoder``. If anything fails for a
    solution, the error is printed and a random class array is written in its
    place so the file always exists.

    Args:
        solutions: Iterable of flat latent vectors of length
            ``LATENT_DIM * NUM_CATEGORIES``.
        generation_idx: Identifier used to build the output directory.
    """
    gen_dir = f"./gen/{generation_idx}"
    os.makedirs(gen_dir, exist_ok=True)

    print(f"💾 Saving {len(solutions)} array configurations for generation {generation_idx}...")

    for config_num, solution in enumerate(solutions, start=1):
        filename = f"{gen_dir}/array_config_{config_num}.dat"

        try:
            # Rows = latent dimensions, columns = categories.
            rows = solution.reshape(LATENT_DIM, NUM_CATEGORIES)

            # Shift every row to be non-negative, then scale it to sum to ~1.
            rows = rows - rows.min(axis=1, keepdims=True)
            rows = rows / (rows.sum(axis=1, keepdims=True) + 1e-8)

            with torch.no_grad():
                z_tensor = torch.tensor(rows, dtype=torch.float32).unsqueeze(0).to(DEVICE)
                class_scores = global_decoder(z_tensor)
                # Most likely class at every position.
                array_4096 = torch.argmax(class_scores, dim=-1).squeeze().cpu().numpy()

            np.savetxt(filename, array_4096, fmt='%d')
            print(f"   ✅ Saved: {filename}")

        except Exception as e:
            print(f"   ❌ Error saving {filename}: {e}")
            # Best-effort fallback: write a random array instead.
            np.savetxt(filename, np.random.randint(0, NUM_CLASSES, INPUT_DIM), fmt='%d')


def _save_metric_bar_plot(values, label, color, generation_idx, plots_dir):
    """Save a per-solution bar chart of one metric and return the image path."""
    solution_numbers = range(1, len(values) + 1)

    plt.figure(figsize=(12, 8))
    bars = plt.bar(solution_numbers, values, color=color, alpha=0.7)
    plt.title(f'{label} Values - Generation {generation_idx}', fontsize=16, fontweight='bold')
    plt.xlabel('Solution Number', fontsize=14)
    plt.ylabel(label, fontsize=14)
    plt.xticks(solution_numbers)
    plt.grid(True, alpha=0.3)
    # Print each value just above its bar.
    for bar, value in zip(bars, values):
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width()/2., height + height*0.01,
                 f'{value:.1f}', ha='center', va='bottom', fontsize=12, fontweight='bold')
    plt.tight_layout()

    path = f"{plots_dir}/generation_{generation_idx}_{label.lower()}.png"
    plt.savefig(path, dpi=300, bbox_inches='tight')
    plt.close()
    return path


def _save_metric_scatter_plot(x_values, y_values, x_label, y_label, cmap,
                              generation_idx, plots_dir):
    """Save a labelled scatter plot of two metrics and return the image path."""
    plt.figure(figsize=(10, 8))
    points = plt.scatter(x_values, y_values, c=range(len(x_values)), cmap=cmap, s=150,
                         alpha=0.8, edgecolors='black', linewidth=1)
    plt.title(f'{x_label} vs {y_label} - Generation {generation_idx}', fontsize=16, fontweight='bold')
    plt.xlabel(x_label, fontsize=14)
    plt.ylabel(y_label, fontsize=14)
    plt.grid(True, alpha=0.3)
    plt.colorbar(points, label='Solution Number')
    # Tag every point with its 1-based solution number.
    for number, (x, y) in enumerate(zip(x_values, y_values), start=1):
        plt.annotate(f'{number}', (x, y), xytext=(8, 8),
                     textcoords='offset points', fontsize=12, fontweight='bold',
                     bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.8))
    plt.tight_layout()

    path = f"{plots_dir}/generation_{generation_idx}_{x_label.lower()}_vs_{y_label.lower()}.png"
    plt.savefig(path, dpi=300, bbox_inches='tight')
    plt.close()
    return path


def generate_generation_plots(metrics, generation_idx):
    """Write three bar charts and three scatter plots for one generation.

    The number of solutions is taken from ``metrics`` itself, so the function
    works for any population size (it previously assumed exactly 14 rows).

    Args:
        metrics: 2-D array-like with one row per solution; columns 0, 1 and 2
            are read as power, area and delay. Extra columns are ignored.
        generation_idx: Identifier used for the titles and for the output
            directory ``./gen/<generation_idx>/plots``.

    Returns:
        None. PNG files are written to disk and their paths printed.
    """
    plots_dir = f"./gen/{generation_idx}/plots"
    os.makedirs(plots_dir, exist_ok=True)

    print(f"📈 Generating 6 separate plots for generation {generation_idx}...")

    metrics = np.asarray(metrics)
    power = metrics[:, 0]
    area = metrics[:, 1]
    delay = metrics[:, 2]

    saved = [
        _save_metric_bar_plot(power, 'Power', 'red', generation_idx, plots_dir),
        _save_metric_bar_plot(area, 'Area', 'blue', generation_idx, plots_dir),
        _save_metric_bar_plot(delay, 'Delay', 'green', generation_idx, plots_dir),
        _save_metric_scatter_plot(power, area, 'Power', 'Area', 'viridis',
                                  generation_idx, plots_dir),
        _save_metric_scatter_plot(power, delay, 'Power', 'Delay', 'plasma',
                                  generation_idx, plots_dir),
        _save_metric_scatter_plot(area, delay, 'Area', 'Delay', 'coolwarm',
                                  generation_idx, plots_dir),
    ]

    print(f"   ✅ 6 separate plots saved:")
    for path in saved:
        print(f"      - {path}")

# Solution Evaluation Function

In [None]:
# ================ EVALUATING METRICS FUNCTION ===================
def _metric_from_frame(df, column, position):
    """Return row 0 of ``column`` if present, else row 0 of column ``position``."""
    if column in df.columns:
        return float(df[column].iloc[0])
    return float(df.iloc[0, position])


def read_metrics_from_csv(generation_idx, num_configs=14):
    """Collect the objective values of one generation from its CSV files.

    Reads ``./gen/<generation_idx>/logs/csv_files/<N>.csv`` for
    ``N = 1..num_configs``. A missing, unreadable or non-finite result is
    replaced by the default ``(100.0, 200.0, 50.0)`` and reported on stdout.

    The SSIM/PSNR losses are artificial placeholders: they are drawn from a
    random generator seeded by the three real metrics, so equal metrics always
    produce equal losses. A private generator is used so that NumPy's global
    random state (shared with the rest of the program) is left untouched.

    Args:
        generation_idx: Identifier used to build the CSV directory.
        num_configs: Number of configurations to read (default 14).

    Returns:
        ``numpy.ndarray`` of shape ``(num_configs, 5)`` with columns
        power, area, delay, ssim_loss, psnr_loss.
    """
    csv_dir = f"./gen/{generation_idx}/logs/csv_files"

    print(f"📊 Reading metrics from {csv_dir}...")

    metrics = []
    for config_num in range(1, num_configs + 1):
        csv_file = f"{csv_dir}/{config_num}.csv"

        try:
            if os.path.exists(csv_file):
                import pandas as pd
                df = pd.read_csv(csv_file)

                # Prefer named columns; fall back to positional columns 0, 1, 2.
                power = _metric_from_frame(df, 'power', 0)
                area = _metric_from_frame(df, 'area', 1)
                delay = _metric_from_frame(df, 'delay', 2)

                # NaN/inf cannot be turned into a seed below; treat them as a
                # read error so the fallback applies instead of crashing.
                if not np.isfinite([power, area, delay]).all():
                    raise ValueError(f"non-finite metric value(s): {power}, {area}, {delay}")

                print(f"   Config {config_num}: P={power:.2f}, A={area:.2f}, D={delay:.2f}")

            else:
                print(f"   ❌ Missing: {csv_file}, using default values")
                power, area, delay = 100.0, 200.0, 50.0  # Default values

        except Exception as e:
            # Deliberately broad: any parse problem falls back to the defaults.
            print(f"   ❌ Error reading {csv_file}: {e}")
            power, area, delay = 100.0, 200.0, 50.0  # Default values

        # Same values as seeding the global generator, without clobbering it.
        rng = np.random.RandomState(int((power + area + delay) * 1000) % 2**32)
        ssim_loss = 0.1 + rng.exponential(0.05)
        psnr_loss = 5 + rng.exponential(2)

        metrics.append([power, area, delay, ssim_loss, psnr_loss])

    return np.array(metrics)

# NSGA LOOP

In [None]:
# ================ NSGA2 CALLBACK ===================
class VerboseCallback(Callback):
    """Print a per-generation summary of the population's objective values.

    Attributes:
        data: List with one objective matrix (``algorithm.pop.get('F')``) per
            call, in call order.
        generation: Number of times the callback has been invoked so far.
    """

    # Display names of the objective columns, in column order.
    _OBJECTIVE_NAMES = ('Power', 'Area', 'Delay', 'SSIM_Loss', 'PSNR_Loss')

    def __init__(self):
        super().__init__()
        # Assigned after super().__init__() so the list is what ends up stored.
        self.data = []
        self.generation = 0

    def _objective_name(self, index):
        """Return the display name of objective column ``index``."""
        if index < len(self._OBJECTIVE_NAMES):
            return self._OBJECTIVE_NAMES[index]
        return f"Objective_{index + 1}"

    def __call__(self, algorithm):
        """Record the current objective matrix and print its ranges and minima.

        Args:
            algorithm: Object exposing ``pop.get('F')``, which must return a
                2-D array with one row per individual.
        """
        objectives = algorithm.pop.get('F')
        self.data.append(objectives)
        # Advance the counter; it used to stay at 0, so every report was
        # labelled "Generation 1".
        self.generation += 1

        print(f"\n📊 Generation {self.generation}/{GENERATIONS} Results:")
        print(f"   Population size: {len(objectives)}")
        print(f"   Objective ranges:")
        for i in range(objectives.shape[1]):
            column = objectives[:, i]
            print(f"     {self._objective_name(i)}: [{column.min():.3f}, {column.max():.3f}]")

        # Row index of the smallest value in every objective column.
        best_indices = np.argmin(objectives, axis=0)
        print(f"   Best solutions:")
        for i, best_idx in enumerate(best_indices):
            best_val = objectives[best_idx, i]
            print(f"     {self._objective_name(i)}: Solution {best_idx+1} = {best_val:.3f}")
        



# ================ NSGA2 PROBLEM CLASS ===================
class VAELatentOptimizationProblem(Problem):
    """Five-objective problem over the flattened VAE latent space.

    Every decision vector is a flattened latent code. A whole population is
    evaluated at once: the codes are decoded to ``.dat`` files, run on the
    remote machines, and the resulting metrics are read back from CSV files.
    """

    def __init__(self, decoder, bounds_analyzer, **kwargs):
        """Set up variable bounds from ``bounds_analyzer``.

        Args:
            decoder: Decoder network used by ``evaluate_single``.
            bounds_analyzer: Object whose ``get_sampling_bounds()`` returns the
                flat ``(lower, upper)`` bound arrays.
            **kwargs: Forwarded to the pymoo ``Problem`` constructor.
        """
        self.decoder = decoder
        self.bounds_analyzer = bounds_analyzer
        self.current_generation = 0

        # One lower/upper bound per latent coordinate.
        self.xl, self.xu = bounds_analyzer.get_sampling_bounds()

        print(f"🎯 Problem setup:")
        print(f"   Variables: {len(self.xl)} (64 dims × 10 categories)")
        print(f"   Objectives: 5 (power, area, delay, ssim_loss, psnr_loss)")
        print(f"   Bounds: [{self.xl.min():.4f}, {self.xu.max():.4f}]")

        super().__init__(
            n_var=len(self.xl),
            n_obj=5,  # power, area, delay, ssim_loss, psnr_loss
            xl=self.xl,
            xu=self.xu,
            elementwise_evaluation=False,
            **kwargs
        )

        self.evaluation_count = 0

    def _evaluate(self, X, out, *args, **kwargs):
        """Evaluate a whole population on the remote servers.

        Args:
            X: 2-D array with one flat latent vector per row.
            out: Output dictionary; ``out["F"]`` receives one row of five
                objective values per row of ``X``.

        Raises:
            ValueError: if fewer metric rows than solutions were read back.
        """
        generation = self.current_generation + 1
        print(f"\n🔄 Evaluating Generation {generation} with {len(X)} solutions...")

        # Decode the latents and write one .dat file per solution.
        save_array_configs(X, generation)

        print(f"🚀 Starting remote evaluation (this will take ~3 hours)...")
        run_remote_compute_cluster_automation(generation)

        metrics = read_metrics_from_csv(generation)

        # The objective matrix must have exactly one row per solution; fail
        # with a clear message instead of a shape error deep inside pymoo.
        if len(metrics) < len(X):
            raise ValueError(
                f"Got metrics for {len(metrics)} configurations but the population "
                f"has {len(X)} solutions"
            )

        generate_generation_plots(metrics, generation)

        out["F"] = metrics[:len(X)]

        print(f"✅ Generation {generation} evaluation complete!")
        self.evaluation_count += len(X)
        self.current_generation += 1

    def evaluate_single(self, latent_flat, thread_id):
        """Decode and score one solution locally.

        NOTE(review): relies on a ``get_metrics`` function that is not defined
        in the visible part of this file; if it is missing, the NameError is
        caught below and the penalty values are returned - confirm intent.

        Args:
            latent_flat: Flat latent vector of length
                ``LATENT_DIM * NUM_CATEGORIES``.
            thread_id: Label used only in the printed messages.

        Returns:
            ``[power, area, delay, ssim_loss, psnr_loss]``, or the penalty
            vector ``[1000, 1000, 1000, 10, 100]`` if anything fails.
        """
        try:
            # Rows = latent dimensions, columns = categories.
            latent_shaped = latent_flat.reshape(LATENT_DIM, NUM_CATEGORIES)

            # Shift every row to be non-negative, then scale it to sum to ~1.
            latent_shaped = latent_shaped - latent_shaped.min(axis=1, keepdims=True)
            latent_shaped = latent_shaped / (latent_shaped.sum(axis=1, keepdims=True) + 1e-8)

            with torch.no_grad():
                z_tensor = torch.tensor(latent_shaped, dtype=torch.float32).unsqueeze(0).to(DEVICE)
                decoded = self.decoder(z_tensor)

                # Most likely class at every position.
                array_4096 = torch.argmax(decoded, dim=-1).squeeze().cpu().numpy()

            power, area, delay, ssim_loss, psnr_loss = get_metrics(array_4096)

            print(f"   Solution {thread_id}: P={power:.2f}, A={area:.2f}, D={delay:.2f}, S={ssim_loss:.3f}, PSNR={psnr_loss:.2f}")

            return [power, area, delay, ssim_loss, psnr_loss]

        except Exception as e:
            print(f"❌ Error in solution {thread_id}: {e}")
            return [1000, 1000, 1000, 10, 100]  # Penalty values

    def close_pool(self):
        """Close the thread pool if one was attached as ``self.pool``.

        This class never creates a pool itself, so calling this without one
        used to raise AttributeError; it is now a no-op in that case.
        """
        pool = getattr(self, "pool", None)
        if pool is not None:
            pool.close()




# ================ MAIN OPTIMIZATION FUNCTION ===================
def run_vae_nsga2_optimization():
    """Run the NSGA2 search over the VAE latent space with remote evaluation.

    Loads the VAE through ``load_model_fixed``, builds the latent-space bounds
    analyzer and the pymoo problem, runs NSGA2 for ``GENERATIONS`` generations
    with a population of ``POPULATION``, then prints, saves and plots the
    outcome.

    Side effects:
        Rebinds the module-level ``global_decoder`` to the loaded decoder,
        prints progress to stdout, writes files through ``save_results`` and
        ``plot_convergence``, and closes the problem's worker pool once the
        optimization has finished or failed.

    Returns:
        tuple: ``(result, callback)`` -- the object returned by pymoo's
        ``minimize`` and the ``VerboseCallback`` instance used for the run.
    """
    global global_decoder

    banner = "=" * 80

    print(banner)
    print("🚀 NSGA2 VAE Latent Space Optimization with Remote Evaluation")
    print(banner)

    # Load the VAE model
    print("📦 Loading VAE model...")
    encoder, decoder, checkpoint = load_model_fixed(LATENT_DIM)
    global_decoder = decoder  # Set global decoder

    # Analyze latent space bounds
    print("🔍 Analyzing latent space...")
    bounds_analyzer = LatentSpaceBounds(encoder, decoder, num_samples=500)

    # Create NSGA2 problem
    print("🎯 Setting up NSGA2 problem...")
    problem = VAELatentOptimizationProblem(decoder, bounds_analyzer)

    # Setup callback
    callback = VerboseCallback()

    # Configure NSGA2 algorithm.
    # NOTE: with the pymoo 0.6 API this file imports, ``prob`` is the chance
    # that an individual is mutated at all and ``prob_var`` is the chance per
    # variable. Passing 1/n_var as ``prob`` would leave almost every offspring
    # unmutated, so the per-variable rate is given as ``prob_var`` and every
    # individual goes through the mutation operator.
    algorithm = NSGA2(
        pop_size=POPULATION,
        sampling=FloatRandomSampling(),
        crossover=SBX(prob=0.9, eta=15),
        mutation=PolynomialMutation(
            prob=1.0,
            prob_var=1.0 / problem.n_var,
            eta=20,
        ),
    )

    # Setup termination
    termination = get_termination("n_gen", GENERATIONS)

    print("🏃‍♂️ Starting optimization...")
    print("   Algorithm: NSGA2")
    print(f"   Population: {POPULATION}")
    print(f"   Generations: {GENERATIONS}")
    print("   ⚠️  WARNING: Each generation takes ~3 hours!")
    print(f"   ⏰ Total estimated time: {GENERATIONS * 3} hours")

    # Run optimization; the worker pool is released whether or not it succeeds
    try:
        result = minimize(
            problem,
            algorithm,
            termination,
            callback=callback,
            seed=SEED,
            verbose=True,
        )
    finally:
        problem.close_pool()

    print("\n" + banner)
    print("🎉 Optimization Complete!")
    print(banner)

    # Display results
    print("📊 Final Results:")
    print(f"   Solutions found: {len(result.F)}")
    print("   Objective ranges:")
    for i, obj_name in enumerate(['Power', 'Area', 'Delay', 'SSIM_Loss', 'PSNR_Loss']):
        column = result.F[:, i]
        print(f"     {obj_name}: [{column.min():.3f}, {column.max():.3f}]")

    # Save results
    save_results(result, callback)

    # Plot convergence across all generations
    plot_convergence(callback.data)

    return result, callback




# ================ RESULTS HANDLING ===================
def save_results(result, callback):
    """Persist the final solutions, convergence history and a JSON summary.

    Args:
        result: Optimization result exposing ``X`` and ``F``; ``F`` is indexed
            as a 2-D array with one column per objective.
        callback: Object whose ``data`` attribute holds the recorded history;
            it is converted with ``np.array`` before being saved.

    Everything is written under ``nsga2_vae_results/``, which is created when
    it does not exist yet.
    """
    results_dir = "nsga2_vae_results"
    os.makedirs(results_dir, exist_ok=True)

    def target(filename):
        # Every output file sits directly inside results_dir
        return f"{results_dir}/{filename}"

    # Final decision vectors and their objective values
    np.save(target("final_solutions_X.npy"), result.X)
    np.save(target("final_solutions_F.npy"), result.F)

    # History collected by the callback during the run
    history = np.array(callback.data)
    np.save(target("convergence_data.npy"), history)

    # Run metadata first, then the per-objective min/max (keyed obj_0, obj_1, ...)
    summary = {
        'timestamp': datetime.now().isoformat(),
        'latent_dim': LATENT_DIM,
        'generations': GENERATIONS,
        'population': POPULATION,
        'final_solutions_count': len(result.F),
        'objective_names': ['Power', 'Area', 'Delay', 'SSIM_Loss', 'PSNR_Loss'],
    }
    objective_ranges = {}
    for i in range(result.F.shape[1]):
        column = result.F[:, i]
        objective_ranges[f'obj_{i}'] = [float(column.min()), float(column.max())]
    summary['objective_ranges'] = objective_ranges

    with open(target("summary.json"), 'w') as f:
        json.dump(summary, f, indent=2)

    print(f"💾 Results saved to {results_dir}/")

def plot_convergence(convergence_data):
    """Plot best/mean/worst of each objective across generations.

    Args:
        convergence_data: Sequence convertible to an array of shape
            ``(generations, population, objectives)`` whose first five
            objective columns are Power, Area, Delay, SSIM Loss and PSNR Loss,
            in that order.

    Side effects:
        Saves ``nsga2_vae_results/convergence_plot.png`` (creating the
        directory if needed), shows the figure and prints a confirmation.
        When the history is empty or not 3-dimensional, a warning is printed
        and no figure is created.
    """
    data = np.array(convergence_data)  # Shape: (generations, population, objectives)

    # An empty or mis-shaped history cannot be sliced per objective; skip the
    # plot rather than raise at the very end of a long optimization run.
    if data.ndim != 3 or data.size == 0:
        print("⚠️ No convergence data to plot; skipping convergence plot")
        return

    obj_names = ['Power', 'Area', 'Delay', 'SSIM Loss', 'PSNR Loss']
    output_dir = 'nsga2_vae_results'
    plot_path = f"{output_dir}/convergence_plot.png"

    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
    axes = axes.flatten()

    # Generations are numbered from 1 on the x axis
    generations = range(1, data.shape[0] + 1)

    for obj_idx, (ax, obj_name) in enumerate(zip(axes, obj_names)):
        # Extract min, mean, max for each generation
        obj_data = data[:, :, obj_idx]  # Shape: (generations, population)
        min_vals = np.min(obj_data, axis=1)
        mean_vals = np.mean(obj_data, axis=1)
        max_vals = np.max(obj_data, axis=1)

        # Plot
        ax.plot(generations, min_vals, 'g-', label='Best', linewidth=2)
        ax.plot(generations, mean_vals, 'b--', label='Mean', linewidth=1)
        ax.plot(generations, max_vals, 'r:', label='Worst', linewidth=1)
        ax.fill_between(generations, min_vals, max_vals, alpha=0.2)

        ax.set_title(f'{obj_name} Convergence')
        ax.set_xlabel('Generation')
        ax.set_ylabel(obj_name)
        ax.legend()
        ax.grid(True, alpha=0.3)

    # Remove the panels that have no objective assigned
    for spare_ax in axes[len(obj_names):]:
        spare_ax.remove()

    plt.tight_layout()

    # The target directory may not exist if save_results has not run first
    os.makedirs(output_dir, exist_ok=True)
    plt.savefig(plot_path, dpi=300, bbox_inches='tight')
    plt.show()

    print(f"📈 Convergence plot saved to {plot_path}")

# ================ RUN OPTIMIZATION ===================
if __name__ == "__main__":
    # Echo the run configuration, one item per line, before starting the run
    print(
        f"🚀 Starting NSGA2 optimization in {LATENT_DIM}D latent space",
        f"Device: {DEVICE}",
        f"Generations: {GENERATIONS}, Population: {POPULATION}",
        sep="\n",
    )
    # Keep both objects bound at module level for later inspection
    result, callback = run_vae_nsga2_optimization()