In [None]:
# Imports and setup (needed when 02-06 run in separate kernel)
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import random
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
# Best-effort Google Drive mount. The import of google.colab is expected to fail outside
# Colab, in which case the notebook simply continues with the local filesystem.
try:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=False)
except Exception:
    # Deliberately swallowed: any failure here (import or mount) is treated as "not on Colab".
    pass
def _find_repo_root():
    cwd = Path.cwd().resolve()
    for p in [Path('/content/drive/MyDrive/multihead-attention-robustness'),
              Path('/content/drive/My Drive/multihead-attention-robustness'),
              Path('/content/repo_run')]:
        if (p / 'src').exists():
            return p
    drive_root = Path('/content/drive')
    if drive_root.exists():
        for base in [drive_root / 'MyDrive', drive_root / 'My Drive', drive_root]:
            p = base / 'multihead-attention-robustness'
            if p.exists() and (p / 'src').exists():
                return p
    p = cwd
    for _ in range(10):
        if (p / 'src').exists():
            return p
        if p.parent == p:
            break
        p = p.parent
    return cwd.parent if cwd.name == 'notebooks' else cwd
# Resolve the checkout and make `src` importable before pulling in the project models.
repo_root = _find_repo_root()
sys.path.insert(0, str(repo_root))
from src.models.feature_token_transformer import FeatureTokenTransformer, SingleHeadTransformer

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed every RNG used below with the same value.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

# Keep models/training_history from notebook 02 when run in pipeline (01→02→03);
# anything that is missing, not a dict, or empty is replaced by a fresh dict.
if not (isinstance(globals().get('models'), dict) and globals()['models']):
    models = {}
if not (isinstance(globals().get('training_history'), dict) and globals()['training_history']):
    training_history = {}

# Hyper-parameters for the baseline models, keyed by model family.
TRAINING_CONFIG = {
    'ols': {},
    'ridge': {'alpha': 1.0},
    'mlp': {
        'hidden_dims': [128, 64],
        'learning_rate': 0.001,
        'batch_size': 64,
        'epochs': 100,
        'patience': 10,
    },
    'transformer': {
        'd_model': 72,
        'num_heads': 8,
        'num_layers': 2,
        'd_ff': 512,
        'dropout': 0.1,
        'learning_rate': 0.0001,
        'batch_size': 32,
        'epochs': 100,
        'patience': 20,
    },
}


Mounted at /content/drive


In [None]:
# Standalone data load: every notebook reads master_table.csv itself.
# When a 'date' column is present it is parsed and promoted to the index.
data_path = repo_root.joinpath('data', 'cross_sectional', 'master_table.csv')
df = pd.read_csv(data_path)
if 'date' in df.columns:
    df = df.assign(date=pd.to_datetime(df['date'])).set_index('date')
class CrossSectionalDataSplitter:
    """Date-range train/validation splitter with heuristic feature/target extraction.

    The four constructor arguments are inclusive date-label bounds used to slice a
    table whose index is (convertible to) a ``DatetimeIndex``.
    """

    def __init__(self, train_start='2005-01-01', train_end='2017-12-31', val_start='2018-01-01', val_end='2019-12-31'):
        self.train_start, self.train_end = train_start, train_end
        self.val_start, self.val_end = val_start, val_end

    def split(self, master_table):
        """Return ``{'train': ..., 'val': ...}`` slices of a copy of ``master_table``.

        The index is converted with ``pd.to_datetime``. Label slicing with bounds that
        are not present in the index requires a monotonic ``DatetimeIndex`` (pandas >= 2
        raises ``KeyError`` otherwise), so an unsorted table is stably sorted by date
        first. Tables that are already sorted are sliced exactly as before.
        """
        master_table = master_table.copy()
        master_table.index = pd.to_datetime(master_table.index)
        if not master_table.index.is_monotonic_increasing:
            # mergesort is stable: rows sharing a date keep their original relative order.
            master_table = master_table.sort_index(kind='mergesort')
        return {
            'train': master_table.loc[self.train_start:self.train_end],
            'val': master_table.loc[self.val_start:self.val_end],
        }

    def prepare_features_labels(self, data):
        """Split the numeric columns of ``data`` into ``(features, target)``.

        Target selection: the first keyword in ``target_cols`` (in priority order) that
        occurs as a substring of a non-excluded numeric column name wins. If no keyword
        matches, the second-to-last non-excluded column is used (or the last one when
        only one exists). Features are all remaining non-excluded numeric columns.

        NOTE(review): matching is by substring, so short keywords such as 'y' or 'ret'
        can match unrelated columns; confirm against the real column names.

        Returns:
            (DataFrame, Series). Both are empty when ``data`` is empty or has no
            numeric columns.
        """
        if data.empty:
            # Explicit dtype avoids the ambiguous dtype-less empty Series.
            return pd.DataFrame(), pd.Series(dtype=float)
        numeric_data = data.select_dtypes(include=[np.number])
        if numeric_data.empty:
            return pd.DataFrame(), pd.Series(dtype=float)

        exclude_cols = ['mktcap', 'market_cap', 'date', 'year', 'month', 'ticker', 'permno', 'gvkey']
        target_cols = ['return', 'returns', 'ret', 'target', 'y', 'next_return', 'forward_return', 'ret_1', 'ret_1m', 'ret_12m', 'future_return', 'returns_1d']

        # Lower-case the exclusion list once instead of once per column comparison.
        excluded = {name.lower() for name in exclude_cols}
        candidates = [c for c in numeric_data.columns if c.lower() not in excluded]

        # Keyword priority first, column order second (same order as nested loops).
        target_col = next(
            (col for tc in target_cols for col in candidates if tc.lower() in col.lower()),
            None,
        )
        if target_col is None:
            if len(candidates) > 1:
                target_col = candidates[-2]
            elif candidates:
                target_col = candidates[-1]
            else:
                target_col = numeric_data.columns[-1]

        feature_cols = [c for c in candidates if c != target_col]
        if not feature_cols:
            # Every other column was excluded: fall back to all numeric non-target columns.
            feature_cols = [c for c in numeric_data.columns if c != target_col]
        if not feature_cols:
            # Single numeric column overall: keep the original last-column convention.
            feature_cols = numeric_data.columns[:-1].tolist()
            target_col = numeric_data.columns[-1]
        return numeric_data[feature_cols], numeric_data[target_col]
# Split by date, extract features/target, impute, and standardise.
splitter = CrossSectionalDataSplitter()
data_splits = splitter.split(df)
train_df = data_splits['train']
val_df = data_splits['val']

X_train_df, y_train = splitter.prepare_features_labels(train_df)
X_val_df, y_val = splitter.prepare_features_labels(val_df)

# Missing values (features and targets alike) become 0 before the float32 cast.
# NOTE(review): zero-filling the target treats missing labels as observed zeros — confirm intended.
X_train, y_train, X_val, y_val = (
    part.fillna(0).values.astype(np.float32)
    for part in (X_train_df, y_train, X_val_df, y_val)
)

# Scaling statistics come from the training split only and are reused for validation.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
print(f'Loaded fresh data: train {X_train_scaled.shape[0]}, val {X_val_scaled.shape[0]}')


Loaded fresh data: train 18826, val 3408


In [None]:
"""
Train baseline models when models is empty (standalone notebook execution).
Run with: %run -i train_baseline_if_needed.py
Expects in namespace: X_train_scaled, y_train, X_val_scaled, y_val, device, TRAINING_CONFIG,
  RANDOM_SEED, FeatureTokenTransformer, SingleHeadTransformer, nn, torch, np,
  mean_squared_error, r2_score.
Populates: models, training_history.
"""
from sklearn.linear_model import LinearRegression, Ridge

def train_baseline_models():
    """Train every baseline model on the notebook-level arrays and register the results.

    Reads ``X_train_scaled``, ``y_train``, ``X_val_scaled``, ``y_val`` and the model
    classes from the enclosing namespace, and ``device`` / ``TRAINING_CONFIG`` /
    ``RANDOM_SEED`` through ``globals()`` (with defaults when absent).

    Side effects: rebinds the globals ``models`` and ``training_history`` to fresh
    dicts and fills them with OLS, Ridge, XGBoost (when importable), an MLP and three
    transformer variants. Each ``training_history`` entry holds the validation
    ``rmse`` and ``r2``.
    """
    global models, training_history
    import copy  # local import: snapshots the best transformer weights

    models = {}
    training_history = {}
    _device = globals().get('device', 'cpu')
    _cfg = globals().get('TRAINING_CONFIG', {})
    _seed = globals().get('RANDOM_SEED', 42)
    tr_cfg = _cfg.get('transformer', {'d_model': 72, 'num_heads': 8, 'num_layers': 2, 'd_ff': 512,
        'dropout': 0.1, 'learning_rate': 0.0001, 'batch_size': 32, 'epochs': 100, 'patience': 20})

    def _train_transformer(model, X_train, y_train, X_val, y_val):
        """Mini-batch Adam training with early stopping; returns validation predictions.

        The model is left holding the weights of the epoch with the lowest validation
        loss (not the weights of the last epoch that happened to run).
        """
        model = model.to(_device)
        criterion = nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=tr_cfg['learning_rate'])

        # Expected input width: model attribute, wrapped model attribute, or the data width.
        nf = getattr(model, 'num_features', None)
        if nf is None:
            nf = getattr(getattr(model, 'model', None), 'num_features', X_train.shape[1])

        # Zero-pad or truncate the feature axis so the inputs match the model width.
        if X_train.shape[1] < nf:
            pad_t = np.zeros((X_train.shape[0], nf - X_train.shape[1]))
            pad_v = np.zeros((X_val.shape[0], nf - X_val.shape[1]))
            X_train, X_val = np.hstack([X_train, pad_t]), np.hstack([X_val, pad_v])
        elif X_train.shape[1] > nf:
            X_train, X_val = X_train[:, :nf], X_val[:, :nf]

        X_t = torch.FloatTensor(X_train).to(_device)
        y_t = torch.FloatTensor(y_train).to(_device)
        X_v = torch.FloatTensor(X_val).to(_device)
        y_v = torch.FloatTensor(y_val).to(_device)

        bs = tr_cfg['batch_size']
        best = float('inf')
        best_state = None
        pc = 0
        for ep in range(tr_cfg['epochs']):
            model.train()
            for i in range(0, len(X_t), bs):
                optimizer.zero_grad()
                out = model(X_t[i:i+bs])
                out = out[0] if isinstance(out, tuple) else out
                # reshape(-1) keeps a 1-D prediction even for a final batch of size 1.
                criterion(out.reshape(-1), y_t[i:i+bs]).backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                optimizer.step()
            model.eval()
            with torch.no_grad():
                vp = model(X_v)
                vp = vp[0] if isinstance(vp, tuple) else vp
                vl = criterion(vp.reshape(-1), y_v).item()
            if vl < best:
                best, pc = vl, 0
                # state_dict() returns live references, so a deep copy is needed to freeze it.
                best_state = copy.deepcopy(model.state_dict())
            else:
                pc += 1
                if pc >= tr_cfg['patience']:
                    break

        # Roll back to the best epoch; without this the model is `patience` epochs past it.
        if best_state is not None:
            model.load_state_dict(best_state)

        model.eval()
        with torch.no_grad():
            pred = model(X_v)
            pred = (pred[0] if isinstance(pred, tuple) else pred).reshape(-1).cpu().numpy()
        return pred

    # OLS
    m = LinearRegression()
    m.fit(X_train_scaled, y_train)
    p = m.predict(X_val_scaled)
    models['OLS'] = m
    training_history['OLS'] = {'rmse': np.sqrt(mean_squared_error(y_val, p)), 'r2': r2_score(y_val, p)}

    # Ridge
    m = Ridge(alpha=_cfg.get('ridge', {}).get('alpha', 1.0), random_state=_seed)
    m.fit(X_train_scaled, y_train)
    p = m.predict(X_val_scaled)
    models['Ridge'] = m
    training_history['Ridge'] = {'rmse': np.sqrt(mean_squared_error(y_val, p)), 'r2': r2_score(y_val, p)}

    # XGBoost (optional dependency)
    try:
        import xgboost as xgb
        m = xgb.XGBRegressor(n_estimators=100, max_depth=6, learning_rate=0.1, subsample=0.8,
            colsample_bytree=0.8, random_state=_seed, objective='reg:squarederror', eval_metric='rmse')
        m.fit(X_train_scaled, y_train, eval_set=[(X_val_scaled, y_val)], verbose=False)
        p = m.predict(X_val_scaled)
        models['XGBoost'] = m
        training_history['XGBoost'] = {'rmse': np.sqrt(mean_squared_error(y_val, p)), 'r2': r2_score(y_val, p)}
    except ImportError:
        # Still optional, but say so: a silently missing model is hard to spot downstream.
        print("xgboost is not installed; skipping the XGBoost baseline")

    # MLP
    class MLP(nn.Module):
        """Fully connected regressor: Linear -> ReLU -> Dropout(0.1) per hidden layer."""

        def __init__(self):
            super().__init__()
            h = _cfg.get('mlp', {}).get('hidden_dims', [128, 64])
            layers = []
            prev = X_train_scaled.shape[1]
            for d in h:
                layers += [nn.Linear(prev, d), nn.ReLU(), nn.Dropout(0.1)]
                prev = d
            layers.append(nn.Linear(prev, 1))
            self.net = nn.Sequential(*layers)

        def forward(self, x):
            return self.net(x).squeeze(-1)

    m = MLP().to(_device)
    opt = torch.optim.Adam(m.parameters(), lr=_cfg.get('mlp', {}).get('learning_rate', 0.001))
    Xt = torch.FloatTensor(X_train_scaled).to(_device)
    yt = torch.FloatTensor(y_train).to(_device)
    Xv = torch.FloatTensor(X_val_scaled).to(_device)
    # Full-batch updates, capped at 50 steps; the configured batch_size/patience are not used here.
    for _ in range(min(_cfg.get('mlp', {}).get('epochs', 100), 50)):
        m.train()
        opt.zero_grad()
        torch.nn.functional.mse_loss(m(Xt), yt).backward()
        opt.step()
    m.eval()
    with torch.no_grad():
        p = m(Xv).cpu().numpy()
    models['MLP'] = m
    training_history['MLP'] = {'rmse': np.sqrt(mean_squared_error(y_val, p)), 'r2': r2_score(y_val, p)}

    # Transformers
    nf = X_train_scaled.shape[1]
    for name, cls, kw in [
        ('Single-Head', SingleHeadTransformer, {'num_features': nf, 'd_model': tr_cfg['d_model'], 'num_layers': tr_cfg['num_layers']}),
        ('Multi-Head', FeatureTokenTransformer, {'num_features': nf, 'd_model': tr_cfg['d_model'], 'num_heads': tr_cfg['num_heads'],
            'num_layers': tr_cfg['num_layers'], 'd_ff': tr_cfg['d_ff'], 'dropout': tr_cfg['dropout'], 'use_head_diversity': False}),
        ('Multi-Head Diversity', FeatureTokenTransformer, {'num_features': nf, 'd_model': tr_cfg['d_model'], 'num_heads': tr_cfg['num_heads'],
            'num_layers': tr_cfg['num_layers'], 'd_ff': tr_cfg['d_ff'], 'dropout': tr_cfg['dropout'],
            'use_head_diversity': True, 'diversity_weight': 0.01}),
    ]:
        mdl = cls(**kw)
        pred = _train_transformer(mdl, X_train_scaled, y_train, X_val_scaled, y_val)
        models[name] = mdl
        training_history[name] = {'rmse': np.sqrt(mean_squared_error(y_val, pred)), 'r2': r2_score(y_val, pred)}

    print(f"✓ Trained {len(models)} baseline models (standalone mode)")

if __name__ == '__main__':
    # Train only when no non-empty `models` dict is already present in the namespace.
    g = globals()
    already_loaded = isinstance(g.get('models'), dict) and len(g['models']) > 0
    if already_loaded:
        print("✓ Baseline models already loaded, skipping training")
    else:
        train_baseline_models()

✓ Trained 7 baseline models (standalone mode)


In [None]:
# Standalone fallback: reuse models from a prior run (notebook 02 or the pipeline);
# otherwise try to execute the helper script in this namespace.
if len(models) > 0:
    print(f" Using {len(models)} baseline models from prior run")
else:
    try:
        get_ipython().run_line_magic('run', '-i train_baseline_if_needed.py')
    except Exception as e:
        print(f" Could not run train_baseline_if_needed: {e}. Run notebook 02 first.")

 Using 7 baseline models from prior run


In [None]:
# The same epoch budget bound to three alias names.
# NOTE(review): none of the cells visible here read these names (TRAINING_CONFIG and
# ADVERSARIAL_CONFIG carry their own 'epochs') — confirm they are still needed.
epochs = n_epochs = num_epochs = 100  # full training (standalone mode)


In [None]:
# Adversarial Attack Implementations (A1-A4)
def apply_a1_attack(X, epsilon=0.01):
    """A1 (measurement error): add zero-mean Gaussian noise scaled per column.

    Each column receives noise whose standard deviation is ``epsilon`` times that
    column's standard deviation (taken along axis 0 of ``X`` itself, plus 1e-8).

    Args:
        X: array with a ``shape`` attribute; statistics are computed along axis 0.
        epsilon: noise level relative to the column standard deviation.

    Returns:
        A new array ``X + noise``; ``X`` is not modified. Draws from the global
        NumPy RNG.
    """
    column_scale = np.std(X, axis=0, keepdims=True) + 1e-8
    return X + np.random.normal(0, epsilon, X.shape) * column_scale


def apply_a2_attack(X, missing_rate=0.1):
    """A2 (missingness/staleness): zero a random subset of features in every row.

    For each row, ``int(n_features * missing_rate)`` distinct column indices are
    drawn (without replacement, from the global NumPy RNG) and set to 0.0.

    Args:
        X: 2-D array of shape (n_samples, n_features).
        missing_rate: fraction of features to blank per row.

    Returns:
        A modified copy of ``X``; the input is left untouched.
    """
    corrupted = X.copy()
    row_count, col_count = X.shape
    drop_count = int(col_count * missing_rate)

    for row_idx in range(row_count):
        dropped = np.random.choice(col_count, drop_count, replace=False)
        corrupted[row_idx, dropped] = 0.0

    return corrupted


def apply_a3_attack(X, epsilon=0.01):
    """A3 ("rank manipulation"): jitter each row with noise scaled by that row's own std.

    For every row, a Gaussian vector with standard deviation ``epsilon`` is drawn,
    multiplied by the standard deviation of the row's values (across its features,
    plus 1e-8) and added to the row.

    NOTE(review): nothing here enforces that the ordering of values is preserved;
    for small ``epsilon`` ranks will usually survive, but that is not guaranteed.

    Args:
        X: 2-D array of shape (n_samples, n_features).
        epsilon: noise level relative to the per-row standard deviation.

    Returns:
        A perturbed copy of ``X`` (same dtype as ``X``); draws from the global NumPy RNG.
    """
    X_adv = X.copy()

    for row_idx in range(X.shape[0]):
        row = X[row_idx]
        jitter = np.random.normal(0, epsilon, X.shape[1])
        row_scale = np.std(row, axis=0) + 1e-8  # one scalar per row
        X_adv[row_idx] = row + jitter * row_scale

    return X_adv


def apply_a4_attack(X, epsilon=1.0):
    """A4 (regime shift): add Gaussian shocks whose size scales with column volatility.

    ``epsilon`` acts as a volatility multiplier: each column receives zero-mean noise
    with standard deviation ``epsilon`` times the column's standard deviation
    (computed along axis 0 of ``X``, plus 1e-8).

    Returns:
        A new array ``X + shock``; ``X`` is not modified. Draws from the global
        NumPy RNG.
    """
    volatility = np.std(X, axis=0, keepdims=True) + 1e-8
    shock = np.random.normal(0, epsilon, X.shape) * volatility
    return X + shock





In [None]:
# Bootstrap Confidence Intervals for R² and Robustness Metrics
from scipy import stats

def bootstrap_confidence_interval(data, n_bootstrap=1000, confidence=0.95, method='percentile'):
    """
    Compute a bootstrap confidence interval for the mean of ``data``.

    Parameters:
    -----------
    data : array-like
        Sample data
    n_bootstrap : int
        Number of bootstrap resamples (drawn from the global NumPy RNG)
    confidence : float
        Confidence level (e.g., 0.95 for 95% CI)
    method : str
        'percentile' for the plain percentile interval; any other value selects
        the BCa (bias-corrected and accelerated) interval

    Returns:
    --------
    dict with keys
        'mean'       : mean of the original data
        'std'        : standard deviation of the bootstrap means (standard error)
        'ci_lower'   : lower bound of the confidence interval
        'ci_upper'   : upper bound of the confidence interval
        'confidence' : the confidence level that was requested
    """
    data = np.array(data)
    n = len(data)
    alpha = 1 - confidence

    # One resampled mean per bootstrap replicate (resampling with replacement).
    bootstrap_samples = np.array([
        np.mean(data[np.random.choice(n, size=n, replace=True)])
        for _ in range(n_bootstrap)
    ])

    mean = np.mean(data)
    std = np.std(bootstrap_samples)  # Standard error

    if method == 'percentile':
        ci_lower = np.percentile(bootstrap_samples, 100 * alpha / 2)
        ci_upper = np.percentile(bootstrap_samples, 100 * (1 - alpha / 2))
    else:  # bca method
        # Bias correction. A proportion of exactly 0 or 1 (e.g. constant data) would
        # map to -inf/+inf and poison everything below, so keep it strictly inside
        # (0, 1). Any proportion k/n_bootstrap with 0 < k < n_bootstrap is unaffected.
        below = np.mean(bootstrap_samples < mean)
        edge = 1.0 / (n_bootstrap + 1)
        z0 = stats.norm.ppf(np.clip(below, edge, 1.0 - edge))

        # Acceleration from the jackknife (leave-one-out means).
        jackknife_means = np.array([np.mean(np.delete(data, i)) for i in range(n)])
        deviations = np.mean(jackknife_means) - jackknife_means
        denominator = 6 * np.sum(deviations ** 2) ** 1.5
        # Zero (or undefined) jackknife spread means no skew information: use a = 0.
        a = np.sum(deviations ** 3) / denominator if denominator > 0 else 0.0

        # BCa adjustment of the nominal percentiles.
        z_alpha_lower = stats.norm.ppf(alpha / 2)
        z_alpha_upper = stats.norm.ppf(1 - alpha / 2)
        z_lower = z0 + (z0 + z_alpha_lower) / (1 - a * (z0 + z_alpha_lower))
        z_upper = z0 + (z0 + z_alpha_upper) / (1 - a * (z0 + z_alpha_upper))

        ci_lower = np.percentile(bootstrap_samples, 100 * stats.norm.cdf(z_lower))
        ci_upper = np.percentile(bootstrap_samples, 100 * stats.norm.cdf(z_upper))

    return {
        'mean': mean,
        'std': std,
        'ci_lower': ci_lower,
        'ci_upper': ci_upper,
        'confidence': confidence
    }

def compute_r2_with_ci(y_true, y_pred, n_bootstrap=1000, confidence=0.95):
    """
    R² of ``y_pred`` against ``y_true`` with a percentile bootstrap interval.

    Pairs (true, predicted) are resampled with replacement ``n_bootstrap`` times
    using the global NumPy RNG, and R² is recomputed on every resample.

    Parameters:
    -----------
    y_true : array-like
        True target values
    y_pred : array-like
        Predicted values
    n_bootstrap : int
        Number of bootstrap resamples
    confidence : float
        Confidence level of the interval

    Returns:
    --------
    dict with keys 'r2' (point estimate on the full sample), 'std' (spread of the
    bootstrap R² values), 'ci_lower', 'ci_upper' and 'confidence'
    """
    targets = np.array(y_true)
    predictions = np.array(y_pred)
    sample_count = len(targets)

    # Point estimate on the full sample.
    point_estimate = r2_score(targets, predictions)

    def _resampled_r2():
        picks = np.random.choice(sample_count, size=sample_count, replace=True)
        return r2_score(targets[picks], predictions[picks])

    replicates = np.array([_resampled_r2() for _ in range(n_bootstrap)])

    alpha = 1 - confidence
    return {
        'r2': point_estimate,
        'std': np.std(replicates),
        'ci_lower': np.percentile(replicates, 100 * alpha / 2),
        'ci_upper': np.percentile(replicates, 100 * (1 - alpha / 2)),
        'confidence': confidence
    }

print(" Bootstrap confidence interval functions loaded")

 Bootstrap confidence interval functions loaded


In [None]:
# Enhanced evaluation function with confidence intervals
def evaluate_model_under_attack_with_ci(model, model_name, X_val, y_val, attack_type, epsilon,
                                        device='cpu', is_sklearn=False, num_runs=5, n_bootstrap=1000):
    """
    Score a model on clean and attacked validation data, with bootstrap intervals.

    The attack is re-drawn ``num_runs`` times. Per run, robustness is
    ``min(1, 1 - (adv_rmse - clean_rmse) / clean_rmse)`` (1.0 when the clean RMSE is
    not positive). For 'a2' the missing rate is ``min(epsilon / 10, 0.8)``; an
    unrecognised ``attack_type`` evaluates on an unmodified copy of ``X_val``.
    ``model_name`` is accepted but not used inside the function. Torch models are
    switched to eval mode and left there.

    Returns:
    --------
    dict with 'clean_rmse', 'adv_rmse' (mean over runs), 'delta_rmse', 'robustness'
    (mean over runs), 'robustness_std' / 'robustness_ci_lower' / 'robustness_ci_upper'
    (bootstrap over the per-run values) and 'r2' / 'r2_std' / 'r2_ci_lower' /
    'r2_ci_upper' (clean predictions)
    """
    def _predict(features):
        # Uniform prediction path for scikit-learn style and torch models.
        if is_sklearn:
            return model.predict(features)
        with torch.no_grad():
            raw = model(torch.FloatTensor(features).to(device))
            head = raw[0] if isinstance(raw, tuple) else raw
            return head.cpu().numpy().flatten()

    def _attacked_inputs():
        # Draw one adversarial version of the validation features.
        if attack_type == 'a1':
            return apply_a1_attack(X_val, epsilon=epsilon)
        if attack_type == 'a2':
            return apply_a2_attack(X_val, missing_rate=min(epsilon / 10.0, 0.8))
        if attack_type == 'a3':
            return apply_a3_attack(X_val, epsilon=epsilon)
        if attack_type == 'a4':
            return apply_a4_attack(X_val, epsilon=epsilon)
        return X_val.copy()

    # Inference mode for torch models (dropout layers are additionally forced to eval).
    if not is_sklearn:
        model.eval()
        for dropout_layer in (mod for mod in model.modules() if isinstance(mod, nn.Dropout)):
            dropout_layer.eval()

    # Clean baseline: RMSE and R² (with interval).
    y_pred_clean = _predict(X_val)
    clean_rmse = np.sqrt(mean_squared_error(y_val, y_pred_clean))
    r2_stats = compute_r2_with_ci(y_val, y_pred_clean, n_bootstrap=n_bootstrap)

    # Repeated attack draws.
    adv_rmses = []
    robustness_values = []
    for _ in range(num_runs):
        run_rmse = np.sqrt(mean_squared_error(y_val, _predict(_attacked_inputs())))
        adv_rmses.append(run_rmse)

        run_delta = run_rmse - clean_rmse
        if clean_rmse > 0:
            robustness_values.append(min(1.0, 1.0 - (run_delta / clean_rmse)))
        else:
            robustness_values.append(1.0)

    avg_adv_rmse = np.mean(adv_rmses)

    # Interval over the per-run robustness values.
    robustness_ci = bootstrap_confidence_interval(
        robustness_values, n_bootstrap=n_bootstrap, confidence=0.95
    )

    return {
        'clean_rmse': clean_rmse,
        'adv_rmse': avg_adv_rmse,
        'delta_rmse': avg_adv_rmse - clean_rmse,
        'robustness': np.mean(robustness_values),
        'robustness_std': robustness_ci['std'],
        'robustness_ci_lower': robustness_ci['ci_lower'],
        'robustness_ci_upper': robustness_ci['ci_upper'],
        'r2': r2_stats['r2'],
        'r2_std': r2_stats['std'],
        'r2_ci_lower': r2_stats['ci_lower'],
        'r2_ci_upper': r2_stats['ci_upper']
    }

print(" Enhanced evaluation function with confidence intervals loaded")

 Enhanced evaluation function with confidence intervals loaded


In [None]:
# Adversarial Training Configuration
# One model is trained per (base model, attack, epsilon) combination by the driver cell below.
ADVERSARIAL_CONFIG = {
    'epsilons': [0.25, 0.5, 1.0],  # Attack strengths (for 'a2' the missing rate is min(epsilon / 10, 0.8))
    'attacks': ['a1', 'a2', 'a3', 'a4'],  # Attack types
    'robust_weight': 0.3,  # Weight for adversarial loss (0.3 = 30% adversarial, 70% clean)
    'learning_rate': 0.0001,  # Adam learning rate
    'batch_size': 32,
    'epochs': 100,  # Upper bound; early stopping may end training sooner
    'patience': 20,  # Epochs without validation improvement before early stopping
    'warmup_epochs': 5  # Gradually increase adversarial weight (from 0.1 up to 'robust_weight')
}

# Store adversarially trained models
# NOTE: re-running this cell resets both registries to empty dicts.
adversarial_models = {}
adversarial_training_history = {}

In [None]:
def adversarial_training_step(model, X_batch, y_batch, attack_type, epsilon,
                             optimizer, device='cpu', robust_weight=0.3):
    """
    Run one optimisation step on a blend of clean and adversarial MSE.

    The loss is ``(1 - robust_weight) * clean + robust_weight * adversarial``, where
    the adversarial inputs are generated from ``X_batch`` by the selected attack.
    Gradients are clipped to a total norm of 1.0 before the optimizer step.

    Args:
        model: The model to train (put into train mode by this call)
        X_batch: Input batch (numpy array)
        y_batch: Target batch (numpy array)
        attack_type: 'a1', 'a2', 'a3', or 'a4'
        epsilon: Attack strength (for 'a2': missing rate = min(epsilon / 10, 0.8))
        optimizer: Optimizer
        device: Device to use
        robust_weight: Weight for adversarial loss

    Returns:
        Dict with 'clean_loss', 'adversarial_loss' and 'total_loss' as floats, or
        None when any prediction/loss is NaN/Inf or the loss carries no gradient
        (no parameter update happens in that case).

    Raises:
        ValueError: for an unknown ``attack_type``.
    """
    def _has_bad_values(tensor):
        # True when any element is NaN or +/-Inf.
        return bool(torch.any(torch.isnan(tensor)) or torch.any(torch.isinf(tensor)))

    def _primary_output(inputs):
        # Models may return (prediction, extras); only the prediction is used.
        result = model(inputs)
        return result[0] if isinstance(result, tuple) else result

    mse = nn.MSELoss()

    model.train()
    optimizer.zero_grad()

    X_tensor = torch.FloatTensor(X_batch).to(device)
    y_tensor = torch.FloatTensor(y_batch).to(device)

    # --- clean pass ---
    y_pred_clean = _primary_output(X_tensor)
    if _has_bad_values(y_pred_clean):
        return None
    clean_loss = mse(y_pred_clean.squeeze(), y_tensor)
    if _has_bad_values(clean_loss):
        return None

    # --- adversarial inputs ---
    if attack_type == 'a1':
        X_adv = apply_a1_attack(X_batch, epsilon=epsilon)
    elif attack_type == 'a2':
        # epsilon is mapped to a missing rate, capped at 80%
        X_adv = apply_a2_attack(X_batch, missing_rate=min(epsilon / 10.0, 0.8))
    elif attack_type == 'a3':
        X_adv = apply_a3_attack(X_batch, epsilon=epsilon)
    elif attack_type == 'a4':
        X_adv = apply_a4_attack(X_batch, epsilon=epsilon)
    else:
        raise ValueError(f"Unknown attack type: {attack_type}")

    # --- adversarial pass ---
    y_pred_adv = _primary_output(torch.FloatTensor(X_adv).to(device))
    if _has_bad_values(y_pred_adv):
        return None
    adv_loss = mse(y_pred_adv.squeeze(), y_tensor)
    if _has_bad_values(adv_loss):
        return None

    # --- combined objective ---
    total_loss = (1 - robust_weight) * clean_loss + robust_weight * adv_loss
    if _has_bad_values(total_loss) or not total_loss.requires_grad:
        return None

    try:
        total_loss.backward()
    except RuntimeError as e:
        message = str(e)
        if "does not require grad" in message or "does not have a grad_fn" in message:
            # Graph-less loss: drop whatever was accumulated and skip the batch.
            optimizer.zero_grad()
            return None
        raise

    # Clip to guard against exploding gradients, then update.
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()

    return {
        'clean_loss': clean_loss.item(),
        'adversarial_loss': adv_loss.item(),
        'total_loss': total_loss.item()
    }

In [None]:
def train_adversarial_model(model, model_name, X_train, y_train, X_val, y_val,
                           attack_type, epsilon, config, device='cpu'):
    """
    Fine-tune a deep copy of ``model`` with adversarial training and early stopping.

    Each epoch runs ``adversarial_training_step`` over consecutive mini-batches and
    then scores the clean validation set. The adversarial weight ramps linearly from
    0.1 to ``config['robust_weight']`` over the warm-up epochs. Training stops early
    after ``config['patience']`` epochs without validation improvement, or when the
    validation predictions become constant (model collapse). The returned model
    carries the weights of the epoch with the lowest validation loss.

    Args:
        model: Model to train (will be copied; the original is not modified)
        model_name: Name of the model (used in log lines only)
        X_train: Training features (numpy array)
        y_train: Training targets
        X_val: Validation features (numpy array)
        y_val: Validation targets
        attack_type: 'a1', 'a2', 'a3', or 'a4'
        epsilon: Attack strength
        config: Dict with 'learning_rate', 'batch_size', 'epochs', 'patience',
            'robust_weight' and optionally 'warmup_epochs' (default 5)
        device: Device to use

    Returns:
        (trained model, 1-D numpy array of validation predictions, history dict with
        per-epoch 'train_loss', 'val_loss', 'train_clean_loss', 'train_adv_loss')
    """
    # Create a fresh copy of the model for adversarial training
    import copy
    model = copy.deepcopy(model)
    model = model.to(device)
    model.train()

    optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'])
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=5, min_lr=1e-6
    )

    # Expected input width: model attribute, wrapped model attribute, or the data width
    # (same lookup order as the baseline trainer, without raising when neither exists).
    num_features = getattr(model, 'num_features', None)
    if num_features is None:
        num_features = getattr(getattr(model, 'model', None), 'num_features', X_train.shape[1])

    # Batches are handed to the training step as numpy arrays, so keep the training
    # data on the host instead of copying device -> host for every batch.
    X_train_np = np.asarray(X_train, dtype=np.float32)
    y_train_np = np.asarray(y_train, dtype=np.float32)
    X_val_np = np.asarray(X_val, dtype=np.float32)

    # Handle feature dimension mismatch: zero-pad or truncate the feature axis
    if X_train_np.shape[1] < num_features:
        padding_train = np.zeros((X_train_np.shape[0], num_features - X_train_np.shape[1]), dtype=np.float32)
        padding_val = np.zeros((X_val_np.shape[0], num_features - X_val_np.shape[1]), dtype=np.float32)
        X_train_np = np.hstack([X_train_np, padding_train])
        X_val_np = np.hstack([X_val_np, padding_val])
    elif X_train_np.shape[1] > num_features:
        X_train_np = X_train_np[:, :num_features]
        X_val_np = X_val_np[:, :num_features]

    X_val_tensor = torch.FloatTensor(X_val_np).to(device)
    y_val_tensor = torch.FloatTensor(y_val).to(device)

    history = {
        'train_loss': [],
        'val_loss': [],
        'train_clean_loss': [],
        'train_adv_loss': []
    }

    best_val_loss = float('inf')
    best_state = None  # snapshot of the weights at the best validation loss
    patience_counter = 0
    warmup_epochs = config.get('warmup_epochs', 5)

    batch_size = config['batch_size']

    for epoch in range(config['epochs']):
        # Gradual warmup: increase robust_weight from 0.1 to target value
        if epoch < warmup_epochs:
            current_robust_weight = 0.1 + (config['robust_weight'] - 0.1) * (epoch / warmup_epochs)
        else:
            current_robust_weight = config['robust_weight']

        epoch_losses = {'clean': [], 'adv': [], 'total': []}

        # Training
        model.train()
        for i in range(0, len(X_train_np), batch_size):
            batch_X = X_train_np[i:i+batch_size]
            batch_y = y_train_np[i:i+batch_size]

            losses = adversarial_training_step(
                model, batch_X, batch_y, attack_type, epsilon,
                optimizer, device, current_robust_weight
            )

            # Skip batch if None (invalid batch)
            if losses is None:
                continue

            # Skip batches whose reported losses are not finite
            if not all(np.isfinite(losses[key]) for key in ('total_loss', 'clean_loss', 'adversarial_loss')):
                continue

            epoch_losses['clean'].append(losses['clean_loss'])
            epoch_losses['adv'].append(losses['adversarial_loss'])
            epoch_losses['total'].append(losses['total_loss'])

        # Skip epoch if all losses are invalid
        if len(epoch_losses['total']) == 0:
            continue

        # Validation
        model.eval()
        with torch.no_grad():
            output_val = model(X_val_tensor)
            if isinstance(output_val, tuple):
                y_pred_val = output_val[0]
            else:
                y_pred_val = output_val

            # reshape(-1) keeps the prediction 1-D even for a single validation row
            y_pred_flat = y_pred_val.reshape(-1)

            # Check for constant predictions (model collapse detection)
            pred_std = np.std(y_pred_flat.cpu().numpy())

            if pred_std < 1e-8:
                print(f"   MODEL COLLAPSE DETECTED at epoch {epoch+1}!")
                break

            val_loss = nn.MSELoss()(y_pred_flat, y_val_tensor).item()

            # Check for NaN/Inf in validation loss
            if np.isnan(val_loss) or np.isinf(val_loss):
                val_loss = float('inf')

        # Record history
        avg_train_loss = np.mean(epoch_losses['total']) if epoch_losses['total'] else float('inf')
        avg_clean_loss = np.mean(epoch_losses['clean']) if epoch_losses['clean'] else 0.0
        avg_adv_loss = np.mean(epoch_losses['adv']) if epoch_losses['adv'] else 0.0

        history['train_loss'].append(avg_train_loss)
        history['val_loss'].append(val_loss)
        history['train_clean_loss'].append(avg_clean_loss)
        history['train_adv_loss'].append(avg_adv_loss)

        # Learning rate scheduling
        scheduler.step(val_loss)

        # Early stopping
        if not (np.isnan(val_loss) or np.isinf(val_loss)):
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                patience_counter = 0
                # state_dict() returns live references; deep-copy to freeze this epoch.
                best_state = copy.deepcopy(model.state_dict())
            else:
                patience_counter += 1
                if patience_counter >= config['patience']:
                    print(f"  {model_name} ({attack_type.upper()}, ε={epsilon}): Early stopping at epoch {epoch+1}")
                    break

        if (epoch + 1) % 10 == 0:
            print(f"  {model_name} ({attack_type.upper()}, ε={epsilon}) - Epoch {epoch+1}/{config['epochs']}: "
                  f"Train Loss={avg_train_loss:.6f}, Val Loss={val_loss:.6f}, "
                  f"Robust Weight={current_robust_weight:.3f}")

    # Roll back to the best epoch: without this, early stopping (or a collapse) hands
    # back weights that are already past the best validation loss.
    if best_state is not None:
        model.load_state_dict(best_state)

    # Final evaluation
    model.eval()
    with torch.no_grad():
        final_pred = model(X_val_tensor)
        if isinstance(final_pred, tuple):
            final_pred = final_pred[0]
        final_pred = final_pred.reshape(-1).cpu().numpy()

    return model, final_pred, history

In [None]:
# Adversarial training sweep: every transformer baseline x attack x epsilon.
print("=" * 80)
print("ADVERSARIAL TRAINING FOR TRANSFORMER MODELS")
print("=" * 80)
print(f"Training on attacks: {ADVERSARIAL_CONFIG['attacks']}")
print(f"Epsilons: {ADVERSARIAL_CONFIG['epsilons']}")
print(f"Robust weight: {ADVERSARIAL_CONFIG['robust_weight']}")
print()

# Baselines that serve as starting points (a missing name raises KeyError here).
transformer_model_names = ['Single-Head', 'Multi-Head', 'Multi-Head Diversity']
base_models = {name: models[name] for name in transformer_model_names}

for model_name in transformer_model_names:
    print(f"\n{'='*80}")
    print(f"Training {model_name} with Adversarial Training")
    print(f"{'='*80}")

    base_model = base_models[model_name]

    for attack_type in ADVERSARIAL_CONFIG['attacks']:
        for epsilon in ADVERSARIAL_CONFIG['epsilons']:
            model_key = f"{model_name} ({attack_type.upper()}, ε={epsilon})"
            print(f"\nTraining {model_key}...")

            # A failure in one configuration is reported and the sweep continues.
            try:
                adv_model, adv_pred, adv_history = train_adversarial_model(
                    base_model, model_name, X_train_scaled, y_train,
                    X_val_scaled, y_val, attack_type, epsilon,
                    ADVERSARIAL_CONFIG, device
                )

                # Clean validation metrics of the adversarially trained model.
                adv_rmse = np.sqrt(mean_squared_error(y_val, adv_pred))
                adv_r2 = r2_score(y_val, adv_pred)

                adversarial_models[model_key] = adv_model
                adversarial_training_history[model_key] = dict(
                    rmse=adv_rmse,
                    r2=adv_r2,
                    history=adv_history,
                )

                print(f"  {model_key} trained - RMSE: {adv_rmse:.6f}, R²: {adv_r2:.6f}")

            except Exception as e:
                print(f" Error training {model_key}: {e}")
                import traceback
                traceback.print_exc()

print("\n" + "=" * 80)
print("ADVERSARIAL TRAINING COMPLETE")
print("=" * 80)
print(f"Total adversarially trained models: {len(adversarial_models)}")

ADVERSARIAL TRAINING FOR TRANSFORMER MODELS
Training on attacks: ['a1', 'a2', 'a3', 'a4']
Epsilons: [0.25, 0.5, 1.0]
Robust weight: 0.3


Training Single-Head with Adversarial Training

Training Single-Head (A1, ε=0.25)...
  Single-Head (A1, ε=0.25) - Epoch 10/100: Train Loss=0.000420, Val Loss=0.000268, Robust Weight=0.300
  Single-Head (A1, ε=0.25) - Epoch 20/100: Train Loss=0.000381, Val Loss=0.000274, Robust Weight=0.300
  Single-Head (A1, ε=0.25): Early stopping at epoch 26
  Single-Head (A1, ε=0.25) trained - RMSE: 0.016570, R²: 0.098650

Training Single-Head (A1, ε=0.5)...
  Single-Head (A1, ε=0.5) - Epoch 10/100: Train Loss=0.000432, Val Loss=0.000270, Robust Weight=0.300
  Single-Head (A1, ε=0.5) - Epoch 20/100: Train Loss=0.000402, Val Loss=0.000271, Robust Weight=0.300
  Single-Head (A1, ε=0.5): Early stopping at epoch 24
  Single-Head (A1, ε=0.5) trained - RMSE: 0.016513, R²: 0.104793

Training Single-Head (A1, ε=1.0)...
  Single-Head (A1, ε=1.0) - Epoch 10/100: Train Loss=