In [1]:
import itertools
import joblib
import pickle 

import networkx as nx
import numpy as np
import pandas as pd
import cvxpy as cp

import matplotlib.pyplot as plt
import matplotlib.pylab as pl
import seaborn as sns
import random 
import joblib

import numpy as np
from sklearn.linear_model import Lasso
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split


import numpy as np
import networkx as nx

from src.CBN import CausalBayesianNetwork as CBN
import modularised_utils as mut
import Linear_Additive_Noise_Models as lanm
import operations as ops
import evaluation_utils as evut
import opt_utils as oput
import params

# Seed NumPy's legacy global RNG so that np.random.* draws further down are
# repeatable on a fresh kernel. The stdlib `random` module imported above is
# not seeded here.
np.random.seed(0)



In [2]:
# Experiment tag; interpolated into the data/{experiment}/... paths used by the
# joblib.dump / joblib.load calls below.
experiment = 'battery_discrete'

In [3]:
# Load the pickled base-level (WMG) and abstract-level (LRCS) models. Only
# their .nodes() and .edges() are read in this notebook.
# NOTE(review): joblib.load unpickles arbitrary objects — only load trusted files.
M_base = joblib.load('batteries/scms/M_WMG_bins_5_avg_2.pkl')
M_abst = joblib.load('batteries/scms/M_LRCS_bins_5.pkl') 

In [4]:
# Load the pickled base-level and abstract-level data; both are treated as
# pandas DataFrames by the cells below (.drop / .replace / .rename).
# NOTE(review): joblib.load unpickles arbitrary objects — only load trusted files.
df_base = joblib.load('batteries/dfs/df_WMG_bins_5_avg_2.pkl')
df_abst = joblib.load('batteries/dfs/df_LRCS_bins_5.pkl')

In [5]:
# Base level: drop the columns at positions 1 and 2, recode the listed settings
# as ordinal codes 0..5, and shorten the averaged ML column names so they match
# the graph.
# NOTE(review): replace() acts on every remaining column, not only the
# comma-gap one — confirm no ML bin can equal one of the recoded values.
# NOTE(review): this cell is not idempotent; re-running it drops further columns.
df_base = (
    df_base
    .drop(df_base.columns[[1, 2]], axis=1)
    .replace({75: 0, 110: 1, 150: 2, 170: 3, 180: 4, 200: 5})
    .rename(columns={'binned ML_avg0': 'ML0', 'binned ML_avg1': 'ML1'})
)

# Abstract level: drop the column at position 1, recode the listed settings as
# ordinal codes 0..2, and rename the columns to match the graph.
df_abst = (
    df_abst
    .drop(df_abst.columns[[1]], axis=1)
    .replace({75: 0, 100: 1, 200: 2})
    .rename(columns={'Comma gap (µm)': 'CG', 'binned ML': 'ML'})
)

In [6]:
# Copy the node and edge sets of the loaded models into fresh directed graphs:
# Gll for the base level, Ghl for the abstract level.
Gll, Ghl = nx.DiGraph(), nx.DiGraph()
for graph, model in ((Gll, M_base), (Ghl, M_abst)):
    graph.add_nodes_from(model.nodes())
    graph.add_edges_from(model.edges())

In [7]:
# Fraction of rows held out for the downstream evaluation.
test_size = 0.1

# Each level is split independently, with the same seed.
# NOTE(review): the two frames are truncated to a common length further down,
# which suggests they can differ in size — in that case an identical
# random_state does not give row-aligned splits. Confirm alignment is not assumed.
df_base_train, df_base_test = train_test_split(df_base, test_size=test_size, random_state=42)
df_abst_train, df_abst_test = train_test_split(df_abst, test_size=test_size, random_state=42)

# Get coefficients using the modularised_utils function
ll_coeffs = mut.get_coefficients(df_base_train.to_numpy(), Gll)
hl_coeffs = mut.get_coefficients(df_abst_train.to_numpy(), Ghl)

# Gll / Ghl are rebound here: from the nx.DiGraph objects built above to CBN
# objects constructed from the coefficient-dict keys (presumably the edges —
# TODO confirm against mut.get_coefficients).
Gll = CBN(list(ll_coeffs.keys()))
Ghl = CBN(list(hl_coeffs.keys()))

In [42]:
# Persist the held-out splits for later evaluation. joblib.dump does not create
# missing directories, so data/{experiment}/ has to exist already.
joblib.dump(df_base_test, f"data/{experiment}/df_base_test.pkl")
joblib.dump(df_abst_test, f"data/{experiment}/df_abst_test.pkl")
# Earlier file names, kept for reference:
# joblib.dump(df_base_test, f"data/{experiment}/Dll_obs_test.pkl")
# joblib.dump(df_abst_test, f"data/{experiment}/Dhl_obs_test.pkl")


['data/battery_discrete/df_abst_test.pkl']

In [8]:
# Row counts of the two training splits *before* truncation; these are the N
# values passed to the radius computation later on.
num_llsamples = df_base_train.shape[0]
num_hlsamples = df_abst_train.shape[0]
min_samples = min(num_llsamples, num_hlsamples)

# Keep the first min_samples rows of each split and continue with plain arrays.
# From here on df_base_train / df_abst_train are numpy arrays, not DataFrames.
df_base_train = df_base_train[:min_samples].to_numpy()
df_abst_train = df_abst_train[:min_samples].to_numpy()

# Number of variables at each level, taken from the graphs.
l = len(Gll.nodes())
h = len(Ghl.nodes())

In [9]:
def map_n_bin_old(T, df_base, df_abst):
    """
    Transform base samples with T and discretise every output dimension onto
    the values observed in the matching column of the abstract data.

    For each row ``dim`` of ``T @ df_base.T`` the function
      1. takes the sorted unique values of ``df_abst[:, dim]`` as target levels,
      2. builds one quantile bin per level from the transformed samples
         themselves (falling back to equal-width bins over the sample range
         when the quantile edges are not all distinct),
      3. replaces each transformed value by the level of the bin it falls in.

    Args:
        T: transformation matrix, applied as ``T @ df_base.T``
        df_base: base model data (numpy array, one sample per row)
        df_abst: abstract model data (numpy array, one sample per row)

    Returns:
        binned_samples: array with one row per base sample and one column per
        row of T; every entry is a value taken from the matching df_abst column.
    """
    # One row per abstract dimension, one column per base sample.
    continuous_samples = T @ df_base.T

    binned_samples = np.zeros_like(continuous_samples)

    for dim in range(continuous_samples.shape[0]):
        row = continuous_samples[dim]

        # Target levels for this dimension (np.unique returns them sorted).
        unique_vals = np.unique(df_abst[:, dim])
        n_bins = len(unique_vals)

        # Quantile edges of the transformed data; ties collapse under np.unique,
        # in which case equal-width edges are used instead.
        bin_edges = np.unique(np.percentile(row, np.linspace(0, 100, n_bins + 1)))
        if len(bin_edges) < n_bins + 1:
            bin_edges = np.linspace(row.min(), row.max(), n_bins + 1)

        # Only interior edges are passed, so indices fall in 0..n_bins-1 and the
        # extremes land in the first / last bin.
        bin_indices = np.digitize(row, bin_edges[1:-1])

        binned_samples[dim] = unique_vals[bin_indices]

    return binned_samples.T


In [10]:
def map_n_bin(T, df_base, df_abst):
    """
    Transform base samples and bin the second output dimension using fixed bin
    edges derived from df_abst.

    Row 0 of ``T @ df_base.T`` (CG) is passed through unchanged; the original
    author's notes say it is mapped manually elsewhere. Row 1 (ML) is
    discretised: bin edges are percentiles of ``df_abst[:, 1]`` (equal-width
    edges over its range when the percentile edges are not all distinct) and
    each value is replaced by the matching unique ML value from df_abst.
    Any further rows of the transformed data are left at zero.

    Args:
        T: transformation matrix
        df_base: base model data (numpy array)
        df_abst: abstract model data (numpy array)

    Returns:
        binned_samples: array with one row per base sample; column 0 holds the
        untouched transformed CG value, column 1 the binned ML value.
    """
    # Apply transformation
    continuous_samples = T @ df_base.T  # (d, N)

    binned_samples = np.zeros_like(continuous_samples)

    # (0) CG: keep the continuous value; no binning happens here.
    binned_samples[0] = continuous_samples[0]

    # (1) ML: one bin per distinct abstract ML value, edges fixed by df_abst
    # rather than by the transformed samples.
    ml_values = df_abst[:, 1]
    unique_ml = np.unique(ml_values)
    n_bins_ml = len(unique_ml)

    bin_edges_ml = np.unique(np.percentile(ml_values, np.linspace(0, 100, n_bins_ml + 1)))
    if len(bin_edges_ml) < n_bins_ml + 1:
        bin_edges_ml = np.linspace(ml_values.min(), ml_values.max(), n_bins_ml + 1)

    # Interior edges only, so indices fall in 0..n_bins_ml-1.
    bin_indices_ml = np.digitize(continuous_samples[1], bin_edges_ml[1:-1])
    binned_samples[1] = unique_ml[bin_indices_ml]

    return binned_samples.T


In [11]:
# Abduction step for each level, run on the truncated training arrays with the
# fitted coefficients. Each call returns three objects; judging by the names
# they are the recovered exogenous samples plus a mean and a covariance
# estimate — TODO confirm against mut.lan_abduction.
U_ll_hat, mu_U_ll_hat, Sigma_U_ll_hat = mut.lan_abduction(df_base_train, Gll, ll_coeffs)
U_hl_hat, mu_U_hl_hat, Sigma_U_hl_hat = mut.lan_abduction(df_abst_train, Ghl, hl_coeffs)

In [12]:
# Base-level interventions 
# Base-level interventions. CG is addressed by the ordinal code introduced in
# the preprocessing cell (0 -> 75, 1 -> 110, 4 -> 180, 5 -> 200); None stands
# for the observational (no-intervention) regime.
iota0 = None
iota1 = ops.Intervention({'CG': 0})
iota2 = ops.Intervention({'CG': 1})
iota3 = ops.Intervention({'CG': 4})
iota4 = ops.Intervention({'CG': 5})

# Abstract-level interventions (codes 0 -> 75, 1 -> 100, 2 -> 200).
iota0_prime = None
iota1_prime = ops.Intervention({'CG': 0})
iota2_prime = ops.Intervention({'CG': 1})
iota3_prime = ops.Intervention({'CG': 2})


# Mapping from base-level to abstract-level interventions; iota3 and iota4
# both map to iota3_prime.
omega = {
    iota0: iota0_prime,
    iota1: iota1_prime,
    iota2: iota2_prime,
    iota3: iota3_prime,
    iota4: iota3_prime
}

# Intervention lists for each level. Dict keys are already unique, and
# dict.fromkeys de-duplicates the values while keeping first-seen order, so
# both lists have the same members as the former list(set(...)) but in a
# reproducible order (set iteration order is unspecified).
Ill = list(omega)
Ihl = list(dict.fromkeys(omega.values()))


In [47]:
# Datasets keyed by intervention. Only the None key (the same key used for
# iota0 / iota0_prime) is filled, with the (base, abstract) pair of truncated
# training arrays.
Ds = {}
Ds[None] = (df_base_train, df_abst_train)

# Persist graph + intervention list and coefficients for each level, the
# datasets, the intervention mapping, and the abstract-level abduction outputs
# under data/{experiment}/.
# NOTE(review): the base-level abduction outputs (U_ll_hat, ...) are not dumped
# here — confirm that is intended.
joblib.dump((Gll, Ill), f"data/{experiment}/LL.pkl")
joblib.dump(ll_coeffs, f"data/{experiment}/ll_coeffs.pkl")

joblib.dump((Ghl, Ihl), f"data/{experiment}/HL.pkl")
joblib.dump(hl_coeffs, f"data/{experiment}/hl_coeffs.pkl")

joblib.dump(Ds, f"data/{experiment}/Ds.pkl")

joblib.dump(omega, f"data/{experiment}/omega.pkl")
joblib.dump((U_hl_hat, mu_U_hl_hat, Sigma_U_hl_hat), f"data/{experiment}/exogenous_HL.pkl")

['data/battery_discrete/exogenous_HL.pkl']

In [48]:
# One linear additive-noise SCM per intervention, at each level. The None key
# holds the model built with no intervention.
LLmodels = {iota: lanm.LinearAddSCM(Gll, ll_coeffs, iota) for iota in Ill}

HLmodels = {eta: lanm.LinearAddSCM(Ghl, hl_coeffs, eta) for eta in Ihl}

In [49]:
# Persist the per-intervention models of both levels under data/{experiment}/.
joblib.dump(LLmodels, f"data/{experiment}/LLmodels.pkl")
joblib.dump(HLmodels, f"data/{experiment}/HLmodels.pkl")

['data/battery_discrete/HLmodels.pkl']

In [26]:
# Structural matrices for every intervention at each level. In this notebook
# they are only consumed by the barycentric baseline (opt_params_bary).
# NOTE(review): what exactly is returned is defined in opt_utils — not visible here.
L_matrices = oput.compute_struc_matrices(LLmodels, Ill)
H_matrices = oput.compute_struc_matrices(HLmodels, Ihl)

In [13]:
# Empirical radius for each level, rounded to 3 decimals. N is the training-row
# count taken *before* the splits were truncated to a common length, and m is
# the number of graph nodes at that level.
# NOTE(review): eta, c1, c2 and alpha are hard-coded here; their meaning is
# defined by evut.compute_empirical_radius.
ll_bound = round(evut.compute_empirical_radius(N=num_llsamples, eta=0.05, c1=1000.0, c2=1.0, alpha=2.0, m=l), 3)
hl_bound = round(evut.compute_empirical_radius(N=num_hlsamples, eta=0.05, c1=1000.0, c2=1.0, alpha=2.0, m=h), 3)

In [14]:
# Show the two radii (base level first, abstract level second).
print(ll_bound, hl_bound)

0.358 0.417


In [70]:
# Default radii: the empirical bounds computed above (base, abstract).
epsilon, delta = ll_bound, hl_bound

# Step sizes for the max / min sides of the optimisation. All values in this
# cell are forwarded unchanged through opt_params_erica; their exact semantics
# are defined in opt_utils.
eta_max = 0.001
eta_min = 0.001

# Iteration budget and inner step counts.
# NOTE(review): the recorded progress bars of the radius sweep below show a
# budget of 1000, so that output was produced with a different max_iter.
max_iter = 5000
num_steps_min = 5
num_steps_max = 2

# Robustness switches for the base (L) and abstract (H) level.
robust_L = True 
robust_H = True

initialization = 'random' # 'random'

# Convergence tolerance and optimiser seed.
tol  = 1e-4
seed = 23

In [72]:
# Keyword arguments shared by the ERICA optimisation runs below. The
# 'experiment' entry reuses the notebook-level `experiment` tag so the name is
# defined in one place only (it was previously repeated as a literal).
opt_params_erica = {
    'U_L': U_ll_hat,
    'U_H': U_hl_hat,
    'L_models': LLmodels,
    'H_models': HLmodels,
    'omega': omega,
    'epsilon': epsilon,
    'delta': delta,
    'eta_min': eta_min,
    'eta_max': eta_max,
    'num_steps_min': num_steps_min,
    'num_steps_max': num_steps_max,
    'max_iter': max_iter,
    'tol': tol,
    'seed': seed,
    'robust_L': robust_L,
    'robust_H': robust_H,
    'initialization': initialization,
    'experiment': experiment,
}

In [25]:
# Collects one entry per trained abstraction, keyed 'T_<tag>'; each entry holds
# 'optimization_params' and 'T_matrix'. Re-running this cell discards results
# already stored.
diroca_train_results_empirical = {}

In [26]:
# Define different epsilon=delta values
eps_delta_values     = [8, ll_bound, 1, 2, 4]

# For each epsilon=delta value
for eps_delta in eps_delta_values:
    print(f"Training for ε=δ = {eps_delta}")
    # Update theta parameters
    if eps_delta == ll_bound:
        opt_params_erica['epsilon'] = ll_bound
        opt_params_erica['delta']   = hl_bound
    
    else:
        opt_params_erica['epsilon'] = eps_delta
        opt_params_erica['delta']   = eps_delta
    
    # Run ERICA optimization
    params_empirical, T_empirical = oput.run_empirical_erica_optimization_batt(**opt_params_erica)
    
    # Store results including optimization parameters and transformation matrix
    if eps_delta == ll_bound:
        diroca_train_results_empirical['T_'+str(ll_bound)+'-'+str(hl_bound)] = {
                                                    'optimization_params': params_empirical,
                                                    'T_matrix': T_empirical
                                                }
    else:
        diroca_train_results_empirical['T_'+str(eps_delta)] = {
                                                    'optimization_params': params_empirical,
                                                    'T_matrix': T_empirical
                                                }

print("\nTraining completed. T matrices stored in trained_results dictionary.")
print("Available ε=δ values:", list(diroca_train_results_empirical.keys()))



Training for ε=δ = 8


 22%|██▏       | 224/1000 [00:11<00:38, 19.93it/s]


Converged at iteration 225
Training for ε=δ = 0.358


 30%|███       | 300/1000 [00:14<00:33, 20.75it/s]


Converged at iteration 301
Training for ε=δ = 1


 27%|██▋       | 272/1000 [00:13<00:34, 20.83it/s]


Converged at iteration 273
Training for ε=δ = 2


 22%|██▏       | 224/1000 [00:10<00:37, 20.54it/s]


Converged at iteration 225
Training for ε=δ = 4


 22%|██▏       | 224/1000 [00:10<00:37, 20.63it/s]


Converged at iteration 225

Training completed. T matrices stored in trained_results dictionary.
Available ε=δ values: ['T_8', 'T_0.358-0.417', 'T_1', 'T_2', 'T_4']


In [73]:
# Non-robust baseline: the shared settings with both robustness flags off.
# NOTE(review): this calls run_empirical_erica_optimization, whereas the radius
# sweep above calls the *_batt variant — confirm the difference is intended.
params_enrico, T_enrico = oput.run_empirical_erica_optimization(**{**opt_params_erica, 'robust_L': False, 'robust_H': False})

 71%|███████   | 3539/5000 [00:17<00:07, 204.68it/s]

Converged at iteration 3540





In [74]:
# Store the non-robust baseline next to the sweep results, under 'T_0.00'.
diroca_train_results_empirical['T_0.00'] = {
                                'optimization_params': params_enrico,
                                'T_matrix': T_enrico
                            }

In [29]:
# Keyword arguments for the barycentric baseline: abduction outputs plus the
# structural matrices of both levels.
# NOTE(review): max_iter is hard-coded to 1000 here instead of reusing the
# notebook-level max_iter — confirm that is intended.
opt_params_bary = {
                        'U_ll_hat':U_ll_hat,
                        'U_hl_hat':U_hl_hat,
                        'L_matrices':L_matrices,
                        'H_matrices':H_matrices,
                        'max_iter':1000,
                        'tol':tol,
                        'seed':seed
                    }
                                 

In [30]:
# The barycentric optimiser returns only the transformation matrix.
T_bary = oput.run_empirical_bary_optim(**opt_params_bary)
# Empty per-level dicts keep the {'L': ..., 'H': ...} layout of the other
# entries; note there is no 'pert_U' key here, unlike the other entries read
# later on.
params_bary = {'L':{}, 'H':{}}

100%|██████████| 1000/1000 [00:01<00:00, 740.78it/s]


In [31]:
# Store the barycentric baseline under 'T_b'.
diroca_train_results_empirical['T_b'] = {
                                'optimization_params': params_bary,
                                'T_matrix': T_bary
                            }

In [32]:
# Keyword arguments for the smoothing baseline. It reuses the models, mapping,
# eta_min / num_steps_min, tol and seed from above, with its own iteration
# budget and two noise settings (semantics defined in opt_utils).
opt_params_smooth = {
                        'U_L': U_ll_hat,
                        'U_H': U_hl_hat,
                        'L_models': LLmodels,
                        'H_models': HLmodels,
                        'omega': omega,
                        'eta_min': eta_min,
                        'num_steps_min': num_steps_min,
                        'max_iter': 300, #300
                        'tol': tol,
                        'seed': seed,
                        'noise_sigma': 0.1, #0.1
                        'num_noise_samples': 10
                        }

In [33]:
# Run the smoothing baseline; returns its optimisation parameters and the
# transformation matrix.
params_smooth, T_smooth = oput.run_empirical_smooth_optimization_batt(**opt_params_smooth)

100%|██████████| 300/300 [00:49<00:00,  6.10it/s]


In [34]:
# Store the smoothing baseline under 'T_s'.
diroca_train_results_empirical['T_s'] = {
                                'optimization_params': params_smooth,
                                'T_matrix': T_smooth
                            }

In [35]:
# Abs-LiNGAM baselines on the truncated training arrays. The result is indexed
# below as linabs_results[<variant>]['T'] for the 'Perfect' and 'Noisy' variants.
linabs_results = evut.run_abs_lingam_complete(df_base_train, df_abst_train)

In [36]:
# Store both Abs-LiNGAM variants. Their T is transposed before storing, and the
# unperturbed abduction outputs fill the 'pert_U' slots so the entries have the
# same layout as the optimised ones.
for result_key, variant in (('T_pa', 'Perfect'), ('T_na', 'Noisy')):
    diroca_train_results_empirical[result_key] = {
        'optimization_params': {
            'L': {'pert_U': U_ll_hat},
            'H': {'pert_U': U_hl_hat},
        },
        'T_matrix': linabs_results[variant]['T'].T,
    }

In [37]:
# Persist every trained abstraction (sweep results and baselines) in one file.
joblib.dump(diroca_train_results_empirical, f"data/{experiment}/diroca_train_results_empirical.pkl")

['data/battery_discrete/diroca_train_results_empirical.pkl']

In [30]:
# Reload the stored results; this replaces the in-memory dict with the file's
# contents (presumably so evaluation can start without re-running training).
diroca_train_results_empirical = joblib.load(f"data/{experiment}/diroca_train_results_empirical.pkl")

# Downstream Evaluation

In [110]:
def map_n_bin_fixed(T, df_base, df_abst):
    """
    Transform base samples and bin the ML dimension using fixed bin edges from
    df_abst.

    This performs the same computation as ``map_n_bin`` defined earlier in the
    notebook: the first transformed dimension (CG) is returned as-is, the
    second (ML) is snapped onto the unique ML values of df_abst via bin edges
    computed from ``df_abst[:, 1]``.

    Args:
        T: transformation matrix
        df_base: base model data (numpy array)
        df_abst: abstract model data (numpy array)

    Returns:
        binned_samples: transformed samples, one row per base sample, with
        column 1 restricted to values present in ``df_abst[:, 1]``.
    """
    # Apply transformation
    continuous_samples = T @ df_base.T  # (d, N)
    binned_samples = np.zeros_like(continuous_samples)

    # Fixed ML bin edges: percentiles of the abstract ML column, or equal-width
    # edges over its range if the percentile edges contain duplicates.
    ml_values = df_abst[:, 1]
    unique_ml = np.unique(ml_values)
    n_bins_ml = len(unique_ml)
    percentile_grid = np.linspace(0, 100, n_bins_ml + 1)
    bin_edges_ml = np.unique(np.percentile(ml_values, percentile_grid))
    if len(bin_edges_ml) < n_bins_ml + 1:
        bin_edges_ml = np.linspace(ml_values.min(), ml_values.max(), n_bins_ml + 1)

    # (0) CG: left continuous here; any CG mapping is the caller's job.
    binned_samples[0] = continuous_samples[0]

    # (1) ML: digitize against the interior edges, then look up the ML value.
    bin_indices_ml = np.digitize(continuous_samples[1], bin_edges_ml[1:-1])
    binned_samples[1] = unique_ml[bin_indices_ml]

    return binned_samples.T


In [123]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso

def map_wmg_to_lrcs_cg(wmg_cg_values):
    """
    Collapse WMG comma-gap codes onto LRCS comma-gap codes.

    Code 0 stays 0, codes 1-4 all become 1, and code 5 becomes 2. The physical
    settings named in the inline comments are the original author's.

    Args:
        wmg_cg_values: iterable of WMG CG codes.

    Returns:
        numpy array with one LRCS code per input value.

    Raises:
        ValueError: if a value is none of 0, 1, 2, 3, 4, 5.
    """
    code_groups = (
        ((0,), 0),            # WMG 75 mapped to LRCS 75
        ((1, 2, 3, 4), 1),    # WMG 110, 150, 170, 180 mapped to LRCS 100
        ((5,), 2),            # WMG 200 mapped to LRCS 200
    )

    mapped_cg = []
    for val in wmg_cg_values:
        for members, lrcs_code in code_groups:
            if val in members:
                mapped_cg.append(lrcs_code)
                break
        else:
            # No group matched this value.
            raise ValueError(f"Unexpected WMG CG value {val} during mapping.")
    return np.array(mapped_cg)

def downstream_evaluation_fair(T, df_base, df_abst):
    """
    Leave-one-comma-gap-out evaluation of an abstraction matrix T.

    For every distinct value in column 0 of the abstract data (the held-out
    comma gap), a Lasso regressor predicting column 1 from column 0 is trained
    and scored by mean squared error in three settings:

      * 'Real'    - train on abstract rows with a different comma gap, test on
                    the abstract rows with the held-out one.
      * 'Aug'     - same test rows; the training set additionally contains
                    every transported base sample.
      * 'AugReal' - transported samples carrying the held-out comma gap are
                    moved from the training set into the test set.

    Args:
        T: abstraction matrix handed to map_n_bin_old
        df_base: base-level DataFrame
        df_abst: abstract-level DataFrame (column 0 = comma gap, column 1 = target)

    Returns:
        dict mapping 'Real' / 'Aug' / 'AugReal' to a (mean, std) tuple taken
        over the per-comma-gap MSEs.
    """
    lasso_params = {'alpha': 0.0001, 'max_iter': 500, 'tol': 0.0001}

    def _held_out_mse(X_train, y_train, X_test, y_test):
        # Fit on the training rows and return the mean squared test error.
        regressor = Lasso(**lasso_params).fit(X_train, y_train)
        return np.mean((regressor.predict(X_test) - y_test) ** 2)

    base_rows = df_base.to_numpy()
    abst_rows = df_abst.to_numpy()

    # Transport the base samples into the abstract space and discretise them.
    tau_samples = map_n_bin_old(T, base_rows, abst_rows)

    # NOTE(review): map_n_bin_old has already replaced column 0 with values
    # drawn from df_abst[:, 0]; the mapping below then treats those values as
    # WMG codes (so a 2 becomes 1, and anything outside 0..5 raises). Confirm
    # this composition is intended.
    tau_samples[:, 0] = map_wmg_to_lrcs_cg(tau_samples[:, 0].astype(int))

    # Abstract rows first, transported rows after (used by the 'Aug' setting).
    enhanced_data = np.concatenate([abst_rows, tau_samples])
    all_tau_kept = np.ones(len(tau_samples), dtype=bool)

    per_gap = {'Real': [], 'Aug': [], 'AugReal': []}

    for cg in np.unique(abst_rows[:, 0]):
        other_gaps = (abst_rows[:, 0] != cg)
        held_out = (abst_rows[:, 0] == cg)

        # Test rows shared by the 'Real' and 'Aug' settings.
        X_held_out = abst_rows[held_out, 0].reshape(-1, 1)
        y_held_out = abst_rows[held_out, 1]

        # Scenario (a): Real only
        per_gap['Real'].append(_held_out_mse(
            abst_rows[other_gaps, 0].reshape(-1, 1),
            abst_rows[other_gaps, 1],
            X_held_out,
            y_held_out,
        ))

        # Scenario (b): abstract rows minus the held-out gap, plus all tau_samples
        aug_rows = np.concatenate([~held_out, all_tau_kept])
        per_gap['Aug'].append(_held_out_mse(
            enhanced_data[aug_rows, 0].reshape(-1, 1),
            enhanced_data[aug_rows, 1],
            X_held_out,
            y_held_out,
        ))

        # Scenario (c): the held-out gap is removed from both sources for
        # training, and both sources contribute to the test set.
        tau_held_out = (tau_samples[:, 0] == cg)
        train_rows_c = np.concatenate([abst_rows[~held_out], tau_samples[~tau_held_out]])
        test_rows_c = np.concatenate([abst_rows[held_out], tau_samples[tau_held_out]])
        per_gap['AugReal'].append(_held_out_mse(
            train_rows_c[:, 0].reshape(-1, 1),
            train_rows_c[:, 1],
            test_rows_c[:, 0].reshape(-1, 1),
            test_rows_c[:, 1],
        ))

    # Aggregate across the held-out comma gaps.
    return {
        scenario: (np.mean(errors), np.std(errors))
        for scenario, errors in per_gap.items()
    }


In [124]:
def print_ordered_results(results_dict, scenario_name):
    """
    Print a ranking table of all methods for one scenario, largest mean error
    first.

    Args:
        results_dict: {method: {scenario: (mean, std)}}
        scenario_name: which scenario's (mean, std) pairs to rank and print
    """
    # Pull out this scenario's (mean, std) per method, then order by mean,
    # worst (largest) first; ties keep their original order.
    ranking = sorted(
        [(method, scores[scenario_name]) for method, scores in results_dict.items()],
        key=lambda entry: entry[1][0],
        reverse=True,
    )

    print(f"\n{scenario_name} Scenario")
    print("="*80)
    print(f"{'Rank':<5} {'Method':<15} {'Error (mean ± std)':<35}")
    print("-"*80)

    position = 0
    for method, (mean, std) in ranking:
        position += 1
        print(f"{position:<5} {method:<15} {mean:>8.4f} ± {std:<8.4f}")

# ========== FIRST compute downstream evaluations ==========
# One evaluation per stored abstraction, keyed by the same method tag.
all_results = {}
for method, trained in diroca_train_results_empirical.items():
    all_results[method] = downstream_evaluation_fair(trained['T_matrix'], df_base, df_abst)

# ========== THEN print for each scenario ==========
print("\nAbstraction Performance Evaluation")
print("="*80)

# Real → Real, Aug → Real, Real+Aug → Real, in that order
for scenario in ('Real', 'Aug', 'AugReal'):
    print_ordered_results(all_results, scenario)

print("\n" + "="*80)



Abstraction Performance Evaluation

Real Scenario
Rank  Method          Error (mean ± std)                 
--------------------------------------------------------------------------------
1     T_8               0.5701 ± 0.2816  
2     T_0.358-0.417     0.5701 ± 0.2816  
3     T_1               0.5701 ± 0.2816  
4     T_2               0.5701 ± 0.2816  
5     T_4               0.5701 ± 0.2816  
6     T_0.00            0.5701 ± 0.2816  
7     T_b               0.5701 ± 0.2816  
8     T_s               0.5701 ± 0.2816  
9     T_pa              0.5701 ± 0.2816  
10    T_na              0.5701 ± 0.2816  

Aug Scenario
Rank  Method          Error (mean ± std)                 
--------------------------------------------------------------------------------
1     T_b               5.0341 ± 6.5736  
2     T_8               3.0630 ± 3.2846  
3     T_2               3.0630 ± 3.2846  
4     T_4               3.0630 ± 3.2846  
5     T_1               1.2565 ± 0.7856  
6     T_pa              1.1

In [52]:
# Abduction outputs of both levels; read as a global by the noisy evaluation
# function below.
hat_dict = {'L': U_ll_hat, 'H': U_hl_hat}

# Perturbed exogenous terms stored with the 'T_8' run.
worst = 'T_8'
worst_params = diroca_train_results_empirical[worst]['optimization_params']
U_worst_L = worst_params['L']['pert_U']
U_worst_H = worst_params['H']['pert_U']
worst_dict = {'L': U_worst_L, 'H': U_worst_H}

# Select which of the two dicts acts as the centre.
center = 'hat'
if center == 'worst':
    center_matrix = worst_dict
elif center == 'hat':
    center_matrix = hat_dict

coverage_type='uniform'

In [53]:
# Sweep grids: 0.0, 1.0, ..., 9.0 as plain Python floats.
# noise_levels is not used by the sweep at the end of this section (the loop
# header that would iterate over it is commented out).
rad_values = np.arange(0.0, 10.0, 1).tolist()  
noise_levels = np.arange(0.0, 10.0, 1).tolist()  

In [350]:
# From here on df_base / df_abst refer to the held-out test splits.
# NOTE(review): cells above used these names for the full frames (preprocessing,
# train/test split, first evaluation pass), so re-running any of them after
# this cell silently changes their input.
df_base = df_base_test
df_abst = df_abst_test

In [84]:
def downstream_evaluation_paper_with_noise(T, df_base, df_abst, rad, noise_level, noise_in):
    """
    Leave-one-comma-gap-out evaluation of T on optionally perturbed data.

    The data are copied as float arrays and perturbed according to noise_in:
      * 'both' - perturbation matrices generated for radius `rad` are added to
                 the base and abstract data,
      * 'base' - zero-mean Gaussian noise with scale noise_level on the base data,
      * 'abst' - zero-mean Gaussian noise with scale noise_level on the abstract data,
      * any other value, or noise_level == 0 - no perturbation.

    Base samples are then transported with map_n_bin, and a Lasso regressor
    (column 1 from column 0) is scored on the held-out abstract rows for three
    training sets: abstract only ('Real'), abstract plus all transported
    samples ('Aug'), and abstract plus the transported samples whose column 0
    differs from the held-out value ('AugReal').

    Reads the module-level names hat_dict and coverage_type.

    Args:
        T: abstraction matrix
        df_base: base-level DataFrame
        df_abst: abstract-level DataFrame
        rad: radius passed to evut.generate_perturbation_matrix
        noise_level: scale of the Gaussian noise; 0 disables all noise
        noise_in: 'both', 'base', 'abst' or 'none'

    Returns:
        dict mapping 'Real' / 'Aug' / 'AugReal' to (mean, std) over the
        per-comma-gap MSEs.
    """
    lasso_params = {'alpha': 0.0001, 'max_iter': 500, 'tol': 0.0001}

    def _held_out_mse(X_train, y_train, X_test, y_test):
        # Fit on the training rows and return the mean squared test error.
        regressor = Lasso(**lasso_params).fit(X_train, y_train)
        return np.mean((regressor.predict(X_test) - y_test) ** 2)

    base_clean = df_base.to_numpy()
    abst_clean = df_abst.to_numpy()

    # Float working copies; the in-place additions below never touch the inputs.
    df_base_noisy = base_clean.copy().astype(float)
    df_abst_noisy = abst_clean.copy().astype(float)

    # Generated on every call, even when the selected mode does not use them.
    pert_L = evut.generate_perturbation_matrix(rad, 'boundary', 'L', hat_dict, coverage = coverage_type)
    pert_H = evut.generate_perturbation_matrix(rad, 'boundary', 'H', hat_dict, coverage = coverage_type)

    # A zero noise level overrides whatever mode was requested.
    mode = 'none' if noise_level == 0 else noise_in
    if mode == 'both':
        # NOTE(review): in this mode the magnitude comes from `rad`; noise_level
        # only acts as an on/off switch.
        df_base_noisy += pert_L
        df_abst_noisy += pert_H
    elif mode == 'base':
        df_base_noisy += np.random.normal(0, noise_level, base_clean.shape)
    elif mode == 'abst':
        df_abst_noisy += np.random.normal(0, noise_level, abst_clean.shape)
    # any other mode: leave both copies unperturbed

    # Get unique Comma Gap values
    comma_gaps = np.unique(df_abst_noisy[:, 0])
    tau_samples = map_n_bin(T, df_base_noisy, df_abst_noisy)
    # Now tau_samples is (num_base_samples x 2)

    gap_column = df_abst_noisy[:, 0]
    n_abst = len(df_abst_noisy)

    # Scenario 1: Before abstraction (Real → Real)
    mse_real = []
    for cg in comma_gaps:
        held_out = (gap_column == cg)
        kept = ~held_out
        mse_real.append(_held_out_mse(
            df_abst_noisy[kept, 0].reshape(-1, 1),
            df_abst_noisy[kept, 1],
            df_abst_noisy[held_out, 0].reshape(-1, 1),
            df_abst_noisy[held_out, 1],
        ))

    # Scenario 2: After abstraction with support (Aug → Real)
    # Abstract rows come first in enhanced_data, transported rows after.
    enhanced_data = np.concatenate([df_abst_noisy, tau_samples])
    mse_aug = []
    for cg in comma_gaps:
        held_out = (gap_column == cg)
        # Train on every transported sample plus the non-held-out abstract rows.
        train_rows = np.ones(len(enhanced_data), dtype=bool)
        train_rows[:n_abst][held_out] = False
        mse_aug.append(_held_out_mse(
            enhanced_data[train_rows, 0].reshape(-1, 1),
            enhanced_data[train_rows, 1],
            df_abst_noisy[held_out, 0].reshape(-1, 1),
            df_abst_noisy[held_out, 1],
        ))

    # Scenario 3: After abstraction without support (Real+Aug → Real)
    mse_mix = []
    for cg in comma_gaps:
        held_out = (gap_column == cg)
        tau_held_out = (tau_samples[:, 0] == cg)
        # Drop the held-out value from both sources before training.
        train_data = np.concatenate([
            df_abst_noisy[~held_out],
            tau_samples[~tau_held_out]
        ])
        mse_mix.append(_held_out_mse(
            train_data[:, 0].reshape(-1, 1),
            train_data[:, 1],
            df_abst_noisy[held_out, 0].reshape(-1, 1),
            df_abst_noisy[held_out, 1],
        ))

    return {
        "Real": (np.mean(mse_real), np.std(mse_real)),
        "Aug": (np.mean(mse_aug), np.std(mse_aug)),
        "AugReal": (np.mean(mse_mix), np.std(mse_mix))
    }

In [85]:
def print_ordered_noisy_results(results_dict, scenario_name, noise_level, noise_in):
    """
    Print a ranking table of all methods for one scenario under a given noise
    configuration, largest mean error first.

    Args:
        results_dict: {method: {scenario: (mean, spread)}}
        scenario_name: which scenario's pairs to rank and print
        noise_level: shown in the table heading only
        noise_in: shown in the table heading only
    """
    # Collect this scenario's pair per method and order by the first element,
    # worst (largest) first; ties keep their original order.
    ranking = sorted(
        [(method, scores[scenario_name]) for method, scores in results_dict.items()],
        key=lambda entry: entry[1][0],
        reverse=True,
    )

    print(f"\n{scenario_name} Scenario (Noise Level: {noise_level}, Applied to: {noise_in})")
    print("="*100)
    print(f"{'Rank':<5} {'Method':<15} {'Error (mean ± CI)':<35}")
    print("-"*100)

    position = 0
    for method, (mean, std) in ranking:
        position += 1
        print(f"{position:<5} {method:<15} {mean:>8.4f} ± {std:<8.4f}")

# Print results for each scenario and noise configuration
def evaluate_and_print_noisy_results(diroca_train_results_empirical, df_base, df_abst, noise_level, noise_in, rad=0.0):
    """
    Evaluate every stored abstraction once under one noise configuration and
    print the three scenario rankings.

    The body used to read `rad` without receiving it, so it only worked if a
    global of that name happened to exist; it is now an explicit, optional
    parameter.

    NOTE: a later cell defines another function with this same name (taking
    `rad` positionally and averaging several runs). Once that cell has run, it
    replaces this definition.

    Args:
        diroca_train_results_empirical: {method: {'T_matrix': ..., ...}}
        df_base: base-level DataFrame
        df_abst: abstract-level DataFrame
        noise_level: forwarded to downstream_evaluation_paper_with_noise
        noise_in: forwarded to downstream_evaluation_paper_with_noise
        rad: perturbation radius forwarded to downstream_evaluation_paper_with_noise
    """
    print("\nAbstraction Performance Evaluation with Noise")
    print("="*100)
    print(f"Noise Configuration: Level = {noise_level}, Applied to: {noise_in}")
    print("="*100)

    # Collect all results
    results = {
        method: downstream_evaluation_paper_with_noise(
            trained['T_matrix'],
            df_base, df_abst, rad=rad,
            noise_level=noise_level,
            noise_in=noise_in
        )
        for method, trained in diroca_train_results_empirical.items()
    }

    # Print results for each scenario
    for scenario in ('Real', 'Aug', 'AugReal'):
        print_ordered_noisy_results(results, scenario, noise_level, noise_in)

    print("\n" + "="*100)

In [86]:
def evaluate_and_print_noisy_results(diroca_train_results_empirical, df_base, df_abst, rad, noise_level, noise_in, n_runs=5):
    """
    Evaluate every stored abstraction n_runs times under one noise
    configuration, average the outcomes, and print the three scenario rankings.

    For each method and scenario, the printed pair is (mean of the per-run
    means, mean of the per-run stds).

    Args:
        diroca_train_results_empirical: {method: {'T_matrix': ..., ...}}
        df_base: base-level DataFrame
        df_abst: abstract-level DataFrame
        rad: perturbation radius forwarded to downstream_evaluation_paper_with_noise
        noise_level: forwarded to downstream_evaluation_paper_with_noise
        noise_in: forwarded to downstream_evaluation_paper_with_noise
        n_runs: number of repeated evaluations to average (default 5, the
            value that was previously hard-coded)

    Raises:
        ValueError: if n_runs is smaller than 1.
    """
    if n_runs < 1:
        raise ValueError(f"n_runs must be at least 1, got {n_runs}")

    scenarios = ('Real', 'Aug', 'AugReal')
    methods = list(diroca_train_results_empirical.keys())

    # Per-method, per-scenario lists of the values seen in each run.
    collected = {
        method: {scenario: {'mean': [], 'std': []} for scenario in scenarios}
        for method in methods
    }

    for _ in range(n_runs):
        # Methods are evaluated in dict order within every run.
        for method in methods:
            outcome = downstream_evaluation_paper_with_noise(
                diroca_train_results_empirical[method]['T_matrix'],
                df_base, df_abst, rad=rad,
                noise_level=noise_level,
                noise_in=noise_in
            )
            for scenario in scenarios:
                mean, std = outcome[scenario]
                collected[method][scenario]['mean'].append(mean)
                collected[method][scenario]['std'].append(std)

    # Average across runs.
    final_results = {
        method: {
            scenario: (
                np.mean(collected[method][scenario]['mean']),
                np.mean(collected[method][scenario]['std']),
            )
            for scenario in scenarios
        }
        for method in methods
    }

    # Print results for each scenario
    for scenario in scenarios:
        print_ordered_noisy_results(final_results, scenario, noise_level, noise_in)

    print("\n" + "="*100)


# Evaluate all stored abstractions once per radius in rad_values.
# NOTE(review): with noise_in='none' the evaluation function applies no
# perturbation at all, so `rad` only reaches the perturbation-matrix generator
# whose output is then unused — every iteration evaluates the same clean data.
# Confirm whether noise_in='both' was intended for this sweep.
for rad in rad_values:
#for noise_level in noise_levels:
    print(f"Rad value: {rad}")
    evaluate_and_print_noisy_results(
        diroca_train_results_empirical,
        df_base,
        df_abst,
        rad = rad,
        noise_level=.1,
        noise_in='none'
    )

Rad value: 0.0

Real Scenario (Noise Level: 0.1, Applied to: none)
Rank  Method          Error (mean ± CI)                  
----------------------------------------------------------------------------------------------------
1     T_8               0.5701 ± 0.2816  
2     T_0.358-0.417     0.5701 ± 0.2816  
3     T_1               0.5701 ± 0.2816  
4     T_2               0.5701 ± 0.2816  
5     T_4               0.5701 ± 0.2816  
6     T_0.00            0.5701 ± 0.2816  
7     T_b               0.5701 ± 0.2816  
8     T_s               0.5701 ± 0.2816  
9     T_pa              0.5701 ± 0.2816  
10    T_na              0.5701 ± 0.2816  

Aug Scenario (Noise Level: 0.1, Applied to: none)
Rank  Method          Error (mean ± CI)                  
----------------------------------------------------------------------------------------------------
1     T_b               4.0604 ± 5.4531  
2     T_8               2.7670 ± 3.2527  
3     T_2               2.7670 ± 3.2527  
4     T_4         