In [36]:
import itertools
import os
import pickle
import random

import cvxpy as cp
import joblib
import matplotlib.pylab as pl
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.model_selection import train_test_split

from src.CBN import CausalBayesianNetwork as CBN
import modularised_utils as mut
import Linear_Additive_Noise_Models as lanm
import operations as ops
import evaluation_utils as evut
import opt_utils as oput
import params

# Seed NumPy's legacy global RNG so NumPy-based randomness in this notebook is repeatable.
# NOTE(review): the stdlib `random` module is imported above but is not seeded here —
# confirm that nothing downstream draws from it.
np.random.seed(0)

In [37]:
# Experiment tag; it is interpolated into every `data/{experiment}/...` output path below.
experiment = 'battery_discrete'

## Preprocessing

In [38]:
# Pre-fitted models for the base (WMG) and abstract (LRCS) datasets.
# NOTE: these are pickle-backed files; only load them from a trusted location.
M_base = joblib.load('batteries/scms/M_WMG_bins_5_avg_2.pkl')
M_abst = joblib.load('batteries/scms/M_LRCS_bins_5.pkl')

df_base_raw = joblib.load('batteries/dfs/df_WMG_bins_5_avg_2.pkl')
df_abst_raw = joblib.load('batteries/dfs/df_LRCS_bins_5.pkl')

# Raw level values -> consecutive integer codes.
# `replace` is applied to the whole frame, i.e. to every remaining column.
base_level_codes = {75:0, 110:1, 150:2, 170:3, 180:4, 200:5}
abst_level_codes = {75:0, 100:1, 200:2}

# Base frame: drop the columns at positions 1 and 2, recode, then rename to match the graph.
df_base = (
    df_base_raw
    .drop(columns=df_base_raw.columns[[1, 2]])
    .replace(base_level_codes)
    .rename(columns={'binned ML_avg0': 'ML0', 'binned ML_avg1': 'ML1'})
)

# Abstract frame: drop the column at position 1, recode, then rename to match the graph.
df_abst = (
    df_abst_raw
    .drop(columns=df_abst_raw.columns[[1]])
    .replace(abst_level_codes)
    .rename(columns={'Comma gap (µm)': 'CG', 'binned ML': 'ML'})
)

In [39]:
def _structure_as_digraph(model):
    """Return a new ``nx.DiGraph`` holding the nodes and edges reported by ``model``."""
    graph = nx.DiGraph()
    graph.add_nodes_from(model.nodes())
    graph.add_edges_from(model.edges())
    return graph


# Low-level (base) and high-level (abstract) causal graphs.
Gll = _structure_as_digraph(M_base)
Ghl = _structure_as_digraph(M_abst)

In [40]:
# Fraction of each dataset held out as a test split.
test_size = 0.1

# The two frames are split independently of each other; both calls use random_state=42.
df_base_train, df_base_test = train_test_split(df_base, test_size=test_size, random_state=42)
df_abst_train, df_abst_test = train_test_split(df_abst, test_size=test_size, random_state=42)

# Get coefficients using the modularised_utils function
ll_coeffs = mut.get_coefficients(df_base_train.to_numpy(), Gll)
hl_coeffs = mut.get_coefficients(df_abst_train.to_numpy(), Ghl)

# NOTE: Gll/Ghl are rebound here from nx.DiGraph objects to CBN objects built from the
# coefficient keys. Re-running this cell on its own therefore passes the CBN objects
# (not the original DiGraphs) to get_coefficients above.
Gll = CBN(list(ll_coeffs.keys()))
Ghl = CBN(list(hl_coeffs.keys()))

In [None]:
# Persist the held-out splits.
# joblib.dump does not create missing parent folders, so make sure the experiment
# directory exists before the first artifact is written (no-op when it already does).
os.makedirs(f"data/{experiment}", exist_ok=True)

joblib.dump(df_base_test, f"data/{experiment}/df_base_test.pkl")
joblib.dump(df_abst_test, f"data/{experiment}/df_abst_test.pkl")

In [42]:
# Row counts of the two training splits, taken *before* the truncation below.
num_llsamples, num_hlsamples = len(df_base_train), len(df_abst_train)
min_samples = min(num_llsamples, num_hlsamples)

# Keep the first `min_samples` rows of each split and switch to plain NumPy arrays.
# NOTE: this rebinds the *_train names from DataFrame to ndarray, so the cell cannot be re-run as is.
df_base_train = df_base_train[:min_samples].to_numpy()
df_abst_train = df_abst_train[:min_samples].to_numpy()

# Model dimensions: number of nodes in the low-level / high-level graph.
l = len(Gll.nodes())
h = len(Ghl.nodes())

In [43]:
# Abduction step on the training arrays, once per model. mut.lan_abduction returns three values;
# presumably the estimated exogenous noise plus its mean and covariance — confirm in modularised_utils.
U_ll_hat, mu_U_ll_hat, Sigma_U_ll_hat = mut.lan_abduction(df_base_train, Gll, ll_coeffs)
U_hl_hat, mu_U_hl_hat, Sigma_U_hl_hat = mut.lan_abduction(df_abst_train, Ghl, hl_coeffs)

In [44]:
# Low-level interventions on CG (values are the integer level codes of the base data).
# `None` is used as the entry without an Intervention object — presumably the
# observational regime; confirm against ops / lanm.
iota0 = None
iota1 = ops.Intervention({'CG': 0})
iota2 = ops.Intervention({'CG': 1})
iota3 = ops.Intervention({'CG': 4})
iota4 = ops.Intervention({'CG': 5})

# High-level interventions on CG (integer level codes of the abstract data).
iota0_prime = None
iota1_prime = ops.Intervention({'CG': 0})
iota2_prime = ops.Intervention({'CG': 1})
iota3_prime = ops.Intervention({'CG': 2})

# Mapping from each low-level intervention to its high-level counterpart.
# Two low-level interventions (iota3, iota4) share the same high-level image.
omega = {
    iota0: iota0_prime,
    iota1: iota1_prime,
    iota2: iota2_prime,
    iota3: iota3_prime,
    iota4: iota3_prime
}

# Intervention sets, de-duplicated while keeping omega's insertion order.
# A plain set() would give an arbitrary order that can change from run to run,
# which would make the model dictionaries, structural matrices and saved pickles
# built from these lists non-reproducible.
Ill = list(omega)
Ihl = list(dict.fromkeys(omega.values()))

In [45]:
# One linear additive-noise SCM per intervention, keyed by the intervention itself.
LLmodels = {ll_iota: lanm.LinearAddSCM(Gll, ll_coeffs, ll_iota) for ll_iota in Ill}
HLmodels = {hl_iota: lanm.LinearAddSCM(Ghl, hl_coeffs, hl_iota) for hl_iota in Ihl}

# Structural matrices derived from the models above, for the low- and high-level side.
L_matrices = oput.compute_struc_matrices(LLmodels, Ill)
H_matrices = oput.compute_struc_matrices(HLmodels, Ihl)

In [None]:
# Paired training arrays, stored under the `None` key.
Ds = {None: (df_base_train, df_abst_train)}

# File stem -> object to persist; written in this order under data/{experiment}/.
# NOTE(review): only the high-level exogenous estimates are saved; there is no
# low-level counterpart — confirm that this is intended.
artifacts = {
    "LL": (Gll, Ill),
    "ll_coeffs": ll_coeffs,
    "HL": (Ghl, Ihl),
    "hl_coeffs": hl_coeffs,
    "Ds": Ds,
    "omega": omega,
    "exogenous_HL": (U_hl_hat, mu_U_hl_hat, Sigma_U_hl_hat),
    "LLmodels": LLmodels,
    "HLmodels": HLmodels,
}

for stem, payload in artifacts.items():
    joblib.dump(payload, f"data/{experiment}/{stem}.pkl")

### DIROCA optimization

In [12]:
# Settings shared by both radius computations; only the sample count N and dimension m differ.
radius_settings = dict(eta=0.05, c1=1000.0, c2=1.0, alpha=2.0)

# Empirical radii for the low- and high-level models, rounded to three decimals.
ll_bound = round(evut.compute_empirical_radius(N=num_llsamples, m=l, **radius_settings), 3)
hl_bound = round(evut.compute_empirical_radius(N=num_hlsamples, m=h, **radius_settings), 3)

In [13]:
# Default radii handed to the optimizer as 'epsilon' / 'delta': the empirical bounds.
epsilon, delta = ll_bound, hl_bound

# NOTE(review): presumably the step sizes of the max- and min-updates — confirm in opt_utils.
eta_max = 0.001
eta_min = 0.001

# Iteration budget, plus the number of steps for the min / max parts
# (exact semantics live in opt_utils — confirm there).
max_iter = 5000
num_steps_min = 5
num_steps_max = 2

# Robustness switches for the low-level (L) and high-level (H) side.
robust_L = True 
robust_H = True

initialization = 'random'

# Tolerance and RNG seed forwarded to every optimizer below.
tol  = 1e-4
seed = 23

In [14]:
# Keyword arguments for the ERICA-style optimizers. The dict is reused (and updated in
# place) by the training cells below, so it must be rebuilt before re-running them.
opt_params_erica = {
    # Estimated exogenous noise of the low- and high-level model.
    'U_L': U_ll_hat,
    'U_H': U_hl_hat,
    # Per-intervention SCMs and the intervention mapping between the two levels.
    'L_models': LLmodels,
    'H_models': HLmodels,
    'omega': omega,
    # Radii for the low-/high-level side (overwritten per run in the DIROCA loop).
    'epsilon': epsilon,
    'delta': delta,
    # Optimizer settings defined in the hyperparameter cell.
    'eta_min': eta_min,
    'eta_max': eta_max,
    'num_steps_min': num_steps_min,
    'num_steps_max': num_steps_max,
    'max_iter': max_iter,
    'tol': tol,
    'seed': seed,
    'robust_L': robust_L,
    'robust_H': robust_H,
    'initialization': initialization,
    # Use the notebook-wide experiment tag instead of repeating the literal, so the
    # optimizer and the data/{experiment}/ paths cannot drift apart.
    'experiment': experiment,
}

In [15]:
# Collects every trained abstraction, keyed by a method tag ('T_<...>'); each entry holds
# the optimizer's parameters and the resulting T matrix.
diroca_train_results_empirical = {}

In [None]:
# Radii to train with. Each value is used for both epsilon and delta, except the
# empirical bound, which pairs ll_bound (epsilon) with hl_bound (delta).
eps_delta_values = [8, ll_bound, 1, 2, 4]

for eps_delta in eps_delta_values:
    print(f"Training for ε=δ = {eps_delta}")

    # Decide the radii and the storage key for this run in one place.
    uses_empirical_bounds = eps_delta == ll_bound
    if uses_empirical_bounds:
        run_epsilon, run_delta = ll_bound, hl_bound
        result_key = 'T_' + str(ll_bound) + '-' + str(hl_bound)
    else:
        run_epsilon = run_delta = eps_delta
        result_key = 'T_' + str(eps_delta)

    # The shared parameter dict is updated in place, so it keeps the last run's radii.
    opt_params_erica.update(epsilon=run_epsilon, delta=run_delta)

    # Run ERICA optimization
    params_empirical, T_empirical = oput.run_empirical_erica_optimization_batt(**opt_params_erica)

    # Keep both the optimizer's parameters and the learned transformation matrix.
    diroca_train_results_empirical[result_key] = {
        'optimization_params': params_empirical,
        'T_matrix': T_empirical,
    }

print("\nTraining completed. T matrices stored in trained_results dictionary.")
print("Available ε=δ values:", list(diroca_train_results_empirical.keys()))



### GRADCA optimization

In [17]:
# Non-robust run: same keyword arguments as the DIROCA runs, with both robustness flags off.
# NOTE(review): this calls `run_empirical_erica_optimization`, whereas the DIROCA cell uses
# the `_batt` variant — confirm the difference is intentional. opt_params_erica also still
# carries the epsilon/delta written by the last iteration of the DIROCA loop.
params_enrico, T_enrico = oput.run_empirical_erica_optimization(**{**opt_params_erica, 'robust_L': False, 'robust_H': False})

100%|██████████| 2/2 [00:00<00:00, 428.36it/s]


In [18]:
# Register the non-robust solution under the tag 'T_0.00'.
diroca_train_results_empirical['T_0.00'] = dict(
    optimization_params=params_enrico,
    T_matrix=T_enrico,
)

### BARYCA optimization

In [19]:
# Keyword arguments for the barycentric optimizer: the estimated exogenous noise of both
# models, their structural matrices, and its own iteration cap (1000) alongside the shared
# tolerance and seed.
opt_params_bary = {
                        'U_ll_hat':U_ll_hat,
                        'U_hl_hat':U_hl_hat,
                        'L_matrices':L_matrices,
                        'H_matrices':H_matrices,
                        'max_iter':1000,
                        'tol':tol,
                        'seed':seed
                    }
                                 

In [None]:
# The barycentric optimizer's result is stored as the T matrix directly; a placeholder
# params dict with empty 'L' and 'H' entries is created so it can be stored like the others.
T_bary = oput.run_empirical_bary_optim(**opt_params_bary)
params_bary = {'L':{}, 'H':{}}

In [21]:
# Register the barycentric solution under the tag 'T_b'.
diroca_train_results_empirical['T_b'] = dict(
    optimization_params=params_bary,
    T_matrix=T_bary,
)

### RSCA optimization

In [22]:
# Keyword arguments for the smoothing-based optimizer. It reuses the noise estimates,
# models, mapping, eta_min / num_steps_min, tolerance and seed from above, with its own
# iteration cap and noise settings.
opt_params_smooth = {
                        'U_L': U_ll_hat,
                        'U_H': U_hl_hat,
                        'L_models': LLmodels,
                        'H_models': HLmodels,
                        'omega': omega,
                        'eta_min': eta_min,
                        'num_steps_min': num_steps_min,
                        'max_iter': 300, 
                        'tol': tol,
                        'seed': seed,
                        # NOTE(review): presumably the std-dev and number of noise draws used for
                        # smoothing — confirm in opt_utils.
                        'noise_sigma': 0.1, 
                        'num_noise_samples': 10
                        }

In [None]:
# Run the smoothing-based optimizer (battery variant); it returns its parameters and the T matrix.
params_smooth, T_smooth = oput.run_empirical_smooth_optimization_batt(**opt_params_smooth)

In [24]:
# Register the smoothing-based solution under the tag 'T_s'.
diroca_train_results_empirical['T_s'] = dict(
    optimization_params=params_smooth,
    T_matrix=T_smooth,
)

### Abs-LiNGAM optimization

In [25]:
# Abs-LiNGAM baselines, fitted on the (truncated) training arrays.
linabs_results = evut.run_abs_lingam_complete(df_base_train, df_abst_train)

# Store the 'Perfect' and 'Noisy' variants; each T is transposed before it is stored,
# and the estimated exogenous noise is recorded as the 'pert_U' parameters.
for result_key, variant in (('T_pa', 'Perfect'), ('T_na', 'Noisy')):
    diroca_train_results_empirical[result_key] = {
        'optimization_params': {'L': {'pert_U': U_ll_hat}, 'H': {'pert_U': U_hl_hat}},
        'T_matrix': linabs_results[variant]['T'].T,
    }

## Save results

In [None]:
# Persist every trained abstraction (all 'T_*' entries) in a single file under data/{experiment}/.
joblib.dump(diroca_train_results_empirical, f"data/{experiment}/diroca_train_results_empirical.pkl")

In [30]:
#diroca_train_results_empirical = joblib.load(f"data/{experiment}/diroca_train_results_empirical.pkl")

## Downstream Evaluation

In [26]:
def map_wmg_to_lrcs_cg(wmg_cg_values):
    """
    Translate WMG comma-gap level codes into LRCS comma-gap level codes.

    The correspondence is hand-written:
      * WMG code 0 (75)                          -> LRCS code 0 (75)
      * WMG codes 1-4 (110, 150, 170, 180)       -> LRCS code 1 (100)
      * WMG code 5 (200)                         -> LRCS code 2 (200)

    Parameters
    ----------
    wmg_cg_values : iterable
        WMG comma-gap codes, one per sample.

    Returns
    -------
    numpy.ndarray
        The LRCS code for each input value, in input order.

    Raises
    ------
    ValueError
        If a value is not one of the WMG codes 0-5.
    """
    # (accepted WMG codes, LRCS code they collapse to)
    rules = (
        ((0,), 0),
        ((1, 2, 3, 4), 1),
        ((5,), 2),
    )

    lrcs_codes = []
    for wmg_code in wmg_cg_values:
        for accepted, lrcs_code in rules:
            if wmg_code in accepted:
                lrcs_codes.append(lrcs_code)
                break
        else:
            # No rule matched this value.
            raise ValueError(f"Unexpected WMG CG value {wmg_code} during mapping.")
    return np.array(lrcs_codes)

def _lasso_holdout_mse(train_rows, test_rows, lasso_kwargs):
    """Fit a Lasso from column 0 to column 1 of ``train_rows``; return its MSE on ``test_rows``."""
    regressor = Lasso(**lasso_kwargs).fit(train_rows[:, 0].reshape(-1, 1), train_rows[:, 1])
    residuals = regressor.predict(test_rows[:, 0].reshape(-1, 1)) - test_rows[:, 1]
    return np.mean(residuals ** 2)


def downstream_evaluation_fair(T, df_base, df_abst):
    """
    Leave-one-comma-gap-out regression benchmark for an abstraction matrix ``T``.

    The base samples are transported with ``evut.map_n_bin_old`` and their first column
    is recoded to LRCS comma-gap codes. For every distinct value ``cg`` in the first
    column of ``df_abst``, a Lasso predicting column 1 from column 0 is trained and
    scored in three settings:

    * ``'Real'``    - train on abstract rows with CG != cg, test on abstract rows with CG == cg.
    * ``'Aug'``     - train on those abstract rows plus *all* transported rows, same test set.
    * ``'AugReal'`` - train on abstract and transported rows with CG != cg, test on abstract
      and transported rows with CG == cg.

    Parameters
    ----------
    T : abstraction matrix forwarded to ``evut.map_n_bin_old``.
    df_base, df_abst : DataFrames whose first column is the CG code and whose second
        column is the regression target (assumes preprocessing as done in this notebook).

    Returns
    -------
    dict
        ``{'Real' | 'Aug' | 'AugReal': (mean MSE, std of MSE)}`` across the held-out CG values.
    """
    base_rows = df_base.to_numpy()
    abst_rows = df_abst.to_numpy()

    # Transport the base samples, then express their CG column in LRCS codes.
    transported = evut.map_n_bin_old(T, base_rows, abst_rows)
    transported[:, 0] = map_wmg_to_lrcs_cg(transported[:, 0].astype(int))

    lasso_kwargs = {'alpha': 0.0001, 'max_iter': 500, 'tol': 0.0001}

    # Abstract rows followed by transported rows; the 'Aug' setting selects from this pool.
    pooled = np.concatenate([abst_rows, transported])
    keep_all_transported = np.ones(len(transported), dtype=bool)

    scores = {'Real': [], 'Aug': [], 'AugReal': []}

    for cg in np.unique(abst_rows[:, 0]):
        held_out = (abst_rows[:, 0] == cg)
        real_test = abst_rows[held_out]

        # 'Real': abstract data only.
        real_train = abst_rows[abst_rows[:, 0] != cg]
        scores['Real'].append(_lasso_holdout_mse(real_train, real_test, lasso_kwargs))

        # 'Aug': drop only the held-out abstract rows, keep every transported row.
        aug_train = pooled[np.concatenate([~held_out, keep_all_transported])]
        scores['Aug'].append(_lasso_holdout_mse(aug_train, real_test, lasso_kwargs))

        # 'AugReal': the held-out CG value is removed from (and tested on) both sources.
        held_out_transported = (transported[:, 0] == cg)
        augreal_train = np.concatenate([
            abst_rows[~held_out],
            transported[~held_out_transported],
        ])
        augreal_test = np.concatenate([
            real_test,
            transported[held_out_transported],
        ])
        scores['AugReal'].append(_lasso_holdout_mse(augreal_train, augreal_test, lasso_kwargs))

    # Mean and standard deviation of the per-CG errors for each setting.
    return {
        setting: (np.mean(errors), np.std(errors))
        for setting, errors in scores.items()
    }

In [None]:
def print_ordered_results(results_dict, scenario_name):
    """
    Print a ranked error table for one evaluation scenario.

    Parameters
    ----------
    results_dict : dict
        ``{method: {scenario: (mean, std)}}``.
    scenario_name : str
        Scenario key to report (must exist for every method).

    Methods are listed from highest to lowest mean error, so rank 1 is the worst method.
    """
    # (method, (mean, std)) pairs, largest mean first.
    ranked = sorted(
        ((method, results_dict[method][scenario_name]) for method in results_dict.keys()),
        key=lambda entry: entry[1][0],
        reverse=True,
    )

    print(f"\n{scenario_name} Scenario")
    print("=" * 80)
    print(f"{'Rank':<5} {'Method':<15} {'Error (mean ± std)':<35}")
    print("-" * 80)

    for rank, (method, (mean, std)) in enumerate(ranked, start=1):
        print(f"{rank:<5} {method:<15} {mean:>8.4f} ± {std:<8.4f}")

# ========== FIRST compute downstream evaluations ==========
# One evaluation per trained method.
# NOTE(review): this scores on the full preprocessed frames `df_base` / `df_abst`, not on
# the held-out `df_base_test` / `df_abst_test` splits — confirm that this is intended.
all_results = {
    method: downstream_evaluation_fair(entry['T_matrix'], df_base, df_abst)
    for method, entry in diroca_train_results_empirical.items()
}

# ========== THEN print for each scenario ==========
print("\nAbstraction Performance Evaluation")
print("="*80)

# Real → Real, Aug → Real and Real+Aug → Real scenarios, in that order.
for reported_scenario in ('Real', 'Aug', 'AugReal'):
    print_ordered_results(all_results, reported_scenario)

print("\n" + "="*80)