In [20]:
import joblib
import numpy as np
from sklearn.model_selection import KFold
import opt_utilities as optu

In [10]:
experiment = 'slc'

In [16]:
path = f"data/{experiment}"

low_model = joblib.load(f"{path}/LLmodel.pkl")
high_model = joblib.load(f"{path}/HLmodel.pkl")
abstraction_data = joblib.load(f"{path}/abstraction_data.pkl")

G_ll = low_model['graph']
n_varsll = len(G_ll.nodes())
I_ll_relevant = low_model['intervention_set']
D_ll = low_model['data']
LLmodels = low_model['scm_instances']
muL, SigmaL = low_model['noise_dist']['mu'], low_model['noise_dist']['sigma']

G_hl = high_model['graph']
n_varshl = len(G_hl.nodes())
I_hl_relevant = high_model['intervention_set']
D_hl = high_model['data']
HLmodels = high_model['scm_instances']
muH, SigmaH = high_model['noise_dist']['mu'], high_model['noise_dist']['sigma']

T = abstraction_data['T']
omega = abstraction_data['omega']

In [None]:

hyperparameters = {
    # Regularization parameters
    'lambda_L': 0.6,
    'lambda_H': 0.3,
    'lambda_param_L': 0.2,
    'lambda_param_H': 0.1,
    
    # Optimization parameters
    'eta_max': 0.001,
    'eta_min': 0.001,
    'max_iter': 1000,
    'num_steps_min': 5,
    'num_steps_max': 2,
    'tol': 1e-4,

    # K-Fold Cross-Validation settings
    'k_folds': 5,
    'random_state': 42, # For reproducible folds

    # Method flags from your original code
    'xavier': False,
    'project_onto_gelbrich': True,
    'proximal_grad': True,
    'grad_clip': True,
    'robust_L': True,
    'robust_H': True
}

print("✓ Hyperparameters configured and grouped into a dictionary.")
print(f"  - Number of K-fold splits to generate: {hyperparameters['k_folds']}")

✓ Hyperparameters configured and grouped into a dictionary.
  - Number of K-fold splits to generate: 5


In [None]:
# --- Prepare and Save K-Fold Splits ---
num_total_samples = D_ll[None].shape[0]

kf = KFold(n_splits=hyperparameters['k_folds'], 
           shuffle=True, 
           random_state=hyperparameters['random_state'])

# 3. Generate and store the train/test indices for each fold
fold_indices = []
for train_index, test_index in kf.split(np.arange(num_total_samples)):
    fold_indices.append({'train': train_index, 'test': test_index})

# 4. Save the fold definitions to the experiment's data folder for later use
folds_path = f"data/{experiment}/cv_folds.pkl"
joblib.dump(fold_indices, folds_path)

print(f"✓ Created and saved {len(fold_indices)} cross-validation folds to:")
print(f"  '{folds_path}'")

✓ Created and saved 5 cross-validation folds to:
  'data/slc/cv_folds.pkl'


In [None]:
# --- Cell 6: Prepare a Single Fold for Optimization ---

# 1. Load the pre-defined folds we just saved
folds_path = f"data/{experiment}/cv_folds.pkl"
fold_indices = joblib.load(folds_path)

# 2. Select the first fold (fold 0) as an example
print("--- Assembling parameters for Fold 0 ---")
current_fold = fold_indices[0]
train_idx, test_idx = current_fold['train'], current_fold['test']

# You can create the train/test data splits for this fold if your function needs them
# Dll_obs_train, Dll_obs_test = Dll_obs[train_idx], Dll_obs[test_idx]

# 3. Assemble the final optimization parameter dictionary
# Start with the general hyperparameters we defined earlier
opt_params = hyperparameters.copy()

# Add the models and mappings
opt_params['LLmodels'] = LLmodels
opt_params['HLmodels'] = HLmodels
opt_params['omega'] = omega
opt_params['experiment'] = experiment
opt_params['initial_theta'] = 'empirical'

train_n = len(train_idx)

ll_bound = round(optu.compute_radius_lb(N=train_n, eta=0.05, c=1000), 3)
hl_bound = round(optu.compute_radius_lb(N=train_n, eta=0.05, c=1000), 3)

opt_params['theta_hatL'] = {
    'mu_U': muL, 
    'Sigma_U': SigmaL, 
    'radius': ll_bound
}
opt_params['theta_hatH'] = {
    'mu_U': muH, 
    'Sigma_U': SigmaH, 
    'radius': hl_bound
}

--- Assembling parameters for Fold 0 ---

✓ Assembled final optimization parameters for one fold.
The 'opt_params' dictionary is now ready to be passed to your optimization routine.

Keys in opt_params: ['lambda_L', 'lambda_H', 'lambda_param_L', 'lambda_param_H', 'eta_max', 'eta_min', 'max_iter', 'num_steps_min', 'num_steps_max', 'tol', 'k_folds', 'random_state', 'xavier', 'project_onto_gelbrich', 'proximal_grad', 'grad_clip', 'robust_L', 'robust_H', 'LLmodels', 'HLmodels', 'omega', 'experiment', 'initial_theta', 'theta_hatL', 'theta_hatH']


In [2]:
import joblib
import numpy as np
import yaml
from sklearn.model_selection import KFold
import opt_utilities as ut 


def load_all_data(experiment_name):
    """Loads all model blueprints and abstraction data for a given experiment."""
    path = f"data/{experiment_name}"
    data = {
        'LLmodel': joblib.load(f"{path}/LLmodel.pkl"),
        'HLmodel': joblib.load(f"{path}/HLmodel.pkl"),
        'abstraction_data': joblib.load(f"{path}/abstraction_data.pkl")
    }
    print(f"Data loaded for '{experiment_name}'.")

    return data

def prepare_cv_folds(observational_data, k, random_state, save_path):
    """Generates and saves K-Fold train/test indices."""
    kf = KFold(n_splits=k, shuffle=True, random_state=random_state)
    num_samples = observational_data.shape[0]
    
    fold_indices = [{'train': train_idx, 'test': test_idx} 
                    for train_idx, test_idx in kf.split(np.arange(num_samples))]
    
    joblib.dump(fold_indices, save_path)
    print(f"Created and saved {len(fold_indices)} folds to '{save_path}'")
    return fold_indices

def assemble_fold_parameters(fold_indices, all_data, hyperparameters):
    """Assembles the final opt_params dictionary for a specific fold."""
    # Start with the general hyperparameters
    opt_params = hyperparameters.copy()

    # Add the core models and mappings
    opt_params['LLmodels'] = all_data['LLmodel'].get('scm_collection')
    opt_params['HLmodels'] = all_data['HLmodel'].get('scm_collection')
    opt_params['omega'] = all_data['abstraction_data']['omega']
    opt_params['experiment'] = all_data['experiment_name']
    opt_params['initial_theta'] = 'empirical'
    
    # Calculate fold-specific radius
    train_n  = len(fold_indices['train'])
    ll_bound = round(ut.compute_radius_lb(N=train_n, eta=0.05, c=1000), 3)
    hl_bound = round(ut.compute_radius_lb(N=train_n, eta=0.05, c=1000), 3)

    # Add the final theta parameters
    opt_params['theta_hatL'] = {
                                    'mu_U': all_data['LLmodel']['noise_dist']['mu'], 
                                    'Sigma_U': all_data['LLmodel']['noise_dist']['sigma'], 
                                    'radius': ll_bound
                                }
    opt_params['theta_hatH'] = {
                                    'mu_U': all_data['HLmodel']['noise_dist']['mu'], 
                                    'Sigma_U': all_data['HLmodel']['noise_dist']['sigma'], 
                                    'radius': hl_bound
                                }
    
    return opt_params

In [3]:

experiment = 'slc'
all_data = load_all_data(experiment)
with open('configs/optimization_hyperparams.yaml', 'r') as f:
    hyperparams = yaml.safe_load(f)

# Add experiment name to the data dictionary for easy access
all_data['experiment_name'] = experiment

# 2. Prepare and save the cross-validation folds (a one-time setup)
Dll_obs = all_data['LLmodel']['data'][None]
folds_path = f"data/{experiment}/cv_folds.pkl"
saved_folds = prepare_cv_folds(Dll_obs, hyperparams['k_folds'], hyperparams['seed'], folds_path)

Data loaded for 'slc'.
Created and saved 5 folds to 'data/slc/cv_folds.pkl'


In [9]:

# --- The Main Optimization and Evaluation Loop ---

# 1. Load the pre-defined folds
folds_path = f"data/{experiment}/cv_folds.pkl"
saved_folds = joblib.load(folds_path)

# 2. Initialize a dictionary to store all results
cv_results = {}

# 3. Outer Loop: Iterate through each cross-validation fold
for i, fold_info in enumerate(saved_folds):
    print(f"\n--- Starting Cross-Validation for Fold {i+1}/{len(saved_folds)} ---")
    cv_results[f'fold_{i}'] = {}
    
    # Determine the fold-specific radius bound to include in our search
    train_n = len(fold_info['train'])
    ll_bound = round(ut.compute_radius_lb(N=train_n, eta=0.05, c=1000), 3)
    
    # Define the hyperparameter values to search over for this fold
    eps_delta_values = [ll_bound, 1, 2, 4, 8]

    # 4. Inner Loop: Iterate through each hyperparameter value (eps_delta)
    for eps_delta in eps_delta_values:
        print(f"  - Training for ε=δ = {eps_delta}")

        # a. Assemble the base parameters for this specific fold
        #    This uses the helper function we defined earlier.
        params_for_run = assemble_fold_parameters(fold_info, all_data, hyperparams)
        
        # b. Update the radius for this specific run
        params_for_run['theta_hatL']['radius'] = eps_delta
        params_for_run['theta_hatH']['radius'] = eps_delta
        
        # c. Run the actual optimization
        #    This calls your function with all the prepared parameters.
        trained_params, trained_T = oput.run_erica_optimization(**params_for_run)
        
        # d. Store the results in our nested dictionary
        cv_results[f'fold_{i}'][f'T_{eps_delta}'] = {
            'optimization_params': trained_params,
            'T_matrix': trained_T
        }

print("\n\n--- Optimization Complete ---")
print(f"Results for all {len(saved_folds)} folds have been stored in the 'cv_results' dictionary.")
# print("Result structure:", cv_results.keys())


--- Starting Cross-Validation for Fold 1/5 ---
  - Training for ε=δ = 0.111


TypeError: run_erica_optimization() got an unexpected keyword argument 'k_folds'

In [5]:
import time

# --- The Main K-Fold Cross-Validation Loop ---

# 1. Load the pre-defined folds and hyperparameters
folds_path = f"data/{experiment}/cv_folds.pkl"
saved_folds = joblib.load(folds_path)

# 2. Initialize a list to store the results from each fold
cv_results = []

print(f"🚀 Starting {len(saved_folds)}-Fold Cross-Validation for experiment: '{experiment}'...")
start_time = time.time()

# 3. Outer Loop: Iterate through each cross-validation fold
for i, fold_info in enumerate(saved_folds):
    print(f"\n--- Running Optimization for Fold {i+1}/{len(saved_folds)} ---")

    # a. Assemble the base parameters for this specific fold
    params_for_this_fold = assemble_fold_parameters(fold_info, all_data, hyperparams)
    
    # b. Prepare a clean dictionary for the optimization function
    #    This removes keys that run_erica_optimization doesn't expect.
    erica_args = params_for_this_fold.copy()
    erica_args.pop('k_folds', None)
    erica_args.pop('random_state', None)
    
    # c. Run the actual optimization for this fold
    trained_params, trained_T = oput.run_erica_optimization(**erica_args)
    
    # d. Store the important results from this fold
    cv_results.append({
        'fold': i,
        'test_indices': fold_info['test'],
        'T_matrix': trained_T,
        'optimization_params': trained_params
    })
    
    print(f"✓ Fold {i+1} complete.")

end_time = time.time()
print("\n\n--- Cross-Validation Finished ---")
print(f"Total time: {end_time - start_time:.2f} seconds")
print(f"Stored results for all {len(cv_results)} folds in the 'cv_results' list.")

# You can now save your final results
results_path = f"data/{experiment}/cv_results.pkl"
joblib.dump(cv_results, results_path)
print(f"✓ Final results saved to '{results_path}'")

🚀 Starting 5-Fold Cross-Validation for experiment: 'slc'...

--- Running Optimization for Fold 1/5 ---


  0%|          | 0/1000 [00:00<?, ?it/s]


AttributeError: 'NoneType' object has no attribute 'keys'

In [6]:
import numpy as np
import torch
import joblib
import matplotlib.pyplot as plt
from tqdm import tqdm
import seaborn as sns
from sklearn.model_selection import train_test_split

import modularised_utils as mut
import opt_utils as oput 
import evaluation_utils as evut
import Linear_Additive_Noise_Models as lanm
import params
import random

# Experiments set-up

In [7]:
experiment       = 'synth1' # or 'lucas6x3'
coeff_estimation = False #assumes knowledge of the structural functions when set to False

num_llsamples, num_hlsamples  = params.n_samples[experiment]



In [8]:
Dll_obs  = mut.load_samples(experiment)[None][0] 
Gll, Ill = mut.load_model(experiment, 'LL')
n_varsll = len(Gll.nodes())

Dhl_obs  = mut.load_samples(experiment)[None][1] 
Ghl, Ihl = mut.load_model(experiment, 'HL')
n_varshl = len(Ghl.nodes())

omega    = mut.load_omega_map(experiment)

test_size = 0.1

Dll_obs, Dll_obs_test = train_test_split(Dll_obs, test_size=test_size, random_state=42)
Dhl_obs, Dhl_obs_test = train_test_split(Dhl_obs, test_size=test_size, random_state=42)

joblib.dump(Dll_obs_test, f"data/{experiment}/Dll_obs_test.pkl")
joblib.dump(Dhl_obs_test, f"data/{experiment}/Dhl_obs_test.pkl")

['data/synth1/Dhl_obs_test.pkl']

In [9]:
if coeff_estimation == True:
    ll_coeffs = mut.get_coefficients(Dll_obs, Gll)
    hl_coeffs = mut.get_coefficients(Dhl_obs, Ghl) 
else:
    ll_coeffs = mut.load_coeffs(experiment, 'LL')
    hl_coeffs = mut.load_coeffs(experiment, 'HL')

U_ll_hat, mu_U_ll_hat, Sigma_U_ll_hat = mut.lan_abduction(Dll_obs, Gll, ll_coeffs)
U_hl_hat, mu_U_hl_hat, Sigma_U_hl_hat = mut.lan_abduction(Dhl_obs, Ghl, hl_coeffs)

LLmodels = {}
for iota in Ill:
    LLmodels[iota] = lanm.LinearAddSCM(Gll, ll_coeffs, iota)
    
HLmodels = {}
for eta in Ihl:
    HLmodels[eta] = lanm.LinearAddSCM(Ghl, hl_coeffs, eta)

In [10]:
LLmodels

{<operations.Intervention at 0x163a20e00>: <Linear_Additive_Noise_Models.LinearAddSCM at 0x160c924b0>,
 <operations.Intervention at 0x163a22a20>: <Linear_Additive_Noise_Models.LinearAddSCM at 0x16332ae70>,
 <operations.Intervention at 0x163a21b50>: <Linear_Additive_Noise_Models.LinearAddSCM at 0x163a21040>,
 <operations.Intervention at 0x163a22930>: <Linear_Additive_Noise_Models.LinearAddSCM at 0x163a23350>,
 <operations.Intervention at 0x163a22c00>: <Linear_Additive_Noise_Models.LinearAddSCM at 0x16360c9b0>,
 None: <Linear_Additive_Noise_Models.LinearAddSCM at 0x163a23500>}

In [None]:
joblib.dump(LLmodels, f"data/{experiment}/LLmodels.pkl")
joblib.dump(HLmodels, f"data/{experiment}/HLmodels.pkl")

# Optimization

### Diroca_e,d optimization

In [None]:
ll_bound = round(mut.compute_radius_lb(N=num_llsamples, eta=0.05, c=1000), 3)
hl_bound = round(mut.compute_radius_lb(N=num_hlsamples, eta=0.05, c=1000), 3)

print(f"LL bound: {ll_bound}, HL bound: {hl_bound}") 

In [26]:
theta_hatL   = {'mu_U': mu_U_ll_hat, 'Sigma_U': Sigma_U_ll_hat, 'radius': ll_bound} #epsilonL
theta_hatH   = {'mu_U': mu_U_hl_hat, 'Sigma_U': Sigma_U_hl_hat, 'radius': hl_bound} #epsilonH

lambda_L = .6 

lambda_H = .3 

lambda_param_L = .2 
lambda_param_H = .1 

xavier = False
project_onto_gelbrich = True

eta_max = 0.001
eta_min = 0.001

max_iter = 1000
num_steps_min = 5
num_steps_max = 2

robust_L = True 
robust_H = True

proximal_grad = True

tol = 1e-4
grad_clip = True

seed = 23

plot_steps = False
plot_epochs = False
display_results = False

In [27]:
opt_params_erica =  {
                        'theta_hatL': theta_hatL,
                        'theta_hatH': theta_hatH,
                        'initial_theta': 'empirical',
                        'LLmodels': LLmodels,
                        'HLmodels': HLmodels,
                        'omega': omega,
                        'lambda_L': lambda_L,
                        'lambda_H': lambda_H,
                        'lambda_param_L': lambda_param_L,
                        'lambda_param_H': lambda_param_H,
                        'xavier': xavier, 
                        'project_onto_gelbrich': project_onto_gelbrich, 
                        'eta_max': eta_max,
                        'eta_min': eta_min,
                        'max_iter': max_iter,
                        'num_steps_min': num_steps_min,
                        'num_steps_max': num_steps_max,
                        'proximal_grad': proximal_grad,
                        'tol': tol,
                        'seed': seed,
                        'robust_L': robust_L,
                        'robust_H': robust_H,
                        'grad_clip': grad_clip,
                        'plot_steps': plot_steps,
                        'plot_epochs': plot_epochs,
                        'display_results': display_results,
                        'experiment': experiment
                    }

In [None]:
eps_delta_values     = [8, ll_bound, 1, 2, 4]
diroca_train_results = {}

for eps_delta in eps_delta_values:
    print(f"Training for ε=δ = {eps_delta}")

    # Update theta parameters
    opt_params_erica['theta_hatL']['radius'] = eps_delta
    opt_params_erica['theta_hatH']['radius'] = eps_delta
    
    # Run DIROCA optimization
    params_erica_prox, T_erica_prox = oput.run_erica_optimization(**opt_params_erica)
    
    diroca_train_results['T_'+str(eps_delta)] = {
                                                    'optimization_params': params_erica_prox,
                                                    'T_matrix': T_erica_prox
                                                }

print("\nTraining completed.")
print("Available ε=δ values:", list(diroca_train_results.keys()))

### 2. GRADCA optimization

In [None]:
params_Lenrico, T_enrico = oput.run_erica_optimization(**{**opt_params_erica, 'robust_L': False, 'robust_H': False})

In [30]:
diroca_train_results['T_0.00'] = {
                                'optimization_params': params_Lenrico,
                                'T_matrix': T_enrico
                            }

### 3. BARYCA optimization

In [31]:
projection_method = 'svd'
initialization = 'avg'
autograd = False
seed = seed
max_iter = max_iter
tol = tol
display_results = False

In [32]:
opt_params_bary =  {
                    'theta_L': theta_hatL,
                    'theta_H': theta_hatH,
                    'LLmodels': LLmodels,
                    'HLmodels': HLmodels,
                    'Ill': Ill,
                    'Ihl': Ihl,
                    'projection_method': projection_method,
                    'initialization': initialization,
                    'autograd': autograd,
                    'seed': seed,
                    'max_iter': max_iter,
                    'tol': tol,
                    'display_results': display_results
                    }

In [33]:
params_bary, T_bary = oput.barycentric_optimization(**opt_params_bary)

In [34]:
diroca_train_results['T_b'] = {
                                'optimization_params': params_bary,
                                'T_matrix': T_bary
                            }

## Save the results

In [None]:
joblib.dump(diroca_train_results, f"data/{experiment}/diroca_train_results.pkl")