In [1]:
import numpy as np
import torch
import joblib
import matplotlib.pyplot as plt
from tqdm import tqdm
import seaborn as sns
from sklearn.model_selection import train_test_split

import modularised_utils as mut
import opt_utils as oput 
import evaluation_utils as evut
import Linear_Additive_Noise_Models as lanm
import params
import random



# Experiments set-up

In [21]:
# Experiment identifier: selects the entry in params.n_samples and the files
# read from / written to data/<experiment>/ by the cells of this notebook.
experiment = 'lucas6x3'

# True  -> structural coefficients are estimated with mut.get_coefficients.
# False -> they are loaded with mut.load_coeffs (i.e. the structural functions are assumed known).
coeff_estimation = False

# Sample counts registered for this experiment, ordered as (LL, HL).
n_samples_per_level = params.n_samples[experiment]
num_llsamples, num_hlsamples = n_samples_per_level

In [None]:
# Load the sample collection once and reuse it for both levels (it used to be
# loaded twice, once per level).
# NOTE(review): key None is presumably the observational (no-intervention) regime,
# holding the pair (LL samples, HL samples) -- confirm against mut.load_samples.
obs_samples = mut.load_samples(experiment)[None]

# Low-level model: full observational samples, graph and intervention set.
Dll_obs_all = obs_samples[0]
Gll, Ill = mut.load_model(experiment, 'LL')
n_varsll = len(Gll.nodes())

# High-level model: full observational samples, graph and intervention set.
Dhl_obs_all = obs_samples[1]
Ghl, Ihl = mut.load_model(experiment, 'HL')
n_varshl = len(Ghl.nodes())

omega    = mut.load_omega_map(experiment)

# Fraction of each sample set held out for testing.
test_size = 0.1

# The full arrays keep their own names (*_all); Dll_obs / Dhl_obs are the
# training parts used by the rest of the notebook. Both splits use the same
# random_state so they are reproducible.
Dll_obs, Dll_obs_test = train_test_split(Dll_obs_all, test_size=test_size, random_state=42)
Dhl_obs, Dhl_obs_test = train_test_split(Dhl_obs_all, test_size=test_size, random_state=42)

# Persist the held-out parts next to the other artefacts of this experiment.
joblib.dump(Dll_obs_test, f"data/{experiment}/Dll_obs_test.pkl")
joblib.dump(Dhl_obs_test, f"data/{experiment}/Dhl_obs_test.pkl")

In [23]:
# Structural coefficients: estimated from the training samples, or loaded when
# coeff_estimation is off. The flag is tested by truthiness rather than `== True`.
if coeff_estimation:
    ll_coeffs = mut.get_coefficients(Dll_obs, Gll)
    hl_coeffs = mut.get_coefficients(Dhl_obs, Ghl)
else:
    ll_coeffs = mut.load_coeffs(experiment, 'LL')
    hl_coeffs = mut.load_coeffs(experiment, 'HL')

# Abduction step for each level; each call returns three values, unpacked here as
# (U_hat, mu_U_hat, Sigma_U_hat). The mu / Sigma estimates feed theta_hatL / theta_hatH.
U_ll_hat, mu_U_ll_hat, Sigma_U_ll_hat = mut.lan_abduction(Dll_obs, Gll, ll_coeffs)
U_hl_hat, mu_U_hl_hat, Sigma_U_hl_hat = mut.lan_abduction(Dhl_obs, Ghl, hl_coeffs)

# One LinearAddSCM per element of Ill / Ihl, keyed by that element.
# Comprehensions keep the loop variables out of the notebook namespace.
LLmodels = {iota: lanm.LinearAddSCM(Gll, ll_coeffs, iota) for iota in Ill}
HLmodels = {eta: lanm.LinearAddSCM(Ghl, hl_coeffs, eta) for eta in Ihl}

In [None]:
# Destination files for the per-intervention SCM dictionaries.
ll_models_path = f"data/{experiment}/LLmodels.pkl"
hl_models_path = f"data/{experiment}/HLmodels.pkl"

# Persist both dictionaries.
joblib.dump(LLmodels, ll_models_path)
joblib.dump(HLmodels, hl_models_path)

# Optimization

### 1. DIROCA (ε, δ) optimization

In [None]:
# Settings shared by both radius lower-bound computations.
radius_lb_settings = {'eta': 0.05, 'c': 1000}

# Lower bound on the radius for each level, kept to three decimals.
# NOTE(review): N comes from params.n_samples, while Dll_obs / Dhl_obs were reduced
# by the train/test split -- confirm the full sample count is the intended N.
ll_radius_lb = mut.compute_radius_lb(N=num_llsamples, **radius_lb_settings)
ll_bound = round(ll_radius_lb, 3)

hl_radius_lb = mut.compute_radius_lb(N=num_hlsamples, **radius_lb_settings)
hl_bound = round(hl_radius_lb, 3)

# Robustness parameters provided by the theorem.
print(f"LL bound: {ll_bound}, HL bound: {hl_bound}")

In [26]:
# Estimated noise parameters per level plus the radius taken from the bounds above
# (the radius is labelled epsilonL for the LL dict and epsilonH for the HL dict).
theta_hatL = dict(mu_U=mu_U_ll_hat, Sigma_U=Sigma_U_ll_hat, radius=ll_bound)
theta_hatH = dict(mu_U=mu_U_hl_hat, Sigma_U=Sigma_U_hl_hat, radius=hl_bound)

# Hyperparameters forwarded to the optimisation routines through the parameter
# dictionaries built in the following cells.
# NOTE(review): the meaning of each value is defined by the routines in opt_utils;
# the grouping below only follows the names.

# Weights for the LL / HL terms.
lambda_L, lambda_H = 0.6, 0.3
lambda_param_L, lambda_param_H = 0.2, 0.1

# Initialisation and projection switches.
xavier = False
project_onto_gelbrich = True

# eta_max / eta_min share the same value.
eta_max = eta_min = 0.001

# Iteration budget and number of steps per phase.
max_iter = 1000
num_steps_min, num_steps_max = 5, 2

# Robustness is enabled on both levels.
robust_L = robust_H = True

proximal_grad = True

# Tolerance and gradient clipping.
tol = 1e-4
grad_clip = True

seed = 23

# All plotting / reporting is switched off.
plot_steps = plot_epochs = display_results = False

In [27]:
# Keyword arguments for oput.run_erica_optimization, collected in one place.
# Note that the theta_hatL / theta_hatH entries reference the dicts defined above
# (they are not copies).
opt_params_erica = dict(
    theta_hatL=theta_hatL,
    theta_hatH=theta_hatH,
    initial_theta='empirical',
    LLmodels=LLmodels,
    HLmodels=HLmodels,
    omega=omega,
    lambda_L=lambda_L,
    lambda_H=lambda_H,
    lambda_param_L=lambda_param_L,
    lambda_param_H=lambda_param_H,
    xavier=xavier,
    project_onto_gelbrich=project_onto_gelbrich,
    eta_max=eta_max,
    eta_min=eta_min,
    max_iter=max_iter,
    num_steps_min=num_steps_min,
    num_steps_max=num_steps_max,
    proximal_grad=proximal_grad,
    tol=tol,
    seed=seed,
    robust_L=robust_L,
    robust_H=robust_H,
    grad_clip=grad_clip,
    plot_steps=plot_steps,
    plot_epochs=plot_epochs,
    display_results=display_results,
    experiment=experiment,
)

In [None]:
# Radii to sweep; the same value is used for both levels (ε = δ) in each run.
eps_delta_values     = [8, ll_bound, 1, 2, 4]
diroca_train_results = {}

for eps_delta in eps_delta_values:
    print(f"Training for ε=δ = {eps_delta}")

    # Build per-run parameter dicts instead of writing the radius into
    # opt_params_erica['theta_hatL'] / ['theta_hatH'] in place. Those entries are the
    # very same objects as theta_hatL / theta_hatH, so in-place writes left both with
    # the last swept radius and silently changed the inputs of every later cell.
    run_params = {
        **opt_params_erica,
        'theta_hatL': {**opt_params_erica['theta_hatL'], 'radius': eps_delta},
        'theta_hatH': {**opt_params_erica['theta_hatH'], 'radius': eps_delta},
    }

    # Run DIROCA optimization
    params_erica_prox, T_erica_prox = oput.run_erica_optimization(**run_params)

    # Results are keyed by the radius used for the run.
    diroca_train_results['T_'+str(eps_delta)] = {
                                                    'optimization_params': params_erica_prox,
                                                    'T_matrix': T_erica_prox
                                                }

print("\nTraining completed.")
print("Available ε=δ values:", list(diroca_train_results.keys()))

### 2. GRADCA optimization

In [None]:
# GRADCA run: same settings as opt_params_erica, with both robustness flags switched off.
gradca_params = {**opt_params_erica, 'robust_L': False, 'robust_H': False}
params_Lenrico, T_enrico = oput.run_erica_optimization(**gradca_params)

In [30]:
# Store the non-robust run next to the sweep results, under the key 'T_0.00'.
diroca_train_results['T_0.00'] = dict(
    optimization_params=params_Lenrico,
    T_matrix=T_enrico,
)

### 3. BARYCA optimization

In [31]:
# Settings specific to the barycentric optimisation.
# seed, max_iter and tol are reused unchanged from the hyperparameter cell above;
# the former `seed = seed`-style self-assignments were no-ops and have been dropped.
projection_method = 'svd'
initialization = 'avg'
autograd = False
display_results = False

In [32]:
# Keyword arguments for oput.barycentric_optimization.
# The theta dicts are passed under the names theta_L / theta_H here.
opt_params_bary = dict(
    theta_L=theta_hatL,
    theta_H=theta_hatH,
    LLmodels=LLmodels,
    HLmodels=HLmodels,
    Ill=Ill,
    Ihl=Ihl,
    projection_method=projection_method,
    initialization=initialization,
    autograd=autograd,
    seed=seed,
    max_iter=max_iter,
    tol=tol,
    display_results=display_results,
)

In [33]:
# Run the barycentric optimisation and split its two-element result.
bary_output = oput.barycentric_optimization(**opt_params_bary)
params_bary, T_bary = bary_output

In [34]:
# Store the barycentric result under the key 'T_b'.
diroca_train_results['T_b'] = dict(
    optimization_params=params_bary,
    T_matrix=T_bary,
)

### 4. RSCA optimization

In [35]:
# Keyword arguments for oput.random_smoothing_optimization.
# max_iter, noise_sigma and num_samples are set here directly rather than taken
# from the shared hyperparameter cell.
opt_params_smooth = dict(
    theta_hatL=theta_hatL,
    theta_hatH=theta_hatH,
    LLmodels=LLmodels,
    HLmodels=HLmodels,
    Ill=Ill,
    omega=omega,
    xavier=xavier,
    project_onto_gelbrich=project_onto_gelbrich,
    eta_min=eta_min,
    max_iter=300,
    noise_sigma=0.1,
    num_samples=10,
    tol=tol,
    seed=seed,
)

In [None]:
T_smooth = oput.random_smoothing_optimization(**opt_params_smooth)

In [37]:
params_smooth, T_smooth = T_smooth

In [38]:
# Store the random-smoothing result under the key 'T_s'.
diroca_train_results['T_s'] = dict(
    optimization_params=params_smooth,
    T_matrix=T_smooth,
)

## Save the results

In [None]:
# Persist every collected result (all entries of diroca_train_results) in one file.
results_path = f"data/{experiment}/diroca_train_results.pkl"
joblib.dump(diroca_train_results, results_path)