In [1]:
import numpy as np
import torch
import joblib
import matplotlib.pyplot as plt
from tqdm import tqdm
import seaborn as sns

# Local modules
import modularised_utils as mut
import opt_utils as oput 
import evaluation_utils as evut
import Linear_Additive_Noise_Models as lanm
import params
import random



# Experiments set-up

In [2]:
experiment       = 'synth1'
#abduction       = False
coeff_estimation = False

# Define the number of samples per environment. Currently every environment has the same number of samples.
# The '.1' / '.2' experiment variants reuse the sample counts stored under their base experiment name.
if experiment in ('synth1', 'synth1.1', 'synth1.2'):
    num_llsamples, num_hlsamples = params.n_samples['synth1']
elif experiment in ('lucas6x3', 'lucas6x3.1', 'lucas6x3.2'):
    num_llsamples, num_hlsamples = params.n_samples['lucas6x3']
else:
    # Fail here instead of leaving the sample counts undefined and hitting a
    # NameError later, when the radius bounds are computed.
    raise ValueError(f"Unknown experiment {experiment!r}: no sample counts configured.")


In [3]:
# Load the stored samples once and reuse them for both abstraction levels.
# The entry under key None holds a pair: element 0 is used as the low-level data,
# element 1 as the high-level data (presumably the observational setting -- TODO confirm).
samples  = mut.load_samples(experiment)
Dll_obs  = samples[None][0]
Dhl_obs  = samples[None][1]

# Low-level model: a graph and the collection that is iterated to build one SCM per element.
Gll, Ill = mut.load_model(experiment, 'LL')
n_varsll = len(Gll.nodes())

# High-level model, loaded the same way.
Ghl, Ihl = mut.load_model(experiment, 'HL')
n_varshl = len(Ghl.nodes())

omega    = mut.load_omega_map(experiment)

In [4]:
# Structural coefficients: either estimated from the loaded data or read from disk.
if coeff_estimation:
    ll_coeffs = mut.get_coefficients(Dll_obs, Gll)
    hl_coeffs = mut.get_coefficients(Dhl_obs, Ghl)
else:
    ll_coeffs = mut.load_coeffs(experiment, 'LL')
    hl_coeffs = mut.load_coeffs(experiment, 'HL')

# Abduction is always run here and returns three values per level, which are
# unpacked as the noise estimate, its mean and its covariance.
# NOTE: a previously available (now disabled) `abduction` switch loaded these
# instead via mut.load_exogenous(experiment, 'LL') / mut.load_exogenous(experiment, 'HL').
U_ll_hat, mu_U_ll_hat, Sigma_U_ll_hat = mut.lan_abduction(Dll_obs, Gll, ll_coeffs)
U_hl_hat, mu_U_hl_hat, Sigma_U_hl_hat = mut.lan_abduction(Dhl_obs, Ghl, hl_coeffs)

In [5]:
# One linear additive-noise SCM per element of Ill (low level) and Ihl (high level),
# keyed by that element.
LLmodels = {ll_key: lanm.LinearAddSCM(Gll, ll_coeffs, ll_key) for ll_key in Ill}
HLmodels = {hl_key: lanm.LinearAddSCM(Ghl, hl_coeffs, hl_key) for hl_key in Ihl}

# Optimization

### 1. Diroca_ε,δ optimization

In [6]:
# Radius lower bounds for each level, computed from the per-level sample counts
# with shared settings and rounded to three decimals.
radius_settings = dict(eta=0.05, c=1000)

ll_bound_raw = mut.compute_radius_lb(N=num_llsamples, **radius_settings)
hl_bound_raw = mut.compute_radius_lb(N=num_hlsamples, **radius_settings)

ll_bound = round(ll_bound_raw, 3)
hl_bound = round(hl_bound_raw, 3)

print(f"LL bound: {ll_bound}, HL bound: {hl_bound}")

LL bound: 0.031, HL bound: 0.031


In [7]:
# Per-level parameter bundles: abducted noise mean and covariance plus the radius bound.
theta_hatL = dict(mu_U=mu_U_ll_hat, Sigma_U=Sigma_U_ll_hat, radius=ll_bound)  # epsilonL
theta_hatH = dict(mu_U=mu_U_hl_hat, Sigma_U=Sigma_U_hl_hat, radius=hl_bound)  # epsilonH

# --- Regularisation weights ---
# .6 / .3 are the values used for synth1 and epsilon-robust lucas
lambda_L, lambda_H = .6, .3
# alternatives tried previously: .3 / .3
lambda_param_L, lambda_param_H = .2, .1

# --- Initialisation and projection switches ---
xavier, project_onto_gelbrich = False, True

# --- Step sizes ---
eta_max, eta_min = 0.001, 0.001

# --- Iteration budget ---
max_iter = 1000
num_steps_min, num_steps_max = 5, 2

# --- Robustness switches for the low-level and high-level side ---
robust_L, robust_H = True, True

# --- Optimiser behaviour ---
proximal_grad = True
tol = 1e-4
grad_clip = True

seed = 23

# --- Reporting ---
plot_steps, plot_epochs, display_results = False, False, False

In [8]:
# Keyword arguments passed to oput.run_erica_optimization, collected from the
# model objects and the hyperparameters defined in the cells above.
opt_params_erica = dict(
    # parameter bundles and models
    theta_hatL=theta_hatL,
    theta_hatH=theta_hatH,
    initial_theta='empirical',
    LLmodels=LLmodels,
    HLmodels=HLmodels,
    omega=omega,
    # regularisation weights
    lambda_L=lambda_L,
    lambda_H=lambda_H,
    lambda_param_L=lambda_param_L,
    lambda_param_H=lambda_param_H,
    # initialisation / projection
    xavier=xavier,
    project_onto_gelbrich=project_onto_gelbrich,
    # step sizes and iteration budget
    eta_max=eta_max,
    eta_min=eta_min,
    max_iter=max_iter,
    num_steps_min=num_steps_min,
    num_steps_max=num_steps_max,
    # optimiser behaviour
    proximal_grad=proximal_grad,
    tol=tol,
    seed=seed,
    robust_L=robust_L,
    robust_H=robust_H,
    grad_clip=grad_clip,
    # reporting
    plot_steps=plot_steps,
    plot_epochs=plot_epochs,
    display_results=display_results,
    experiment=experiment,
)

In [9]:
# Radii to sweep. The same value is used for the low-level and the high-level radius (ε = δ).
eps_delta_values     = [8, ll_bound, 1, 2, 4]
diroca_train_results = {}

# For each epsilon=delta value
for eps_delta in eps_delta_values:
    print(f"Training for ε=δ = {eps_delta}")

    # Build per-run copies of the theta dicts instead of writing the radius into
    # them in place. The originals are shared with the later ENRiCO, barycentric
    # and smoothing cells; mutating them here would leave the last swept radius
    # behind and make those cells depend on the order of this list.
    run_params = {
        **opt_params_erica,
        'theta_hatL': {**opt_params_erica['theta_hatL'], 'radius': eps_delta},
        'theta_hatH': {**opt_params_erica['theta_hatH'], 'radius': eps_delta},
    }

    # Run ERICA optimization
    params_erica_prox, T_erica_prox = oput.run_erica_optimization(**run_params)

    # Store results including optimization parameters and transformation matrix,
    # keyed by the radius (e.g. 'T_8', 'T_0.031').
    diroca_train_results['T_'+str(eps_delta)] = {
                                                    'optimization_params': params_erica_prox,
                                                    'T_matrix': T_erica_prox
                                                }

print("\nTraining completed. T matrices stored in trained_results dictionary.")
print("Available ε=δ values:", list(diroca_train_results.keys()))



Training for ε=δ = 8


 27%|██▋       | 271/1000 [00:25<01:07, 10.79it/s]


Convergence reached at epoch 272 with objective 2.419301986694336
Training for ε=δ = 0.031


 31%|███       | 307/1000 [00:27<01:01, 11.34it/s]


Convergence reached at epoch 308 with objective 0.015491485595703125
Training for ε=δ = 1


 14%|█▍        | 139/1000 [00:11<01:13, 11.66it/s]


Convergence reached at epoch 140 with objective 0.00617218017578125
Training for ε=δ = 2


 69%|██████▊   | 687/1000 [01:00<00:27, 11.31it/s]


Convergence reached at epoch 688 with objective 0.028204917907714844
Training for ε=δ = 4


 27%|██▋       | 271/1000 [00:23<01:02, 11.67it/s]


Convergence reached at epoch 272 with objective 2.419301986694336

Training completed. T matrices stored in trained_results dictionary.
Available ε=δ values: ['T_8', 'T_0.031', 'T_1', 'T_2', 'T_4']


### 2. ENRiCO optimization

In [10]:
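# TODO: increase max_iter for this run -- the recorded ENRiCO output below uses all 1000 iterations without a "Convergence reached" message.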

In [11]:
# Same optimiser and settings as the ERICA runs, with both robustness switches turned off.
enrico_overrides = {'robust_L': False, 'robust_H': False}
enrico_params = {**opt_params_erica, **enrico_overrides}
params_Lenrico, T_enrico = oput.run_erica_optimization(**enrico_params)

100%|██████████| 1000/1000 [00:10<00:00, 96.15it/s]


In [12]:
# Store the non-robust run under the key 'T_0.00', alongside the swept-radius results.
diroca_train_results['T_0.00'] = dict(
    optimization_params=params_Lenrico,
    T_matrix=T_enrico,
)

### 3. Barycentric optimization

In [13]:
# Settings specific to the barycentric optimisation.
# seed, max_iter and tol are reused unchanged from the hyperparameter cell above;
# the former self-assignments (seed = seed, ...) were no-ops and have been dropped.
projection_method = 'svd'
initialization = 'avg'
autograd = False
display_results = False

In [14]:
# Keyword arguments passed to oput.barycentric_optimization.
opt_params_bary = dict(
    theta_L=theta_hatL,
    theta_H=theta_hatH,
    LLmodels=LLmodels,
    HLmodels=HLmodels,
    Ill=Ill,
    Ihl=Ihl,
    projection_method=projection_method,
    initialization=initialization,
    autograd=autograd,
    seed=seed,
    max_iter=max_iter,
    tol=tol,
    display_results=display_results,
)

In [15]:
# Monge method: run with the settings exactly as collected in opt_params_bary.
params_bary, T_bary = oput.barycentric_optimization(**opt_params_bary)

# Autograd method: identical settings except that 'autograd' is switched on.
bary_autograd_params = dict(opt_params_bary, autograd=True)
params_bary_auto, T_bary_auto = oput.barycentric_optimization(**bary_autograd_params)

In [16]:
# Store both barycentric variants: 'T_b' for the Monge run, 'T_ba' for the autograd run.
diroca_train_results['T_b'] = dict(
    optimization_params=params_bary,
    T_matrix=T_bary,
)
diroca_train_results['T_ba'] = dict(
    optimization_params=params_bary_auto,
    T_matrix=T_bary_auto,
)

### 4. Random Smoothing

In [30]:
# Settings passed to the random-smoothing optimisation as 'noise_sigma' and 'num_samples'.
noise_sigma, num_samples = 1.0, 2

In [31]:
# Keyword arguments passed to oput.random_smoothing_optimization.
opt_params_smooth = dict(
    theta_hatL=theta_hatL,
    theta_hatH=theta_hatH,
    LLmodels=LLmodels,
    HLmodels=HLmodels,
    Ill=Ill,
    omega=omega,
    xavier=xavier,
    project_onto_gelbrich=project_onto_gelbrich,
    eta_min=eta_min,
    max_iter=max_iter,
    noise_sigma=noise_sigma,
    num_samples=num_samples,
    tol=tol,
    seed=seed,
)

In [32]:
T_smooth = oput.random_smoothing_optimization(**opt_params_smooth)

100%|██████████| 1000/1000 [00:15<00:00, 65.88it/s]


In [33]:
params_smooth, T_smooth = T_smooth

In [34]:
# Store the random-smoothing result under the key 'T_s'.
diroca_train_results['T_s'] = dict(
    optimization_params=params_smooth,
    T_matrix=T_smooth,
)

## Save the results

In [35]:
# Persist all collected results for this experiment; joblib.dump returns the list
# of written file names, which the notebook displays as the cell output.
results_path = f"data/{experiment}/diroca_train_results.pkl"
joblib.dump(diroca_train_results, results_path)

['data/synth1/diroca_train_results.pkl']