In [1]:
import json
import pickle
import time
from datetime import datetime, timedelta
from itertools import product

import numpy as np
import seaborn as sns
import torch
import torch.nn as nn
import joblib
import os

import matplotlib.pyplot as plt
from tqdm import tqdm

# Local modules
import modularised_utils as mut
import opt_utils as oput
import evaluation_utils as evut
import Linear_Additive_Noise_Models as lanm
import operations as ops
import params

# Seed NumPy's legacy global RNG for this kernel session.
# NOTE(review): only NumPy is seeded here; torch is imported above but is not
# seeded in this cell — confirm the optimisation routines seed it via `seed`.
np.random.seed(0)



In [2]:
# Experiment identifier: indexes `params` and is passed to every `mut.load_*` call.
experiment       = 'synth1'
#abduction       = False
# True  -> coefficients are estimated from the observational samples;
# False -> coefficients are loaded with mut.load_coeffs.
coeff_estimation = False

# Sample counts configured for this experiment (presumably low-level first,
# high-level second, matching the variable names — confirm in params.py).
num_llsamples, num_hlsamples  = params.n_samples[experiment]

In [3]:
# Load the samples once and reuse them for both levels (previously the loader
# was invoked twice). The entry under the `None` key is indexed with 0 for the
# low-level data and 1 for the high-level data.
# NOTE(review): `None` presumably denotes the observational (no-intervention)
# setting — confirm against mut.load_samples.
obs_samples = mut.load_samples(experiment)[None]

# Low-level (LL) data, graph and the collection iterated later as interventions.
Dll_obs  = obs_samples[0]
Gll, Ill = mut.load_model(experiment, 'LL')
l        = len(Gll.nodes())   # number of nodes in the LL graph

# High-level (HL) counterparts.
Dhl_obs  = obs_samples[1]
Ghl, Ihl = mut.load_model(experiment, 'HL')
h        = len(Ghl.nodes())   # number of nodes in the HL graph

omega    = mut.load_omega_map(experiment)

In [4]:
# Coefficients: estimate them from the observational data, or load stored ones.
# The flag is tested for truthiness rather than compared with `== True`.
if coeff_estimation:
    ll_coeffs = mut.get_coefficients(Dll_obs, Gll)
    hl_coeffs = mut.get_coefficients(Dhl_obs, Ghl)
else:
    ll_coeffs = mut.load_coeffs(experiment, 'LL')
    hl_coeffs = mut.load_coeffs(experiment, 'HL')

# Abduction step for both levels; each call yields three values that are
# unpacked as (U_hat, mu_U_hat, Sigma_U_hat).
U_ll_hat, mu_U_ll_hat, Sigma_U_ll_hat = mut.lan_abduction(Dll_obs, Gll, ll_coeffs)
U_hl_hat, mu_U_hl_hat, Sigma_U_hl_hat = mut.lan_abduction(Dhl_obs, Ghl, hl_coeffs)
# Disabled alternative (formerly behind an `abduction` flag): read the same three
# values from disk with mut.load_exogenous(experiment, 'LL') / (experiment, 'HL').

In [5]:
# One LinearAddSCM per element of the low-level collection `Ill`, keyed by that element.
LLmodels = {iota: lanm.LinearAddSCM(Gll, ll_coeffs, iota) for iota in Ill}

# Same construction for the high-level side; Dhl_samples starts out empty.
HLmodels = {eta: lanm.LinearAddSCM(Ghl, hl_coeffs, eta) for eta in Ihl}
Dhl_samples = {}

In [6]:
# Matrices derived from the per-intervention models, computed for the low-level
# side first and the high-level side second.
L_matrices, H_matrices = (
    oput.compute_struc_matrices(LLmodels, Ill),
    oput.compute_struc_matrices(HLmodels, Ihl),
)

In [7]:
# ll_loc = params.loc[experiment][0]
# ll_scale = params.scale[experiment][0]
# hl_loc = params.loc[experiment][1]
# hl_scale = params.scale[experiment][1]
# lenv = ops.MultivariateLaplace(ll_loc, ll_scale) #LIGHT TAILS??????
# henv = ops.MultivariateLaplace(hl_loc, hl_scale)

# U_ll_hat = lenv.sample(num_llsamples)
# U_hl_hat = henv.sample(num_hlsamples)

# Optimisation

### 1. ERiCA optimization


In [8]:
def compute_empirical_radius(N, eta, c1=1.0, c2=1.0, alpha=2.0, m=3):
    """
    Compute epsilon_N(eta) for empirical Wasserstein case.

    With ``t = log(c1 / eta) / c2`` the radius is

    - ``(t / N) ** min(1/m, 1/2)``  when ``N >= t``
    - ``(t / N) ** (1 / alpha)``    when ``N <  t``

    Parameters:
    - N: int, number of samples (must be positive)
    - eta: float, confidence level (0 < eta < 1)
    - c1: float, constant from theorem (default 1.0, adjust if needed);
      must satisfy c1 >= eta so that log(c1 / eta) is non-negative
    - c2: float, constant from theorem (default 1.0, adjust if needed);
      must be positive
    - alpha: float, light-tail exponent (E[exp(||ξ||^α)] ≤ A); only used, and
      only validated, in the small-sample branch
    - m: int, ambient dimension; only used, and only validated, in the
      large-sample branch

    Returns:
    - epsilon: float, the concentration radius

    Raises:
    - AssertionError: if eta is not in (0, 1)
    - ValueError: if N, c1, c2, or the branch-relevant m / alpha would make the
      radius undefined (these inputs previously produced inf/NaN or a
      ZeroDivisionError)
    """
    assert 0 < eta < 1, "eta must be in (0,1)"
    if N <= 0:
        raise ValueError("N must be a positive number of samples")
    if c2 <= 0:
        raise ValueError("c2 must be positive")
    if c1 < eta:
        # log(c1 / eta) would be negative (or undefined), giving a NaN radius.
        raise ValueError("c1 must be >= eta so that log(c1 / eta) is non-negative")

    log_term = np.log(c1 / eta)
    threshold = log_term / c2

    if N >= threshold:
        if m <= 0:
            raise ValueError("m must be a positive dimension")
        # Equivalent to 1 / max(m, 2): the rate never exceeds 1/2.
        exponent = min(1 / m, 0.5)
    else:
        if alpha <= 0:
            raise ValueError("alpha must be positive")
        exponent = 1 / alpha

    epsilon = (log_term / (c2 * N)) ** exponent
    return epsilon


In [9]:
# Both radii use the same confidence level and constants; only the sample count
# and the dimension (node count of the respective graph) differ.
shared_radius_args = dict(eta=0.05, c1=1000.0, c2=1.0, alpha=2.0)

ll_bound = round(compute_empirical_radius(N=num_llsamples, m=l, **shared_radius_args), 3)
hl_bound = round(compute_empirical_radius(N=num_hlsamples, m=h, **shared_radius_args), 3)


In [10]:
# Default radii: epsilon takes the low-level bound, delta the high-level bound.
epsilon, delta = ll_bound, hl_bound

# Passed to the optimiser as 'eta_max' / 'eta_min' — presumably step sizes for
# the maximisation and minimisation updates; confirm in opt_utils.
eta_max = 0.001
eta_min = 0.001

# Iteration budget and per-iteration step counts handed to the optimiser
# (presumably inner min / max steps — confirm in opt_utils).
max_iter = 1000
num_steps_min = 5
num_steps_max = 2

# Robustness switches; both are overridden to False for the ENRiCO run further down.
robust_L = True 
robust_H = True

initialization = 'random'

# Forwarded as 'tol' and 'seed' to the optimisation routines.
tol  = 1e-4
seed = 23

In [11]:
# Keyword arguments for oput.run_empirical_erica_optimization, grouped by role.
opt_params_erica = {
    # estimated exogenous samples and the per-intervention models
    'U_L': U_ll_hat,
    'U_H': U_hl_hat,
    'L_models': LLmodels,
    'H_models': HLmodels,
    'omega': omega,
    # radii (overwritten per run by the training loop)
    'epsilon': epsilon,
    'delta': delta,
    # optimiser settings
    'eta_min': eta_min,
    'eta_max': eta_max,
    'num_steps_min': num_steps_min,
    'num_steps_max': num_steps_max,
    'max_iter': max_iter,
    'tol': tol,
    'seed': seed,
    # behaviour switches
    'robust_L': robust_L,
    'robust_H': robust_H,
    'initialization': initialization,
    'experiment': experiment,
}

In [12]:
# Define different epsilon=delta values
eps_delta_values     = [8, ll_bound, 1, 2, 4]
diroca_train_results_empirical = {}

# For each epsilon=delta value
for eps_delta in eps_delta_values:
    print(f"Training for ε=δ = {eps_delta}")
    # Update theta parameters
    if eps_delta == ll_bound:
        opt_params_erica['epsilon'] = ll_bound
        opt_params_erica['delta']   = hl_bound
    
    else:
        opt_params_erica['epsilon'] = eps_delta
        opt_params_erica['delta']   = eps_delta
    
    # Run ERICA optimization
    params_empirical, T_empirical = oput.run_empirical_erica_optimization(**opt_params_erica)
    
    # Store results including optimization parameters and transformation matrix
    if eps_delta == ll_bound:
        diroca_train_results_empirical['T_'+str(ll_bound)+'-'+str(hl_bound)] = {
                                                    'optimization_params': params_empirical,
                                                    'T_matrix': T_empirical
                                                }
    else:
        diroca_train_results_empirical['T_'+str(eps_delta)] = {
                                                    'optimization_params': params_empirical,
                                                    'T_matrix': T_empirical
                                                }

print("\nTraining completed. T matrices stored in trained_results dictionary.")
print("Available ε=δ values:", list(diroca_train_results_empirical.keys()))

Training for ε=δ = 8


 15%|█▌        | 154/1000 [00:27<02:29,  5.64it/s]


Converged at iteration 155
Training for ε=δ = 0.046


 80%|████████  | 805/1000 [02:24<00:34,  5.57it/s]


Converged at iteration 806
Training for ε=δ = 1


 22%|██▏       | 217/1000 [00:39<02:21,  5.54it/s]


Converged at iteration 218
Training for ε=δ = 2


 15%|█▌        | 154/1000 [00:28<02:34,  5.47it/s]


Converged at iteration 155
Training for ε=δ = 4


 15%|█▌        | 154/1000 [00:26<02:25,  5.83it/s]

Converged at iteration 155

Training completed. T matrices stored in trained_results dictionary.
Available ε=δ values: ['T_8', 'T_0.046-0.01', 'T_1', 'T_2', 'T_4']





In [13]:
# # Define different epsilon=delta values
# eps_delta_values = [1, 2, 4]
# trained_results = {}

# # For each epsilon=delta value
# for eps_delta in eps_delta_values:
#     print(f"Training for ε=δ = {eps_delta}")
#     # Update theta parameters
#     opt_params_erica['epsilon'] = eps_delta
#     opt_params_erica['delta'] = eps_delta
    
#     # Run both ERICA optimizations
#     params_empirical, T_empirical = oput.run_empirical_erica_optimization(**opt_params_erica)
    
#     # Store results including optimization parameters and transformation matrices
#     trained_results[eps_delta] = {'optimization_params': params_empirical, 'T_matrix': T_empirical}

# print("\nTraining completed. Empirical T matrices stored in trained_results dictionary.")
# print("Available ε=δ values:", list(trained_results.keys()))

### 2. ENRiCO optimization

In [14]:
# ENRiCO reuses the ERiCA routine with both robustness switches turned off.
# NOTE(review): every other entry of opt_params_erica is inherited as-is, including
# the 'epsilon'/'delta' values left there by the last ERiCA training run — confirm
# they are ignored when robust_L / robust_H are False.
opt_params_enrico = {**opt_params_erica, 'robust_L': False, 'robust_H': False}
params_enrico, T_enrico = oput.run_empirical_erica_optimization(**opt_params_enrico)

100%|██████████| 1000/1000 [00:22<00:00, 43.58it/s]


In [15]:
# File the ENRiCO run in the shared results dict under its own key.
diroca_train_results_empirical['T_0.00'] = {
    'optimization_params': params_enrico,
    'T_matrix': T_enrico,
}

### 3. Barycentric Optimization

In [16]:
# Keyword arguments for oput.run_empirical_bary_optim: estimated exogenous samples,
# the matrices from compute_struc_matrices, and the shared iteration/tolerance/seed
# settings.
opt_params_bary = {
    'U_ll_hat': U_ll_hat,
    'U_hl_hat': U_hl_hat,
    'L_matrices': L_matrices,
    'H_matrices': H_matrices,
    'max_iter': max_iter,
    'tol': tol,
    'seed': seed,
}
                                    

In [17]:

# Only a T matrix is captured from the barycentric routine, so an empty parameter
# record is prepared to give its stored entry the same two fields as the other methods.
params_bary = {'L': {}, 'H': {}}
T_bary = oput.run_empirical_bary_optim(**opt_params_bary)

100%|██████████| 1000/1000 [00:00<00:00, 1000.29it/s]


In [18]:
# File the barycentric result in the shared results dict.
diroca_train_results_empirical['T_b'] = {
    'optimization_params': params_bary,
    'T_matrix': T_bary,
}

### 4. Randomized Smoothing

In [19]:
# Keyword arguments for oput.run_empirical_smooth_optimization.
opt_params_smooth = {
    # estimated exogenous samples and the per-intervention models
    'U_L': U_ll_hat,
    'U_H': U_hl_hat,
    'L_models': LLmodels,
    'H_models': HLmodels,
    'omega': omega,
    # optimiser settings (this method uses its own, shorter iteration budget)
    'eta_min': eta_min,
    'num_steps_min': num_steps_min,
    'max_iter': 300,
    'tol': tol,
    'seed': seed,
    # smoothing-specific settings
    'noise_sigma': 0.1,
    'num_noise_samples': 10,
}

In [20]:
# Run the randomized-smoothing optimisation; it returns a parameter record and a T matrix.
params_smooth, T_smooth = oput.run_empirical_smooth_optimization(**opt_params_smooth)

100%|██████████| 300/300 [03:06<00:00,  1.61it/s]


In [21]:
# File the randomized-smoothing result in the shared results dict.
diroca_train_results_empirical['T_s'] = {
    'optimization_params': params_smooth,
    'T_matrix': T_smooth,
}

## Save the results

In [22]:
# Persist every trained result (all methods) in one joblib file for this experiment.
results_path = f"data/{experiment}/diroca_train_results_empirical.pkl"
# Make sure the target folder exists so a long training run is not lost to a
# missing directory at save time.
os.makedirs(os.path.dirname(results_path), exist_ok=True)
joblib.dump(diroca_train_results_empirical, results_path)

['data/synth1/diroca_train_results_empirical.pkl']

In [23]:
# Completion marker: reaching this line means every cell above ran through.
print('done!')

done!
