In [1]:
import joblib
import numpy as np
import joblib
import numpy as np
import torch
import torch.nn as nn
import yaml
import utilities as ut 
import opt_tools as optools
import scipy.stats as stats
import seaborn as sns
from matplotlib import pyplot as plt



In [2]:
# Experiment identifier: handed to the data loader and used to build the
# data/<experiment>/ paths that the notebook reads from and writes to.
experiment = "lilucas"

In [3]:
# One YAML hyperparameter file per optimisation method, keyed by the name
# under which the loaded configuration is looked up below.
config_files = {
    'hyperparams_diroca': 'configs/diroca_opt_config_empirical.yaml',
    'hyperparams_gradca': 'configs/gradca_opt_config_empirical.yaml',
    'hyperparams_baryca': 'configs/baryca_opt_config_empirical.yaml',
}
configs = ut.load_configs(config_files)

# Bind each method's hyperparameters to its own name for the cells that follow.
hyperparams_diroca, hyperparams_gradca, hyperparams_baryca = (
    configs['hyperparams_diroca'],
    configs['hyperparams_gradca'],
    configs['hyperparams_baryca'],
)

In [4]:
# Load everything belonging to the experiment and tag the bundle with its name.
all_data = ut.load_all_data(experiment)
all_data['experiment_name'] = experiment

# Samples stored under the `None` key of each model's 'data' entry
# (presumably the no-intervention / observational setting — TODO confirm).
Dll_obs, Dhl_obs = (
    all_data[model_key]['data'][None] for model_key in ('LLmodel', 'HLmodel')
)

# Cross-validation folds are derived from the low-level samples; the fold
# count and seed come from the DIROCA config and the same folds are reused
# by every method below.
folds_path = f"data/{experiment}/cv_folds.pkl"
saved_folds = ut.prepare_cv_folds(
    Dll_obs,
    hyperparams_diroca['k_folds'],
    hyperparams_diroca['seed'],
    folds_path,
)

Data loaded for 'lilucas'.
Created and saved 10 folds to 'data/lilucas/cv_folds.pkl'


In [5]:
# Noise terms stored under the `None` key of each model's 'noise' entry
# (presumably the no-intervention setting — TODO confirm). They are indexed
# with the fold's train indices in the optimisation cells.
U_ll_hat, U_hl_hat = (
    all_data[model_key]['noise'][None] for model_key in ('LLmodel', 'HLmodel')
)

In [6]:
# DIROCA: for every CV fold, train one model per (epsilon, delta) radius pair.
# Results land in diroca_cv_results_empirical['fold_<i>']['eps_<e>_delta_<d>'].

# Fixed arguments of ut.compute_empirical_radius, shared by the low- and
# high-level bounds so the two calls cannot drift apart.
RADIUS_KWARGS = {'eta': 0.05, 'c1': 1000.0, 'c2': 1.0, 'alpha': 2.0}

# Hand-picked (epsilon, delta) pairs searched in addition to each fold's
# computed bounds.
FIXED_RADIUS_PAIRS = [(1.0, 1.0), (2.0, 2.0), (4.0, 4.0), (8.0, 8.0)]

# Size of axis 1 of each noise array, passed as `m` to the radius computation.
# It does not depend on the fold, so it is read once instead of once per fold.
ll_dim = U_ll_hat.shape[1]
hl_dim = U_hl_hat.shape[1]

diroca_cv_results_empirical = {}

# Iterate through each cross-validation fold
for i, fold_info in enumerate(saved_folds):
    print(f"\n--- Starting Empirical Optimization for Fold {i+1}/{len(saved_folds)} ---")
    fold_key = f'fold_{i}'
    fold_results = {}
    diroca_cv_results_empirical[fold_key] = fold_results

    # Bounds computed from the size of THIS fold's training split,
    # rounded to three decimals (the rounded values also appear in the result key).
    train_n = len(fold_info['train'])
    ll_bound = round(ut.compute_empirical_radius(N=train_n, m=ll_dim, **RADIUS_KWARGS), 3)
    hl_bound = round(ut.compute_empirical_radius(N=train_n, m=hl_dim, **RADIUS_KWARGS), 3)

    # Computed pair first, then the fixed grid. dict.fromkeys drops duplicates
    # while preserving order: a computed pair equal to a grid pair would map to
    # the same result key, so training it twice would only overwrite the entry.
    radius_pairs_to_test = list(dict.fromkeys([(ll_bound, hl_bound), *FIXED_RADIUS_PAIRS]))

    # Train split of the noise data for this fold
    U_ll_train = U_ll_hat[fold_info['train']]
    U_hl_train = U_hl_hat[fold_info['train']]

    for epsilon, delta in radius_pairs_to_test:
        print(f"  - Training for ε = {epsilon}, δ = {delta}")

        # Assembled anew for every run, as in the original flow.
        # NOTE(review): the arguments are identical across radius pairs; hoist
        # this out of the inner loop only after confirming the optimiser does
        # not mutate the assembled objects in place.
        params_for_this_run = ut.assemble_empirical_parameters(
            U_ll_train,
            U_hl_train,
            all_data,
            hyperparams_diroca,
        )
        params_for_this_run['epsilon'] = epsilon
        params_for_this_run['delta'] = delta

        # 'k_folds' is stripped from the copy handed to the optimiser.
        opt_args = params_for_this_run.copy()
        opt_args.pop('k_folds', None)

        trained_params, trained_T = optools.run_empirical_erica_optimization(**opt_args)

        # Keep the fold's test indices next to the trained map.
        fold_results[f'eps_{epsilon}_delta_{delta}'] = {
            'T_matrix': trained_T,
            'optimization_params': trained_params,
            'test_indices': fold_info['test'],
        }

print("\n\n--- Optimization Complete ---")


--- Starting Empirical Optimization for Fold 1/10 ---
  - Training for ε = 0.321, δ = 0.103


100%|██████████| 1000/1000 [01:55<00:00,  8.65it/s]


  - Training for ε = 1.0, δ = 1.0


 43%|████▎     | 431/1000 [00:45<01:00,  9.41it/s]


Converged at iteration 432
  - Training for ε = 2.0, δ = 2.0


100%|██████████| 1000/1000 [01:55<00:00,  8.63it/s]


  - Training for ε = 4.0, δ = 4.0


100%|██████████| 1000/1000 [01:56<00:00,  8.56it/s]


  - Training for ε = 8.0, δ = 8.0


100%|██████████| 1000/1000 [01:49<00:00,  9.14it/s]



--- Starting Empirical Optimization for Fold 2/10 ---
  - Training for ε = 0.321, δ = 0.103


100%|██████████| 1000/1000 [01:49<00:00,  9.12it/s]


  - Training for ε = 1.0, δ = 1.0


 16%|█▌        | 159/1000 [00:17<01:31,  9.15it/s]


Converged at iteration 160
  - Training for ε = 2.0, δ = 2.0


100%|██████████| 1000/1000 [01:55<00:00,  8.66it/s]


  - Training for ε = 4.0, δ = 4.0


100%|██████████| 1000/1000 [01:52<00:00,  8.87it/s]


  - Training for ε = 8.0, δ = 8.0


100%|██████████| 1000/1000 [01:51<00:00,  8.97it/s]



--- Starting Empirical Optimization for Fold 3/10 ---
  - Training for ε = 0.321, δ = 0.103


100%|██████████| 1000/1000 [01:52<00:00,  8.89it/s]


  - Training for ε = 1.0, δ = 1.0


 16%|█▌        | 162/1000 [00:20<01:45,  7.97it/s]


Converged at iteration 163
  - Training for ε = 2.0, δ = 2.0


100%|██████████| 1000/1000 [01:58<00:00,  8.45it/s]


  - Training for ε = 4.0, δ = 4.0


 11%|█▏        | 113/1000 [00:11<01:31,  9.69it/s]


Converged at iteration 114
  - Training for ε = 8.0, δ = 8.0


 11%|█▏        | 113/1000 [00:11<01:32,  9.62it/s]


Converged at iteration 114

--- Starting Empirical Optimization for Fold 4/10 ---
  - Training for ε = 0.321, δ = 0.103


100%|██████████| 1000/1000 [01:41<00:00,  9.89it/s]


  - Training for ε = 1.0, δ = 1.0


 16%|█▋        | 163/1000 [00:16<01:26,  9.62it/s]


Converged at iteration 164
  - Training for ε = 2.0, δ = 2.0


100%|██████████| 1000/1000 [01:46<00:00,  9.36it/s]


  - Training for ε = 4.0, δ = 4.0


100%|██████████| 1000/1000 [01:38<00:00, 10.14it/s]


  - Training for ε = 8.0, δ = 8.0


100%|██████████| 1000/1000 [01:38<00:00, 10.13it/s]



--- Starting Empirical Optimization for Fold 5/10 ---
  - Training for ε = 0.321, δ = 0.103


100%|██████████| 1000/1000 [01:40<00:00,  9.96it/s]


  - Training for ε = 1.0, δ = 1.0


 16%|█▌        | 161/1000 [00:16<01:25,  9.77it/s]


Converged at iteration 162
  - Training for ε = 2.0, δ = 2.0


100%|██████████| 1000/1000 [01:40<00:00,  9.91it/s]


  - Training for ε = 4.0, δ = 4.0


100%|██████████| 1000/1000 [01:38<00:00, 10.17it/s]


  - Training for ε = 8.0, δ = 8.0


100%|██████████| 1000/1000 [01:41<00:00,  9.84it/s]



--- Starting Empirical Optimization for Fold 6/10 ---
  - Training for ε = 0.321, δ = 0.103


100%|██████████| 1000/1000 [01:41<00:00,  9.84it/s]


  - Training for ε = 1.0, δ = 1.0


 16%|█▌        | 162/1000 [00:16<01:27,  9.58it/s]


Converged at iteration 163
  - Training for ε = 2.0, δ = 2.0


100%|██████████| 1000/1000 [01:45<00:00,  9.45it/s]


  - Training for ε = 4.0, δ = 4.0


100%|██████████| 1000/1000 [01:43<00:00,  9.70it/s]


  - Training for ε = 8.0, δ = 8.0


100%|██████████| 1000/1000 [01:42<00:00,  9.74it/s]



--- Starting Empirical Optimization for Fold 7/10 ---
  - Training for ε = 0.321, δ = 0.103


100%|██████████| 1000/1000 [01:42<00:00,  9.76it/s]


  - Training for ε = 1.0, δ = 1.0


 16%|█▌        | 161/1000 [00:16<01:27,  9.62it/s]


Converged at iteration 162
  - Training for ε = 2.0, δ = 2.0


 11%|█▏        | 114/1000 [00:11<01:31,  9.64it/s]


Converged at iteration 115
  - Training for ε = 4.0, δ = 4.0


100%|██████████| 1000/1000 [01:43<00:00,  9.70it/s]


  - Training for ε = 8.0, δ = 8.0


100%|██████████| 1000/1000 [01:47<00:00,  9.33it/s]



--- Starting Empirical Optimization for Fold 8/10 ---
  - Training for ε = 0.321, δ = 0.103


100%|██████████| 1000/1000 [01:43<00:00,  9.62it/s]


  - Training for ε = 1.0, δ = 1.0


 16%|█▌        | 162/1000 [00:16<01:27,  9.61it/s]


Converged at iteration 163
  - Training for ε = 2.0, δ = 2.0


100%|██████████| 1000/1000 [01:43<00:00,  9.66it/s]


  - Training for ε = 4.0, δ = 4.0


100%|██████████| 1000/1000 [01:43<00:00,  9.69it/s]


  - Training for ε = 8.0, δ = 8.0


100%|██████████| 1000/1000 [01:44<00:00,  9.59it/s]



--- Starting Empirical Optimization for Fold 9/10 ---
  - Training for ε = 0.321, δ = 0.103


100%|██████████| 1000/1000 [01:47<00:00,  9.28it/s]


  - Training for ε = 1.0, δ = 1.0


 16%|█▌        | 161/1000 [00:17<01:31,  9.13it/s]


Converged at iteration 162
  - Training for ε = 2.0, δ = 2.0


100%|██████████| 1000/1000 [01:45<00:00,  9.44it/s]


  - Training for ε = 4.0, δ = 4.0


100%|██████████| 1000/1000 [01:45<00:00,  9.51it/s]


  - Training for ε = 8.0, δ = 8.0


100%|██████████| 1000/1000 [01:44<00:00,  9.59it/s]



--- Starting Empirical Optimization for Fold 10/10 ---
  - Training for ε = 0.321, δ = 0.103


100%|██████████| 1000/1000 [01:44<00:00,  9.56it/s]


  - Training for ε = 1.0, δ = 1.0


 16%|█▌        | 159/1000 [00:16<01:29,  9.38it/s]


Converged at iteration 160
  - Training for ε = 2.0, δ = 2.0


 11%|█▏        | 114/1000 [00:12<01:34,  9.35it/s]


Converged at iteration 115
  - Training for ε = 4.0, δ = 4.0


100%|██████████| 1000/1000 [01:46<00:00,  9.41it/s]


  - Training for ε = 8.0, δ = 8.0


100%|██████████| 1000/1000 [01:49<00:00,  9.17it/s]



--- Optimization Complete ---





### GRADCA optimization

In [9]:
# GRADCA: one optimisation run per CV fold. No epsilon/delta values are set in
# this cell; every optimiser argument comes from hyperparams_gradca via
# ut.assemble_empirical_parameters.
# Results land in gradca_cv_results_empirical['fold_<i>']['gradca_run'].
gradca_cv_results_empirical = {}

# Iterate through each cross-validation fold
for i, fold_info in enumerate(saved_folds):
    print(f"\n--- Starting Empirical Optimization for Fold {i+1}/{len(saved_folds)} ---")
    fold_key = f'fold_{i}'

    # Train split of the noise data for this fold
    U_ll_train = U_ll_hat[fold_info['train']]
    U_hl_train = U_hl_hat[fold_info['train']]

    params_for_this_run = ut.assemble_empirical_parameters(
        U_ll_train,
        U_hl_train,
        all_data,
        hyperparams_gradca,
    )

    # 'k_folds' is stripped from the copy handed to the optimiser.
    opt_args = params_for_this_run.copy()
    opt_args.pop('k_folds', None)

    # Run the optimization
    trained_params, trained_T = optools.run_empirical_erica_optimization(**opt_args)

    # Keep the fold's test indices next to the trained map.
    gradca_cv_results_empirical[fold_key] = {
        'gradca_run': {
            'T_matrix': trained_T,
            'optimization_params': trained_params,
            'test_indices': fold_info['test'],
        }
    }

print("\n\n--- Optimization Complete ---")


--- Starting Empirical Optimization for Fold 1/10 ---


100%|██████████| 2000/2000 [00:05<00:00, 358.80it/s]



--- Starting Empirical Optimization for Fold 2/10 ---


100%|██████████| 2000/2000 [00:05<00:00, 367.92it/s]



--- Starting Empirical Optimization for Fold 3/10 ---


100%|██████████| 2000/2000 [00:05<00:00, 371.77it/s]



--- Starting Empirical Optimization for Fold 4/10 ---


100%|██████████| 2000/2000 [00:05<00:00, 334.83it/s]



--- Starting Empirical Optimization for Fold 5/10 ---


100%|██████████| 2000/2000 [00:05<00:00, 377.31it/s]



--- Starting Empirical Optimization for Fold 6/10 ---


100%|██████████| 2000/2000 [00:05<00:00, 376.03it/s]



--- Starting Empirical Optimization for Fold 7/10 ---


100%|██████████| 2000/2000 [00:05<00:00, 376.34it/s]



--- Starting Empirical Optimization for Fold 8/10 ---


100%|██████████| 2000/2000 [00:05<00:00, 382.81it/s]



--- Starting Empirical Optimization for Fold 9/10 ---


100%|██████████| 2000/2000 [00:05<00:00, 374.29it/s]



--- Starting Empirical Optimization for Fold 10/10 ---


100%|██████████| 2000/2000 [00:05<00:00, 378.66it/s]



--- Optimization Complete ---





### BARYCA optimization

In [10]:
# SCM instances and intervention sets of both models, read from the loaded data.
# NOTE(review): `.get` yields None (not a KeyError) when 'scm_instances' is
# absent — confirm ut.compute_struc_matrices is never reached in that state.
LLmodels, Ill_relevant = (
    all_data['LLmodel'].get('scm_instances'),
    all_data['LLmodel']['intervention_set'],
)
HLmodels, Ihl_relevant = (
    all_data['HLmodel'].get('scm_instances'),
    all_data['HLmodel']['intervention_set'],
)

# Structural matrices for each model: low-level first, then high-level.
L_matrices, H_matrices = (
    ut.compute_struc_matrices(scm_instances, intervention_set)
    for scm_instances, intervention_set in (
        (LLmodels, Ill_relevant),
        (HLmodels, Ihl_relevant),
    )
)

print(f"✓ Pre-computed {len(L_matrices)} low-level and {len(H_matrices)} high-level structural matrices.")

✓ Pre-computed 6 low-level and 3 high-level structural matrices.


In [11]:
# BARYCA: one barycentric optimisation per CV fold, using the pre-computed
# structural matrices and the BARYCA hyperparameters.
# Results land in baryca_cv_results_empirical['fold_<i>']['baryca_run'].
baryca_cv_results_empirical = {}

for fold_idx, fold_info in enumerate(saved_folds):
    print(f"\n--- Starting Empirical Barycentric Optimization for Fold {fold_idx+1}/{len(saved_folds)} ---")

    # Only the training rows of the noise data are handed to the optimiser.
    train_rows = fold_info['train']

    # This optimiser call yields a single value, stored as the T matrix
    # (there is no separate parameter object as in the other methods).
    trained_T = optools.run_empirical_bary_optim(
        U_ll_hat=U_ll_hat[train_rows],
        U_hl_hat=U_hl_hat[train_rows],
        L_matrices=L_matrices,
        H_matrices=H_matrices,
        max_iter=hyperparams_baryca['max_iter'],
        tol=hyperparams_baryca['tol'],
        seed=hyperparams_baryca['seed'],
    )

    # Keep the fold's test indices next to the trained map.
    baryca_cv_results_empirical[f'fold_{fold_idx}'] = {
        'baryca_run': {
            'T_matrix': trained_T,
            'test_indices': fold_info['test'],
        }
    }

print("\n\n--- Optimization Complete ---")


--- Starting Empirical Barycentric Optimization for Fold 1/10 ---


  0%|          | 0/2000 [00:00<?, ?it/s]

100%|██████████| 2000/2000 [00:00<00:00, 3271.52it/s]



--- Starting Empirical Barycentric Optimization for Fold 2/10 ---


100%|██████████| 2000/2000 [00:00<00:00, 2935.58it/s]



--- Starting Empirical Barycentric Optimization for Fold 3/10 ---


100%|██████████| 2000/2000 [00:00<00:00, 3301.06it/s]



--- Starting Empirical Barycentric Optimization for Fold 4/10 ---


100%|██████████| 2000/2000 [00:00<00:00, 3306.45it/s]



--- Starting Empirical Barycentric Optimization for Fold 5/10 ---


100%|██████████| 2000/2000 [00:00<00:00, 3305.11it/s]



--- Starting Empirical Barycentric Optimization for Fold 6/10 ---


100%|██████████| 2000/2000 [00:00<00:00, 2946.26it/s]



--- Starting Empirical Barycentric Optimization for Fold 7/10 ---


100%|██████████| 2000/2000 [00:00<00:00, 3256.11it/s]



--- Starting Empirical Barycentric Optimization for Fold 8/10 ---


100%|██████████| 2000/2000 [00:00<00:00, 3283.18it/s]



--- Starting Empirical Barycentric Optimization for Fold 9/10 ---


100%|██████████| 2000/2000 [00:00<00:00, 3271.01it/s]



--- Starting Empirical Barycentric Optimization for Fold 10/10 ---


100%|██████████| 2000/2000 [00:00<00:00, 3316.73it/s]



--- Optimization Complete ---





### Abs-LiNGAM optimization

In [12]:
# Abs-LiNGAM: fitted on the observed samples (not the noise terms) of each
# fold's training split. Two variants are kept per fold: 'Perfect' and 'Noisy'.
abslingam_cv_results_empirical = {}

for fold_idx, fold_info in enumerate(saved_folds):
    print(f"\n--- Running Abs-LiNGAM for Fold {fold_idx+1}/{len(saved_folds)} ---")

    train_rows = fold_info['train']
    fold_fit = optools.run_abs_lingam_complete(Dll_obs[train_rows], Dhl_obs[train_rows])

    # The returned 'T' is stored transposed.
    # NOTE(review): the original comment read "Transpose if needed" — confirm
    # the transposed orientation matches the T_matrix of the other methods.
    abslingam_cv_results_empirical[f'fold_{fold_idx}'] = {
        variant: {
            'T_matrix': fold_fit[variant]['T'].T,
            'test_indices': fold_info['test'],
        }
        for variant in ('Perfect', 'Noisy')
    }

print("\n\n--- Optimization Complete ---")


--- Running Abs-LiNGAM for Fold 1/10 ---

--- Running Abs-LiNGAM for Fold 2/10 ---

--- Running Abs-LiNGAM for Fold 3/10 ---

--- Running Abs-LiNGAM for Fold 4/10 ---

--- Running Abs-LiNGAM for Fold 5/10 ---

--- Running Abs-LiNGAM for Fold 6/10 ---

--- Running Abs-LiNGAM for Fold 7/10 ---

--- Running Abs-LiNGAM for Fold 8/10 ---

--- Running Abs-LiNGAM for Fold 9/10 ---

--- Running Abs-LiNGAM for Fold 10/10 ---


--- Optimization Complete ---


## Save the results

In [13]:
# Persist each method's cross-validation results under the experiment's data
# directory, one pickle per method, written in the order listed.
results_to_save = [
    (diroca_cv_results_empirical, f"data/{experiment}/diroca_cv_results_empirical.pkl"),
    (gradca_cv_results_empirical, f"data/{experiment}/gradca_cv_results_empirical.pkl"),
    (baryca_cv_results_empirical, f"data/{experiment}/baryca_cv_results_empirical.pkl"),
    (abslingam_cv_results_empirical, f"data/{experiment}/abslingam_cv_results_empirical.pkl"),
]
for cv_results, target_path in results_to_save:
    joblib.dump(cv_results, target_path)

print("All results have been saved successfully!")

All results have been saved successfully!


In [14]:
# End-of-notebook marker: shows that a full run reached the last cell.
print("Done!")

Done!
