In [1]:
import os
import joblib
import numpy as np
import yaml

# Make sure your 'models' and 'utilities' files are in the same directory
# or in a path that Python can find.
from models import LinearAddSCM, NonlinearAddSCM, CausalBayesianNetwork, Intervention
import utilities as ut

def create_intervention(spec):
    """Turn one intervention entry from the config into an object.

    Args:
        spec: The intervention definition as read from the config. ``None``
            or the literal string ``'None'`` stands for "no intervention".

    Returns:
        ``None`` when ``spec`` is ``None`` or ``'None'``; otherwise the result
        of ``Intervention(spec)``.
    """
    no_intervention = spec is None or spec == 'None'
    return None if no_intervention else Intervention(spec)


/bin/sh: brew: command not found



In [2]:
# --- 1. Load and Unpack Configuration ---

# Path to the experiment's YAML configuration (relative to the working directory).
config_path = 'configs/lilucas_config.yaml'

# Decode explicitly as UTF-8 so that parsing the file does not depend on the
# platform's locale encoding (the default when `encoding` is omitted).
with open(config_path, 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Seed NumPy's global RNG; the np.random.* draws in the sampling cells rely on it.
np.random.seed(config['seed'])

# Unpack the top-level sections of the config into notebook-level names.
experiment = config['experiment_name']
model_type = config['model_type']
ll_config = config['low_level_model']    # low-level model definition
hl_config = config['high_level_model']   # high-level model definition
abs_config = config['abstraction']       # interventions, omega map, T matrix
num_llsamples = config['num_llsamples']  # samples per low-level intervention
num_hlsamples = config['num_hlsamples']  # samples per high-level intervention

print(f"Configuration for experiment '{experiment}' loaded successfully.")
print(f"Model type: {model_type}")

Configuration for experiment 'lilucas' loaded successfully.
Model type: linear_anm


In [3]:
# --- 2. Low-Level Model Setup & Sampling ---

print(f"--- Generating data for {model_type} model: {experiment} ---")

# Per-intervention containers: intervention -> simulated samples / the noise used.
Dll_samples, Dll_noise = {}, {}
ll_causal_graph = None

# Noise distribution shared by all low-level interventions: a mean vector and a
# diagonal covariance matrix built from the configured diagonal entries.
ll_mu_hat = np.array(ll_config['noise_params']['mu'])
ll_Sigma_hat = np.diag(ll_config['noise_params']['sigma_diag'])

# Build the named interventions, then omega: low-level intervention -> high-level intervention.
interventions = {name: create_intervention(spec) for name, spec in abs_config.get('interventions', {}).items()}
omega = {interventions[ll_name]: interventions[hl_name] for ll_name, hl_name in abs_config.get('omega_map', {}).items()}
# Dict keys are already unique and keep the config's order. Routing them through
# set() would make the iteration order depend on hash values, and with it which
# draws of the seeded global RNG end up assigned to which intervention.
Ill_relevant = list(omega)

if model_type == 'linear_anm':
    # Each config entry is (edge, coefficient); the edge list also defines the graph.
    ll_coeffs_list = ll_config['coefficients']
    ll_endogenous_coeff_dict = {tuple(item[0]): item[1] for item in ll_coeffs_list}
    ll_causal_graph = CausalBayesianNetwork(list(ll_endogenous_coeff_dict.keys()))

    for iota in Ill_relevant:
        llcm = LinearAddSCM(ll_causal_graph, ll_endogenous_coeff_dict, iota)
        noise = np.random.multivariate_normal(mean=ll_mu_hat, cov=ll_Sigma_hat, size=num_llsamples)
        Dll_noise[iota] = noise
        Dll_samples[iota] = llcm.simulate(Dll_noise[iota])
    print("✓ Linear low-level sampling complete.")

elif model_type == 'continuous_nonlinear_anm':
    ll_causal_graph = CausalBayesianNetwork(ll_config['graph_edges'])
    # NOTE(review): eval() executes arbitrary Python taken from the config file.
    # Passing {'np': np} as globals does NOT sandbox it: Python inserts
    # __builtins__ into that dict automatically. Only load trusted configs.
    safe_eval_scope = {'np': np}
    functions = {var: eval(func_str, safe_eval_scope) for var, func_str in ll_config['structural_functions'].items()}

    for iota in Ill_relevant:
        llcm = NonlinearAddSCM(ll_causal_graph, functions, iota)
        noise = np.random.multivariate_normal(mean=ll_mu_hat, cov=ll_Sigma_hat, size=num_llsamples)
        Dll_noise[iota] = noise
        Dll_samples[iota] = llcm.simulate(Dll_noise[iota])
    print("✓ Non-linear low-level sampling complete.")

else:
    raise ValueError(f"Unknown model_type in config: {model_type}")

--- Generating data for linear_anm model: lilucas ---
✓ Linear low-level sampling complete.


In [4]:
# --- 3. Abstraction & High-Level Model Inference ---

# The initial coefficients are used here only for their keys (the edges),
# which define the high-level graph; the coefficient values are re-estimated below.
hl_initial_coeff_dict = {tuple(item[0]): item[1] for item in hl_config['initial_coefficients']}
hl_causal_graph = CausalBayesianNetwork(list(hl_initial_coeff_dict.keys()))

# The set of relevant high-level interventions. Several low-level interventions
# may map to the same high-level one, so de-duplicate -- but via dict.fromkeys,
# which keeps first-seen order. set() would make the order (and therefore which
# seeded RNG draws go to which intervention) depend on hash values.
Ihl_relevant = list(dict.fromkeys(omega.values()))

# The abstraction is applied to the observational (None) low-level samples,
# so fail with an explicit message if that intervention was never sampled.
if None not in Dll_samples:
    raise KeyError(
        "No low-level samples for the observational (None) intervention; "
        "the abstraction's omega_map must include it."
    )

# Apply the abstraction to get observational HL data
# (assumes rows are samples and T has one row per high-level variable -- TODO confirm).
T = np.array(abs_config['T_matrix'])
data_observational_hl = Dll_samples[None] @ T.T

# Infer HL model coefficients and noise from the abstracted data
hl_endogenous_coeff_dict, U_hl = ut.get_coefficients(data_observational_hl, hl_causal_graph, return_noise=True)
# Summarise the recovered noise by its per-column mean and a diagonal covariance
# of per-column variances.
hl_mu_hat = np.mean(U_hl, axis=0)
hl_Sigma_hat = np.diag(np.var(U_hl, axis=0))
print("✓ High-level model inferred.")

# --- 4. High-Level Sampling ---
Dhl_samples, Dhl_noise = {}, {}
for eta in Ihl_relevant:
    if eta is not None:
        # Interventional case: draw fresh noise and simulate the intervened linear SCM.
        hlcm = LinearAddSCM(hl_causal_graph, hl_endogenous_coeff_dict, eta)
        Dhl_noise[eta] = np.random.multivariate_normal(mean=hl_mu_hat, cov=hl_Sigma_hat, size=num_hlsamples)
        Dhl_samples[eta] = hlcm.simulate(Dhl_noise[eta])
    else: # Observational case
        # Reuse the abstracted low-level data and its inferred noise rather than
        # resampling, so this entry is not sized by num_hlsamples.
        Dhl_noise[eta] = U_hl
        Dhl_samples[eta] = data_observational_hl
print("✓ High-level sampling complete.")

✓ High-level model inferred.
✓ High-level sampling complete.


In [5]:
# --- 5. Package and Save the Data ---

# Create one low-level SCM instance per intervention for saving. An unsupported
# model_type is rejected explicitly (same error as the sampling cell) instead of
# surfacing later as a NameError on LLmodels.
if model_type == 'linear_anm':
    LLmodels = {iota: LinearAddSCM(ll_causal_graph, ll_endogenous_coeff_dict, iota) for iota in Ill_relevant}
elif model_type == 'continuous_nonlinear_anm':
    LLmodels = {iota: NonlinearAddSCM(ll_causal_graph, functions, iota) for iota in Ill_relevant}
else:
    raise ValueError(f"Unknown model_type in config: {model_type}")

# The high-level model is a LinearAddSCM for every supported low-level model type.
HLmodels = {eta: LinearAddSCM(hl_causal_graph, hl_endogenous_coeff_dict, eta) for eta in Ihl_relevant}

# Package LL model results
LLmodel = {
    'graph': ll_causal_graph, 'intervention_set': Ill_relevant,
    'noise_dist': {'mu': ll_mu_hat, 'sigma': ll_Sigma_hat}, 'data': Dll_samples,
    'scm_instances': LLmodels, 'noise': Dll_noise
}
# Record the mechanism definition that matches the model type.
if model_type == 'linear_anm':
    LLmodel['coeffs'] = {tuple(item[0]): item[1] for item in ll_config['coefficients']}
elif model_type == 'continuous_nonlinear_anm':
    # Stores the config's function definitions as loaded, not the eval'd callables.
    LLmodel['functions'] = ll_config['structural_functions']

# Package HL model results
HLmodel = {
    'graph': hl_causal_graph, 'intervention_set': Ihl_relevant, 'coeffs': hl_endogenous_coeff_dict,
    'noise_dist': {'mu': hl_mu_hat, 'sigma': hl_Sigma_hat}, 'data': Dhl_samples,
    'scm_instances': HLmodels, 'noise': Dhl_noise
}

# Package abstraction data
abstraction_data = {'T': T, 'omega': omega}

# NOTE: saving is currently disabled; the block below is kept for reference.
# # Define save path and create directory
# path = f"data/{experiment}"
# os.makedirs(path, exist_ok=True)

# # Save files
# joblib.dump(LLmodel, f"{path}/LLmodel.pkl")
# joblib.dump(HLmodel, f"{path}/HLmodel.pkl")
# joblib.dump(abstraction_data, f"{path}/abstraction_data.pkl")

# print(f"✓ Data saved successfully to {path}/")

In [6]:
# Inspect the low-level samples stored under the None (observational) intervention key.
LLmodel['data'][None]

array([[-0.40546106,  1.24453144, -0.45683386,  1.04889911, -0.63583581,
         1.65775053],
       [ 0.34753562, -0.32013581,  0.15567814, -0.92655593, -0.35468084,
        -1.87339703],
       [ 1.6001979 ,  0.30434081,  1.78249075,  2.64308345,  2.30183702,
         4.69729223],
       ...,
       [-0.23311753,  0.78759165,  0.70520622,  1.53486053,  0.94908158,
         2.07418142],
       [-1.0859671 ,  1.06390896, -0.94316974,  0.34968928, -0.18103679,
         1.0683677 ],
       [-1.25025069,  1.45092703, -0.0950871 , -0.52694629, -0.42468992,
        -0.98621291]])

In [17]:
# Same inspection as the preceding cell: low-level samples stored under the
# None (observational) intervention key.
LLmodel['data'][None]

array([[-0.40546106,  1.24453144, -0.45683386,  1.04889911, -0.63583581,
         1.65775053],
       [ 0.34753562, -0.32013581,  0.15567814, -0.92655593, -0.35468084,
        -1.87339703],
       [ 1.6001979 ,  0.30434081,  1.78249075,  2.64308345,  2.30183702,
         4.69729223],
       ...,
       [-0.23311753,  0.78759165,  0.70520622,  1.53486053,  0.94908158,
         2.07418142],
       [-1.0859671 ,  1.06390896, -0.94316974,  0.34968928, -0.18103679,
         1.0683677 ],
       [-1.25025069,  1.45092703, -0.0950871 , -0.52694629, -0.42468992,
        -0.98621291]])