In [1]:
import os

import joblib
import numpy as np
# import networkx as nx  

from models import LinearAddSCM, CausalBayesianNetwork, Intervention
import utilities as ut

### Define low-level DCM "LL" and high-level DCM "HL"

In [2]:
# Experiment identifier; it names the folder under data/ that the models are saved to.
experiment = "SLC"

In [3]:
# Node labels used for the low-level graph.
# NOTE: `T` is rebound to the abstraction matrix further down the notebook, so the
# cells that use `T` as a node label must not be re-run after that point.
S, T, C = 'Smoking', 'Tar', 'Cancer'

# Node labels used for the high-level graph.
S_, C_ = 'Smoking_', 'Cancer_'

In [4]:
# Low-level model: coefficients keyed by node pairs (presumably directed
# (parent, child) edges — confirm against LinearAddSCM). The keys alone define the graph.
ll_endogenous_coeff_dict = {
    (S, T): 0.3,
    (T, C): 0.2,
}
ll_causal_graph = CausalBayesianNetwork(list(ll_endogenous_coeff_dict))

# High-level model: a single pair. The 0.0 is only a placeholder; the dict is
# replaced later by the coefficients estimated with ut.get_coefficients.
hl_endogenous_coeff_dict = {
    (S_, C_): 0.0,
}
hl_causal_graph = CausalBayesianNetwork(list(hl_endogenous_coeff_dict))

In [5]:
# Number of noise samples drawn per intervention for the low- and high-level models.
num_llsamples = 10_000
num_hlsamples = 10_000

### Construct the empirical nominal distribution for the low-level model

In [6]:
# Nominal low-level noise distribution: zero mean and identity covariance over
# three variables (integer dtype, as produced by the literal lists they replace).
ll_mu_hat = np.zeros(3, dtype=int)
ll_Sigma_hat = np.eye(3, dtype=int)

### Define the sets of relevant interventions and the (total) surjective and order-preserving function $\omega: I^{L} \to I^{H}$

In [7]:
# Low-level interventions. `None` is the key used for the observational
# (no-intervention) regime throughout the notebook.
iota0 = None
iota1 = Intervention({S: 0})
iota2 = Intervention({S: 0, T: 1})
iota3 = Intervention({S: 1})
iota4 = Intervention({S: 1, T: 0})
iota5 = Intervention({S: 1, T: 1})

# High-level interventions.
eta0 = None
eta1 = Intervention({S_: 0})
eta2 = Intervention({S_: 1})

# omega sends every relevant low-level intervention to its high-level counterpart.
omega = {
    iota0: eta0,
    iota1: eta1,
    iota2: eta1,
    iota3: eta2,
    iota4: eta2,
    iota5: eta2,
}

# Relevant intervention sets, in the order they appear in `omega`.
# Dict keys are already unique, and dict.fromkeys de-duplicates the values with the
# same hash/equality rules a set would use — but both keep insertion order, so the
# lists (which are saved with the models) no longer depend on hash-based set ordering.
Ill_relevant = list(omega)
Ihl_relevant = list(dict.fromkeys(omega.values()))

### Sampling and Pair construction

In [8]:
# Draw exogenous noise and simulate the low-level model under every relevant
# intervention (the `None` key holds the observational regime).
#
# A dedicated, seeded generator is used instead of the global NumPy state so that
# re-running this cell yields the same noise draws (for a fixed iteration order of
# Ill_relevant) regardless of what other cells consumed from np.random.
LL_SEED = 0
ll_rng = np.random.default_rng(LL_SEED)

Dll_samples, Dll_noise = {}, {}
for iota in Ill_relevant:
    llcm = LinearAddSCM(ll_causal_graph, ll_endogenous_coeff_dict, iota)
    # One independent batch of num_llsamples noise vectors per intervention.
    Dll_noise[iota] = ll_rng.multivariate_normal(
        mean=ll_mu_hat, cov=ll_Sigma_hat, size=num_llsamples
    )
    Dll_samples[iota] = llcm.simulate(Dll_noise[iota])

### Define the abstraction T

In [9]:
# Linear abstraction matrix (2 x 3); it is applied to low-level samples as
# `samples @ T.T`.
# NOTE: this rebinds `T`, which earlier in the notebook held the node label 'Tar'.
T = np.array([
    [1, 2, 1],
    [0, 1, 0],
])

### Compute the empirical nominal distribution for the high-level model and the linear coefficients

In [11]:
# High-level observational data: the low-level observational samples (stored under
# the key None) mapped through the abstraction as `samples @ T.T`.
data_observational_hl = Dll_samples[None] @ T.T

# Re-estimate the high-level coefficients from that data. This overwrites the
# placeholder value hl_endogenous_coeff_dict was given when the graph was defined.
# U_hl is the noise term returned because return_noise=True — presumably the
# per-sample residuals of the fit; confirm in utilities.get_coefficients.
hl_endogenous_coeff_dict, U_hl = ut.get_coefficients(data_observational_hl, hl_causal_graph, return_noise=True) 

# Empirical high-level noise distribution: mean along axis 0 and a diagonal
# covariance built from the variances along axis 0 (off-diagonal terms are dropped).
hl_mu_hat    = np.mean(U_hl, axis=0)
hl_Sigma_hat = np.diag(np.var(U_hl, axis=0))

### Generate samples for the high-level model

In [12]:
# Build the high-level samples for every relevant high-level intervention.
#
# A dedicated, seeded generator is used instead of the global NumPy state so that
# re-running this cell yields the same noise draws (for a fixed iteration order of
# Ihl_relevant) regardless of what other cells consumed from np.random.
HL_SEED = 1
hl_rng = np.random.default_rng(HL_SEED)

Dhl_samples, Dhl_noise = {}, {}
for eta in Ihl_relevant:

    if eta is None:
        # Observational regime: reuse the abstracted low-level data and the noise
        # recovered while fitting the coefficients instead of drawing new samples.
        Dhl_noise[eta]   = U_hl
        Dhl_samples[eta] = data_observational_hl
        continue

    # Interventional regime: draw fresh noise from the fitted high-level noise
    # distribution and simulate the intervened high-level model.
    hlcm             = LinearAddSCM(hl_causal_graph, hl_endogenous_coeff_dict, eta)
    Dhl_noise[eta]   = hl_rng.multivariate_normal(mean=hl_mu_hat, cov=hl_Sigma_hat, size=num_hlsamples)
    Dhl_samples[eta] = hlcm.simulate(Dhl_noise[eta])

### Save the data

In [13]:
# Bundle everything needed to rebuild the low-level model: graph, relevant
# interventions, linear coefficients and the parameters of the noise distribution.
LLmodel = dict(
    graph=ll_causal_graph,
    intervention_set=Ill_relevant,
    coeffs=ll_endogenous_coeff_dict,
    noise_dist=dict(mu=ll_mu_hat, sigma=ll_Sigma_hat),
)

# Same layout for the high-level model.
HLmodel = dict(
    graph=hl_causal_graph,
    intervention_set=Ihl_relevant,
    coeffs=hl_endogenous_coeff_dict,
    noise_dist=dict(mu=hl_mu_hat, sigma=hl_Sigma_hat),
)

# The abstraction itself: the matrix T and the intervention map omega.
abstraction_data = dict(T=T, omega=omega)

In [14]:
# Persist the two models and the abstraction under data/<experiment>/.
path = f"data/{experiment}"

# joblib.dump opens the target file directly and does not create missing
# directories, so make sure the output folder exists before writing.
os.makedirs(path, exist_ok=True)

joblib.dump(LLmodel, f"{path}/LLmodel.pkl")
joblib.dump(HLmodel, f"{path}/HLmodel.pkl")
joblib.dump(abstraction_data, f"{path}/abstraction_data.pkl")

['data/SLC/abstraction_data.pkl']

In [7]:


# --- 1. Define the experiment to load ---
# This must match the experiment_name used to save the data.
experiment = 'lilucas'
path = f"data/{experiment}"

# --- 2. Load the data from the .pkl files ---
# NOTE: joblib.load unpickles its input, which can execute arbitrary code; only
# point this at files you generated yourself.
try:
    # Unpacking a generator binds the three names only after all three loads have
    # succeeded, so a missing file cannot leave a mix of fresh and stale objects.
    LLmodel, HLmodel, abstraction_data = (
        joblib.load(f"{path}/{stem}.pkl")
        for stem in ("LLmodel", "HLmodel", "abstraction_data")
    )

except FileNotFoundError:
    print(f"❌ Error: Data files not found in '{path}'.")
    print("Please ensure you have run the 'generate_data.py' script first.")

else:
    print(f"✅ Successfully loaded data for experiment: '{experiment}'")

    # --- 3. Example of how to access the loaded data ---
    # This runs only when the load above succeeded. Checking for the name LLmodel
    # instead would also pass when a model from an earlier cell is still in the
    # kernel, and would then report that stale model as if it had just been loaded.
    print("\n--- Example Data Access ---")

    # Accessing low-level model components
    ll_coeffs = LLmodel['coeffs']
    print(f"\nLow-level coefficients: {ll_coeffs}")

    # Accessing high-level model components
    hl_graph = HLmodel['graph']
    print(f"High-level graph nodes: {hl_graph.nodes()}")

    # Accessing abstraction data
    T_matrix = abstraction_data['T']
    print(f"Abstraction T matrix shape: {T_matrix.shape}")

✅ Successfully loaded data for experiment: 'lilucas'

--- Example Data Access ---

Low-level coefficients: {('Smoking', 'Lung Cancer'): 0.9, ('Genetics', 'Lung Cancer'): 0.8, ('Lung Cancer', 'Coughing'): 0.6, ('Lung Cancer', 'Fatigue'): 0.9, ('Coughing', 'Fatigue'): 0.5, ('Allergy', 'Coughing'): 0.4}
High-level graph nodes: ['Environment', 'Lung Cancer_', 'Genetics_']
Abstraction T matrix shape: (3, 6)


In [1]:
import joblib
import numpy as np

# Define the experiment name for the non-linear dataset
experiment = 'nonlinlucas'
path = f"data/{experiment}"

# Load the data files from the path
# NOTE: joblib.load unpickles its input, which can execute arbitrary code; only
# point this at files you generated yourself.
try:
    # Unpacking a generator binds the three names only after all three loads have
    # succeeded, so a missing file cannot leave a mix of fresh and stale objects.
    LLmodel, HLmodel, abstraction_data = (
        joblib.load(f"{path}/{stem}.pkl")
        for stem in ("LLmodel", "HLmodel", "abstraction_data")
    )

except FileNotFoundError:
    print(f"❌ Error: Data files not found in '{path}'.")
    print("Please ensure you have run the data generation script first.")

else:
    print(f"✅ Successfully loaded data for experiment: '{experiment}'")

    # Test and verify the loaded non-linear data
    # This runs only when the load above succeeded, so the checks can never be
    # applied to a model object left in the kernel by an earlier cell.
    print("\n--- Verifying Loaded Data ---")

    # Check for the 'functions' key, which is specific to the non-linear model
    if 'functions' in LLmodel:
        print("✓ Found 'functions' key, confirming non-linear model was loaded.")

        # Print one of the non-linear function strings as an example
        lung_cancer_func = LLmodel['functions']['Lung_Cancer']
        print(f"  - Example function for Lung_Cancer: {lung_cancer_func}")
    else:
        print("⚠️ Warning: Expected 'functions' key not found. Loaded model may be linear.")

    # The HL model is still inferred as linear, so it has 'coeffs'
    hl_coeffs = HLmodel['coeffs']
    print(f"\nInferred high-level coefficients: {hl_coeffs}")

✅ Successfully loaded data for experiment: 'nonlinlucas'

--- Verifying Loaded Data ---
✓ Found 'functions' key, confirming non-linear model was loaded.
  - Example function for Lung_Cancer: lambda Smoking, Genetics: 0.9 * np.abs(Smoking)**1.2 + 0.8 * np.sin(Genetics * 3)

Inferred high-level coefficients: {('Environment', 'Lung_Cancer_'): 0.7047221723831408, ('Genetics_', 'Lung_Cancer_'): 0.44343675340811933}
