In [1]:
import os

import joblib

import numpy as np
import networkx as nx

from src.CBN import CausalBayesianNetwork as CBN
import modularised_utils as mut
import Linear_Additive_Noise_Models as lanm
import operations as ops
import params

### Define low-level DCM "LL" and high-level DCM "HL"

In [2]:
# Experiment identifier: indexes params.n_samples and names the data/ sub-folder.
experiment = "little_lucas"

In [3]:
# Node labels used as edge endpoints of the low-level graph.
Sm, Gen, LC   = 'Smoking', 'Genetics', 'Lung Cancer'
All, Cou, Fat = 'Allergy', 'Coughing', 'Fatigue'

# Node labels used as edge endpoints of the high-level graph. The trailing
# underscore keeps 'Genetics_' / 'Lung Cancer_' distinct from the low-level
# labels of the same name.
Env, Gen_, LC_ = 'Environment', 'Genetics_', 'Lung Cancer_'

In [4]:
# Low-level model: every key is a directed edge (parent, child) and every value
# is the linear coefficient attached to that edge.
ll_endogenous_coeff_dict = {
    (Sm,  LC):  0.9,  # Smoking     -> Lung Cancer
    (Gen, LC):  0.8,  # Genetics    -> Lung Cancer
    (LC,  Cou): 0.6,  # Lung Cancer -> Coughing
    (LC,  Fat): 0.9,  # Lung Cancer -> Fatigue
    (Cou, Fat): 0.5,  # Coughing    -> Fatigue
    (All, Cou): 0.4,  # Allergy     -> Coughing
}
# The graph is built from the edge list, in the dict's insertion order.
ll_causal_graph = CBN(list(ll_endogenous_coeff_dict))

# High-level model: Environment -> Lung Cancer_ <- Genetics_.
# Both coefficients are set to 0.0 here; a separate `hl_coeffs` is obtained
# further down through mut.get_coefficients.
hl_endogenous_coeff_dict = {
    (Env,  LC_): 0.0,
    (Gen_, LC_): 0.0,
}
hl_causal_graph = CBN(list(hl_endogenous_coeff_dict))

In [5]:
# Number of samples to draw from the low-level environment: the first entry of
# the per-experiment setting stored in `params.n_samples`.
num_llsamples   = params.n_samples[experiment][0]

### Construct the empirical nominal distribution / environment

In [6]:
# Mean vector and diagonal covariance handed to mut.sample_distros_Gelbrich when
# the low-level noise is sampled. Both have six entries
# (presumably one per low-level variable, in the graph's node order — confirm).
ll_mu_hat    = np.array([0.0, 0.0, 0.1, 0.1, 0.3, 0.2])
ll_Sigma_hat = np.diag(np.array([0.5, 2.0, 1.0, 1.5, 0.8, 1.2]))

# Random alternative, kept for reference (sized to match the six entries above):
# ll_mu_hat    = np.random.randn(6)
# ll_Sigma_hat = np.diag(np.random.rand(6))

### Define the sets of relevant interventions and the (total) surjective and order-preserving function $\omega: I^{L} \to I^{H}$

In [7]:
# Low-level interventions iota0..iota20. Each ops.Intervention is built from a
# {variable label: forced value} dict; None stands for "no intervention".

# Baseline (No intervention)
iota0 = None  # No intervention

# Smoking interventions
iota1 = ops.Intervention({Sm: 0})  # Force no smoking
iota2 = ops.Intervention({Sm: 1})  # Force smoking

# Lung Cancer interventions
iota3 = ops.Intervention({LC: 0})  # Prevent lung cancer
iota4 = ops.Intervention({LC: 1})  # Force lung cancer occurrence

# Combined Smoking and Lung Cancer interventions
iota5 = ops.Intervention({Sm: 0, LC: 0})  # No smoking, prevent lung cancer
iota6 = ops.Intervention({Sm: 1, LC: 1})  # Smoking, force lung cancer
iota7 = ops.Intervention({Sm: 0, LC: 1})  # No smoking, force lung cancer
iota8 = ops.Intervention({Sm: 1, LC: 0})  # Smoking, prevent lung cancer

# Genetics interventions
# NOTE(review): iota9 is never used as a key of `omega`, so it does not end up
# in Ill_relevant and no samples are generated for it.
iota9 = ops.Intervention({Gen: 0})  # No genetic predisposition
iota10 = ops.Intervention({Gen: 1})  # Strong genetic predisposition

# Combined Genetics and Smoking interventions
iota11 = ops.Intervention({Gen: 0, Sm: 0})  # No predisposition, no smoking
iota12 = ops.Intervention({Gen: 1, Sm: 1})  # Genetic predisposition, smoking
iota13 = ops.Intervention({Gen: 0, Sm: 1})  # No predisposition, smoking
iota14 = ops.Intervention({Gen: 1, Sm: 0})  # Genetic predisposition, no smoking

# Allergy interventions
iota15 = ops.Intervention({All: 0})  # No allergy
iota16 = ops.Intervention({All: 1})  # Force allergy

# Coughing and Fatigue interventions
iota17 = ops.Intervention({Cou: 0})  # No coughing
iota18 = ops.Intervention({Cou: 1, Fat: 1})  # Force coughing and fatigue
iota19 = ops.Intervention({Cou: 1, Fat: 0})  # Force coughing, no fatigue
iota20 = ops.Intervention({Cou: 0, Fat: 1})  # No coughing, force fatigue



# High-level interventions eta0..eta10 over the high-level labels Env, Gen_ and
# LC_. As on the low level, None stands for "no intervention".

# Baseline (No intervention)
eta0 = None  # No intervention

# Single-node interventions
eta1 = ops.Intervention({Env: 0})  # Force environment to have no impact
eta2 = ops.Intervention({Env: 1})  # Force environment to fully impact
eta3 = ops.Intervention({Gen_: 0})  # No genetic predisposition
eta4 = ops.Intervention({Gen_: 1})  # Strong genetic predisposition

# Combined node interventions
eta5 = ops.Intervention({Env: 0, Gen_: 0})  # No environmental or genetic influence
eta6 = ops.Intervention({Env: 1, Gen_: 1})  # Full environmental and genetic influence
eta7 = ops.Intervention({Env: 0, Gen_: 1})  # No environment, strong genetics
eta8 = ops.Intervention({Env: 1, Gen_: 0})  # Full environment, no genetics

# Lung Cancer direct interventions
eta9 = ops.Intervention({LC_: 0})  # Force no lung cancer occurrence
eta10 = ops.Intervention({LC_: 1})  # Force lung cancer occurrence



# Intervention map omega: low-level intervention -> high-level intervention.
# The trailing comments spell out the variable assignments on both sides exactly
# as the interventions are declared earlier in this cell.
# NOTE(review): iota9 ({Gen: 0}) has no entry, so omega is not defined on every
# declared low-level intervention. Several pairings (e.g. {Gen: 1} -> {Env: 0,
# Gen_: 0}, or {Sm: 0, LC: 1} -> {LC_: 0}) look shifted relative to their
# semantic counterpart — confirm the intended mapping.
omega = {
    iota0: eta0,  # no intervention -> no intervention
    iota1: eta1,  # {Sm: 0} -> {Env: 0}
    iota2: eta2,  # {Sm: 1} -> {Env: 1}
    iota3: eta3,  # {LC: 0} -> {Gen_: 0}
    iota4: eta4,  # {LC: 1} -> {Gen_: 1}
    iota5: eta5,  # {Sm: 0, LC: 0} -> {Env: 0, Gen_: 0}
    iota6: eta6,  # {Sm: 1, LC: 1} -> {Env: 1, Gen_: 1}
    iota7: eta9,  # {Sm: 0, LC: 1} -> {LC_: 0}
    iota8: eta10, # {Sm: 1, LC: 0} -> {LC_: 1}
    iota10: eta5, # {Gen: 1} -> {Env: 0, Gen_: 0}
    iota11: eta6, # {Gen: 0, Sm: 0} -> {Env: 1, Gen_: 1}
    iota12: eta3, # {Gen: 1, Sm: 1} -> {Gen_: 0}
    iota13: eta4, # {Gen: 0, Sm: 1} -> {Gen_: 1}
    iota14: eta8, # {Gen: 1, Sm: 0} -> {Env: 1, Gen_: 0}
    iota15: eta7, # {All: 0} -> {Env: 0, Gen_: 1}
    iota16: eta5, # {All: 1} -> {Env: 0, Gen_: 0}
    iota17: eta6, # {Cou: 0} -> {Env: 1, Gen_: 1}
    iota18: eta10, # {Cou: 1, Fat: 1} -> {LC_: 1}
    iota19: eta9, # {Cou: 1, Fat: 0} -> {LC_: 0}
    iota20: eta8, # {Cou: 0, Fat: 1} -> {Env: 1, Gen_: 0}
}


# Relevant intervention sets, listed in the order in which they appear in omega.
# Dict keys are already unique, and dict.fromkeys de-duplicates the values while
# keeping first-occurrence order. Going through set() instead yields an
# iteration order that is not guaranteed to be stable between kernel sessions,
# which in turn changes which low-level intervention "wins" whenever several of
# them map to the same high-level intervention further down.
Ill_relevant = list(omega)
Ihl_relevant = list(dict.fromkeys(omega.values()))

### Sampling and Pair construction

In [8]:
# For every relevant low-level intervention: draw exogenous noise and push it
# through the linear additive-noise SCM built for that intervention.
Dll_samples = {}
Dll_noise   = {}
for iota in Ill_relevant:
    # Low-level SCM constructed with intervention `iota` (None = no intervention).
    llcm = lanm.LinearAddSCM(ll_causal_graph, ll_endogenous_coeff_dict, iota)

    # The environment is rebuilt on every iteration, so each iota gets its own
    # noise draw rather than sharing one across interventions.
    lenv_iota  = mut.sample_distros_Gelbrich([(ll_mu_hat, ll_Sigma_hat)])[0]
    noise_iota = lenv_iota.sample(num_llsamples)[0]

    Dll_noise[iota]   = noise_iota
    Dll_samples[iota] = llcm.sample_settings(noise_iota)

In [9]:
# Linear abstraction map: a 3 x 6 matrix applied to the low-level samples as
# `samples @ T.T`, i.e. one row per high-level output and one column per
# low-level input. The row annotations are the author's; the column-to-variable
# order is not visible in this cell — confirm against the low-level graph.
T = np.array(
    [
        (2.0, 1.0, 1.0, 0.5, 1.0, 0.5),  # Strong focus on Sm and LC for first output
        (0.5, 2.0, 0.8, 1.5, 0.7, 1.0),  # Balanced focus on Gen and All
        (1.0, 0.5, 1.0, 2.0, 0.9, 1.5),  # Emphasis on Cou and Fat
    ]
)

In [10]:
# High-level samples, keyed by the high-level intervention omega[iota]: the
# low-level samples of iota mapped through the linear abstraction T.
# NOTE(review): omega is not injective, so when several iota share the same
# omega[iota] only the samples of the last such iota in Ill_relevant are kept.
Dhl_samples = {
    omega[iota]: Dll_samples[iota] @ T.T
    for iota in Ill_relevant
}

In [11]:
# Per low-level intervention, pair its low-level samples with the high-level
# samples stored under the corresponding high-level intervention omega[iota].
Ds = {
    iota: (Dll_samples[iota], Dhl_samples[omega[iota]])
    for iota in Ill_relevant
}

In [12]:
# High-level edge coefficients, obtained by mut.get_coefficients from the
# high-level samples stored under key None (no intervention) and the
# high-level graph.
hl_coeffs = mut.get_coefficients(Dhl_samples[None], hl_causal_graph) 

In [13]:
# mut.lan_abduction takes the no-intervention high-level samples, the high-level
# graph and its coefficients, and returns the high-level noise samples together
# with a mean and a covariance (presumably the empirical moments of the
# recovered noise — confirm against modularised_utils).
Dhl_noise, hl_mu_hat, hl_Sigma_hat = mut.lan_abduction(Dhl_samples[None], hl_causal_graph, hl_coeffs)
# hl_moments     = [(hl_mu_hat, hl_Sigma_hat)]
# henv           = mut.sample_distros_Gelbrich(hl_moments)[0] 

In [14]:
# Persist every artefact of this experiment under data/<experiment>/.
out_dir = f"data/{experiment}"
# joblib.dump does not create missing directories, so make sure the target
# folder exists before writing (no-op when it is already there).
os.makedirs(out_dir, exist_ok=True)

# Low-level model: graph + relevant interventions, and its edge coefficients.
joblib.dump((ll_causal_graph, Ill_relevant), f"{out_dir}/LL.pkl")
joblib.dump(ll_endogenous_coeff_dict, f"{out_dir}/ll_coeffs.pkl")

# High-level model: graph + relevant interventions, and its coefficients.
joblib.dump((hl_causal_graph, Ihl_relevant), f"{out_dir}/HL.pkl")
joblib.dump(hl_coeffs, f"{out_dir}/hl_coeffs.pkl")

# Paired (low-level, high-level) samples per low-level intervention.
joblib.dump(Ds, f"{out_dir}/Ds.pkl")

# Abstraction matrix and intervention map.
joblib.dump(T, f"{out_dir}/Tau.pkl")
joblib.dump(omega, f"{out_dir}/omega.pkl")

# Exogenous noise of the no-intervention setting together with its moments.
joblib.dump((Dll_noise[None], ll_mu_hat, ll_Sigma_hat), f"{out_dir}/exogenous_LL.pkl")
joblib.dump((Dhl_noise, hl_mu_hat, hl_Sigma_hat), f"{out_dir}/exogenous_HL.pkl")

['data/little_lucas/exogenous_HL.pkl']