In [1]:
import os

import joblib
import numpy as np
import networkx as nx

from src.CBN import CausalBayesianNetwork as CBN
import modularised_utils as mut
import Linear_Additive_Noise_Models as lanm
import operations as ops
import evaluation_utils as evut
import params

### Define low-level DCM "LL" and high-level DCM "HL"

In [2]:
# Experiment identifier: used below as the key into params.n_samples and as
# the sub-directory name under data/ when the artefacts are saved.
experiment = 'lucas6x3'

In [3]:
# 6x3 LUCAS: node labels for the two causal graphs.

# Low-level graph (six variables).
Sm, Gen, LC, All, Cou, Fat = (
    'Smoking',
    'Genetics',
    'Lung Cancer',
    'Allergy',
    'Coughing',
    'Fatigue',
)

# High-level graph (three variables). The trailing underscore keeps the
# labels distinct from the low-level nodes of the same name.
Env, Gen_, LC_ = 'Environment', 'Genetics_', 'Lung Cancer_'

In [4]:
# Low-level model: one weight per directed edge, keyed by (parent, child).
ll_endogenous_coeff_dict = {
    (Sm, LC): 0.9,    # Smoking -> Lung Cancer
    (Gen, LC): 0.8,   # Genetics -> Lung Cancer
    (LC, Cou): 0.6,   # Lung Cancer -> Coughing
    (LC, Fat): 0.9,   # Lung Cancer -> Fatigue
    (Cou, Fat): 0.5,  # Coughing -> Fatigue
    (All, Cou): 0.4,  # Allergy -> Coughing
}

# High-level model: the 0.0 weights are placeholders. Only the keys are used
# to build the high-level graph; the dict itself is reassigned further down
# from mut.get_coefficients.
hl_endogenous_coeff_dict = {
    (Env, LC_): 0.0,   # Environment -> Lung Cancer_
    (Gen_, LC_): 0.0,  # Genetics_ -> Lung Cancer_
}

In [5]:
# Build both graphs from the edge lists, i.e. the (parent, child) keys of the
# coefficient dicts, in insertion order.
ll_causal_graph = CBN(list(ll_endogenous_coeff_dict))
hl_causal_graph = CBN(list(hl_endogenous_coeff_dict))

In [6]:
# Sample sizes configured for this experiment: entry 0 is used for the
# low-level model, entry 1 for the high-level model.
_n_samples = params.n_samples[experiment]
num_llsamples = _n_samples[0]
num_hlsamples = _n_samples[1]

### Construct the empirical nominal distribution / environment

In [7]:
# Mean vector and diagonal matrix handed to mut.sample_distros_Gelbrich when
# the low-level noise environment is sampled (one entry per low-level variable).
ll_mu_hat = np.array([0, 0, 0.1, 0.1, 0.3, 0.2])

ll_sigma_diagonal = [0.5, 2.0, 1.0, 1.5, 0.8, 1.2]
ll_Sigma_hat = np.diag(ll_sigma_diagonal)

### Define the sets of relevant interventions and the (total), surjective, order-preserving function $\omega: I^{L} \to I^{H}$

In [8]:
# Low-level interventions. None stands for the observational regime.
iota0 = None

# Smoking only
iota1 = ops.Intervention({Sm: 0})
iota2 = ops.Intervention({Sm: 1})

# Lung Cancer only
iota3 = ops.Intervention({LC: 0})
iota4 = ops.Intervention({LC: 1})

# Smoking and Lung Cancer jointly
iota5 = ops.Intervention({Sm: 0, LC: 0})
iota6 = ops.Intervention({Sm: 1, LC: 1})
iota7 = ops.Intervention({Sm: 0, LC: 1})
iota8 = ops.Intervention({Sm: 1, LC: 0})

# Genetics only
iota9 = ops.Intervention({Gen: 0})
iota10 = ops.Intervention({Gen: 1})

# Genetics and Smoking jointly
iota11 = ops.Intervention({Gen: 0, Sm: 0})
iota12 = ops.Intervention({Gen: 1, Sm: 1})
iota13 = ops.Intervention({Gen: 0, Sm: 1})
iota14 = ops.Intervention({Gen: 1, Sm: 0})

# Allergy only
iota15 = ops.Intervention({All: 0})
iota16 = ops.Intervention({All: 1})

# Coughing alone, then Coughing and Fatigue jointly
iota17 = ops.Intervention({Cou: 0})
iota18 = ops.Intervention({Cou: 1, Fat: 1})
iota19 = ops.Intervention({Cou: 1, Fat: 0})
iota20 = ops.Intervention({Cou: 0, Fat: 1})



# High-level interventions. None stands for the observational regime.
eta0 = None

# Environment only
eta1 = ops.Intervention({Env: 0})
eta2 = ops.Intervention({Env: 1})

# Genetics_ only
eta3 = ops.Intervention({Gen_: 0})
eta4 = ops.Intervention({Gen_: 1})

# Environment and Genetics_ jointly
eta5 = ops.Intervention({Env: 0, Gen_: 0})
eta6 = ops.Intervention({Env: 1, Gen_: 1})
eta7 = ops.Intervention({Env: 0, Gen_: 1})
eta8 = ops.Intervention({Env: 1, Gen_: 0})

# Lung Cancer_ only
eta9 = ops.Intervention({LC_: 0})
eta10 = ops.Intervention({LC_: 1})



# omega maps each low-level intervention (key) to its high-level counterpart
# (value). The map is many-to-one: several iotas share the same eta, and every
# eta0..eta10 appears at least once as a value.
# NOTE(review): iota9 (Intervention({Gen: 0})) is defined in this cell but has
# no entry below, so omega is not total over iota0..iota20 even though the
# section heading describes it as total. Confirm whether the omission is
# intentional, and if not, which eta iota9 should map to.
omega = {
    iota0: eta0, 
    iota1: eta1,  
    iota2: eta2, 
    iota3: eta3,  
    iota4: eta4,  
    iota5: eta5,  
    iota6: eta6,  
    iota7: eta9,  
    iota8: eta10, 
    # (no entry for iota9 -- see the review note above the dict)
    iota10: eta5, 
    iota11: eta6, 
    iota12: eta3, 
    iota13: eta4, 
    iota14: eta8, 
    iota15: eta7, 
    iota16: eta5, 
    iota17: eta6, 
    iota18: eta10, 
    iota19: eta9, 
    iota20: eta8 
}


# Relevant intervention sets, listed in the order they first appear in omega.
# Dict keys are already unique, and dict.fromkeys de-duplicates the values
# while keeping first-seen order. The earlier list(set(...)) form yielded an
# arbitrary order that could change between kernel sessions; since these lists
# drive the sampling loops and are pickled to disk, a stable order makes the
# saved artefacts reproducible. The elements themselves are unchanged.
Ill_relevant = list(omega)
Ihl_relevant = list(dict.fromkeys(omega.values()))

### Sampling and Pair construction

In [9]:
# Per-intervention low-level data: noise draws and the simulated samples.
Dll_samples = {}
Dll_noise = {}

for iota in Ill_relevant:
    # Low-level linear additive-noise SCM under this intervention.
    llcm = lanm.LinearAddSCM(ll_causal_graph, ll_endogenous_coeff_dict, iota)

    # The environment is re-drawn on every iteration, so each intervention
    # gets its own noise sample rather than sharing one across interventions.
    lenv_iota = mut.sample_distros_Gelbrich([(ll_mu_hat, ll_Sigma_hat)])[0]
    noise_iota = lenv_iota.sample(num_llsamples)[0]

    Dll_noise[iota] = noise_iota
    Dll_samples[iota] = llcm.simulate(noise_iota, iota)

In [10]:
# 3 x 6 matrix applied (transposed) to the low-level observational samples to
# obtain the high-level observational data; it is saved later as Tau.pkl.
T = np.array(
    [
        [2.0, 1.0, 1.0, 0.5, 1.0, 0.5],
        [0.5, 2.0, 0.8, 1.5, 0.7, 1.0],
        [1.0, 0.5, 1.0, 2.0, 0.9, 1.5],
    ]
)

In [11]:
# Map the observational low-level samples (stored under the key None) through
# T to obtain the high-level observational data.
ll_observational = Dll_samples[None]
data_observational_hl = ll_observational @ T.T

# Replace the placeholder high-level coefficients with the ones that
# mut.get_coefficients returns for this data and the high-level graph.
hl_endogenous_coeff_dict = mut.get_coefficients(data_observational_hl, hl_causal_graph)

# mut.lan_abduction returns three values: U_hl is reused below as the
# observational high-level noise, and (hl_mu_hat, hl_Sigma_hat) parametrise
# the environment sampled for the interventional high-level data.
U_hl, hl_mu_hat, hl_Sigma_hat = mut.lan_abduction(
    data_observational_hl,
    hl_causal_graph,
    hl_endogenous_coeff_dict,
)

In [12]:
# Per-intervention high-level data: noise draws and the corresponding samples.
Dhl_samples = {}
Dhl_noise = {}

for eta in Ihl_relevant:

    if eta is None:
        # Observational regime: reuse the abducted noise and the data already
        # obtained from the low-level observational samples.
        Dhl_noise[eta] = U_hl
        Dhl_samples[eta] = data_observational_hl
        continue

    # Interventional regime: simulate from the high-level SCM with a freshly
    # drawn environment.
    hlcm = lanm.LinearAddSCM(hl_causal_graph, hl_endogenous_coeff_dict, eta)
    lenv_eta = mut.sample_distros_Gelbrich([(hl_mu_hat, hl_Sigma_hat)])[0]
    noise_eta = lenv_eta.sample(num_hlsamples)[0]

    Dhl_noise[eta] = noise_eta
    Dhl_samples[eta] = hlcm.simulate(noise_eta, eta)

In [14]:
# Pair each low-level dataset with the high-level dataset of the intervention
# it maps to under omega.
Ds = {
    iota: (Dll_samples[iota], Dhl_samples[omega[iota]])
    for iota in Ill_relevant
}

In [15]:
# Persist every artefact of this experiment under data/<experiment>/.
# joblib.dump does not create missing parent directories, so make sure the
# target directory exists first; otherwise a fresh checkout fails only at this
# last cell, after all the sampling has already been done.
out_dir = f"data/{experiment}"
os.makedirs(out_dir, exist_ok=True)

# Low-level model: graph, relevant interventions, edge coefficients.
joblib.dump((ll_causal_graph, Ill_relevant), f"{out_dir}/LL.pkl")
joblib.dump(ll_endogenous_coeff_dict, f"{out_dir}/ll_coeffs.pkl")

# High-level model: graph, relevant interventions, edge coefficients.
joblib.dump((hl_causal_graph, Ihl_relevant), f"{out_dir}/HL.pkl")
joblib.dump(hl_endogenous_coeff_dict, f"{out_dir}/hl_coeffs.pkl")

# Paired (low-level, high-level) samples per low-level intervention.
joblib.dump(Ds, f"{out_dir}/Ds.pkl")

# The matrix T and the intervention map omega.
joblib.dump(T, f"{out_dir}/Tau.pkl")
joblib.dump(omega, f"{out_dir}/omega.pkl")

# Observational noise together with the (mu, Sigma) pair of each level.
joblib.dump((Dll_noise[None], ll_mu_hat, ll_Sigma_hat), f"{out_dir}/exogenous_LL.pkl")
joblib.dump((U_hl, hl_mu_hat, hl_Sigma_hat), f"{out_dir}/exogenous_HL.pkl")

['data/lucas6x3/exogenous_HL.pkl']