In [2]:
import os

import joblib
import networkx as nx
import numpy as np

from src.CBN import CausalBayesianNetwork as CBN
import modularised_utils as mut
import Linear_Additive_Noise_Models as lanm
import operations as ops
import evaluation_utils as evut
import params

### Define low-level DCM "LL" and high-level DCM "HL"

In [3]:
# Experiment identifier; the save cell uses it as the sub-directory name under data/.
experiment = "synth1.1"

In [4]:
# Node labels of the low-level graph.
# NOTE: `T` is rebound to the abstraction matrix further down, so this cell
# must be re-run before re-running any cell that uses T as a node label.
S, T, C = 'Smoking', 'Tar', 'Cancer'

# Node labels of the high-level graph (same names with a trailing underscore).
S_, C_ = 'Smoking_', 'Cancer_'

In [5]:
# Low-level linear coefficients, keyed by (source, target) edge.
# Alternative settings kept for reference:
#   {(S, T): 0.3,  (T, C): 0.2}
#   {(S, T): 4.0,  (T, C): 3}
#   {(S, T): 10.0, (T, C): 8.0}
ll_endogenous_coeff_dict = {
    (S, T): 1.0,
    (T, C): 0.3,
}

# The graph is built from the edge list, i.e. the keys of the coefficient dict.
ll_causal_graph = CBN(list(ll_endogenous_coeff_dict))
# Optional visual check:
# nx.draw(nx.DiGraph(ll_causal_graph.edges()), with_labels=True)

# High-level model: a single edge. The 0.0 coefficient is only an initial
# value; it is re-estimated from data later in the notebook.
hl_endogenous_coeff_dict = {(S_, C_): 0.0}
hl_causal_graph = CBN(list(hl_endogenous_coeff_dict))
# Optional visual check:
# nx.draw(nx.DiGraph(hl_causal_graph.edges()), with_labels=True)

In [6]:
# Number of samples to draw: entry 0 is used for the low-level model,
# entry 1 for the high-level model.
# NOTE(review): the key is 'synth1' while `experiment` is 'synth1.1' — confirm
# that sharing the sample sizes between the two is intended.
_n_samples_cfg = params.n_samples['synth1']
num_llsamples = _n_samples_cfg[0]
num_hlsamples = _n_samples_cfg[1]

### Construct the empirical nominal distribution for the low-level model

In [7]:
# Nominal mean / covariance for the three low-level variables:
# zero mean, identity covariance (a previous variant used np.diag([1, 2, 1])).
ll_mu_hat = np.zeros(3, dtype=int)
ll_Sigma_hat = np.eye(3, dtype=int)

# Initial mean / covariance for the two high-level variables
# (a previous variant used np.diag([1, 2])). Both are overwritten later by
# the values returned from the abduction step.
hl_mu_hat = np.zeros(2, dtype=int)
hl_Sigma_hat = np.eye(2, dtype=int)

### Define the sets of relevant interventions and the (total) surjective and order-preserving function $\omega: I^{L} \to I^{H}$

In [8]:
# Low-level interventions. `None` is used as the key of the un-intervened
# (observational) regime. `T` must still be the node label here, not the
# abstraction matrix it is rebound to later in the notebook.
iota0 = None
iota1 = ops.Intervention({S: 0})
iota2 = ops.Intervention({S: 0, T: 1})
iota3 = ops.Intervention({S: 1})
iota4 = ops.Intervention({S: 1, T: 0})
iota5 = ops.Intervention({S: 1, T: 1})

# High-level interventions; again `None` is the observational regime.
eta0 = None
eta1 = ops.Intervention({S_: 0})
eta2 = ops.Intervention({S_: 1})

# omega sends each low-level intervention to its high-level counterpart:
# the value set on Smoking decides the image, whatever is set on Tar.
omega = dict([
    (iota0, eta0),
    (iota1, eta1),
    (iota2, eta1),
    (iota3, eta2),
    (iota4, eta2),
    (iota5, eta2),
])

# Relevant intervention sets, listed in the order they appear in `omega`.
# Going through set() would give an arbitrary, hash-dependent order, which
# would in turn change the order of the sampling loops and of the pickled
# lists from one kernel session to the next.
# Dict keys are already unique, so listing the dict is enough.
Ill_relevant = list(omega)
# dict.fromkeys removes the repeated images while keeping first-seen order.
Ihl_relevant = list(dict.fromkeys(omega.values()))

### Sampling and Pair construction

In [9]:
# Per-intervention low-level data: the exogenous noise and the samples
# simulated from it, both keyed by the intervention object (None included).
# NOTE(review): no RNG seed is set in the visible notebook, so these draws
# are presumably not reproducible across runs — confirm which RNG the
# mut / lanm helpers use and seed it in the configuration cell.
Dll_samples = {}
Dll_noise = {}

for iota in Ill_relevant:
    # Linear additive-noise SCM for this intervention.
    llcm = lanm.LinearAddSCM(ll_causal_graph, ll_endogenous_coeff_dict, iota)

    # A fresh noise environment is drawn for every intervention, so each
    # iota gets its own noise sample.
    lenv_iota = mut.sample_distros_Gelbrich([(ll_mu_hat, ll_Sigma_hat)])[0]
    noise_iota = lenv_iota.sample(num_llsamples)[0]

    Dll_noise[iota] = noise_iota
    Dll_samples[iota] = llcm.simulate(noise_iota, iota)

### Define the abstraction T

In [10]:
# Abstraction matrix: 2 rows x 3 columns. It is applied to the low-level
# samples as `samples @ T.T`.
# Earlier variant: np.array([[.1, .6, .2], [.6, .2, .7]])
# NOTE: this rebinds `T`, which held the node label 'Tar' until now. Cells
# that use T as a node label will fail if re-run after this one without
# first re-running the label cell.
T = np.array([
    [1, 2, 1],
    [0, 1, 0],
])

### Compute the empirical nominal distribution for the high-level model and the linear coefficients

In [11]:
# Push the observational low-level samples (key None) through the
# abstraction to obtain the observational high-level data.
data_observational_hl = Dll_samples[None] @ T.T

# Replace the initial high-level coefficients by the ones obtained from
# that data on the high-level graph.
hl_endogenous_coeff_dict = mut.get_coefficients(
    data_observational_hl,
    hl_causal_graph,
)

# Abduction returns the high-level noise together with a mean and a
# covariance; these overwrite the initial hl_mu_hat / hl_Sigma_hat.
_hl_abduction = mut.lan_abduction(
    data_observational_hl,
    hl_causal_graph,
    hl_endogenous_coeff_dict,
)
U_hl, hl_mu_hat, hl_Sigma_hat = _hl_abduction

In [12]:
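# Disabled alternative: draw the high-level noise from the nominal Gaussian and fit the coefficients with that noise treated as known.
# data_observational_hl    = Dll_samples[None]@ T.T
# U_hl                     = np.random.multivariate_normal(mean=hl_mu_hat, cov=hl_Sigma_hat, size=num_hlsamples)
# hl_endogenous_coeff_dict = mut.get_coefficients_with_known_noise(data_observational_hl, U_hl, hl_causal_graph)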

### Generate samples for the high-level model

In [13]:
# Per-intervention high-level data, keyed by the intervention object.
Dhl_samples = {}
Dhl_noise = {}

for eta in Ihl_relevant:

    if eta is None:
        # Observational regime: nothing is sampled; reuse the abducted noise
        # and the abstracted low-level observational data.
        Dhl_noise[eta] = U_hl
        Dhl_samples[eta] = data_observational_hl
        continue

    # Interventional regime: simulate from the high-level SCM.
    hlcm = lanm.LinearAddSCM(hl_causal_graph, hl_endogenous_coeff_dict, eta)

    # A fresh noise environment is drawn for every eta, so each intervention
    # gets its own high-level noise sample.
    lenv_eta = mut.sample_distros_Gelbrich([(hl_mu_hat, hl_Sigma_hat)])[0]
    noise_eta = lenv_eta.sample(num_hlsamples)[0]

    Dhl_noise[eta] = noise_eta
    Dhl_samples[eta] = hlcm.simulate(noise_eta, eta)

In [14]:
# # Closer to exact abstraction
# Dhl_samples = {}
# list_of_iotas = []
# for iota in Ill_relevant:
    
#     if iota not in list_of_iotas:
#         list_of_iotas.append(omega[iota])
#         if iota is not None:
#             Dhl_samples[omega[iota]] = Dll_samples[iota] @ T.T
#         else:
#             Dhl_samples[omega[iota]] = data_observational_hl

In [15]:
# Pair every low-level data set with the high-level data set of its image
# under omega: Ds[iota] = (low-level samples, high-level samples).
Ds = {
    iota: (Dll_samples[iota], Dhl_samples[omega[iota]])
    for iota in Ill_relevant
}

### Save the data

In [20]:
# Persist every artefact of this experiment under data/<experiment>/.
out_dir = f"data/{experiment}"

# joblib.dump does not create missing parent directories, so make sure the
# target exists; on a fresh checkout the first dump would otherwise fail.
os.makedirs(out_dir, exist_ok=True)

# Low-level model: graph plus relevant interventions, and its coefficients.
joblib.dump((ll_causal_graph, Ill_relevant), f"{out_dir}/LL.pkl")
joblib.dump(ll_endogenous_coeff_dict, f"{out_dir}/ll_coeffs.pkl")

# High-level model: graph plus relevant interventions, and its coefficients.
joblib.dump((hl_causal_graph, Ihl_relevant), f"{out_dir}/HL.pkl")
joblib.dump(hl_endogenous_coeff_dict, f"{out_dir}/hl_coeffs.pkl")

# Paired (low-level, high-level) samples per low-level intervention.
joblib.dump(Ds, f"{out_dir}/Ds.pkl")

# Abstraction matrix (T is the matrix at this point, not the node label)
# and the intervention map.
joblib.dump(T, f"{out_dir}/Tau.pkl")
joblib.dump(omega, f"{out_dir}/omega.pkl")

# Observational exogenous noise with the corresponding mean / covariance.
joblib.dump((Dll_noise[None], ll_mu_hat, ll_Sigma_hat), f"{out_dir}/exogenous_LL.pkl")
joblib.dump((U_hl, hl_mu_hat, hl_Sigma_hat), f"{out_dir}/exogenous_HL.pkl")

['data/synth1.1/exogenous_HL.pkl']