In [1]:
import os

import joblib
import networkx as nx
import numpy as np

from src.CBN import CausalBayesianNetwork as CBN
import modularised_utils as mut
import Linear_Additive_Noise_Models as lanm
import operations as ops

import params

# Seed NumPy's legacy global RNG so code that draws from it is repeatable.
np.random.seed(seed=0)

In [12]:
import torch

### Define low-level DCM "LL" and high-level DCM "HL"

In [2]:
# Experiment identifier: used to index params.n_samples and to build the
# output path data/<experiment>/Ds.pkl further down.
experiment = 'synth1_gnd'

In [3]:
# Node labels used in the low-level coefficient dictionary and interventions.
S, T, C = 'Smoking', 'Tar', 'Cancer'

# Node labels used in the high-level coefficient dictionary and interventions.
S_, C_ = 'Smoking_', 'Cancer_'

In [4]:
# Low-level linear model: each key is a pair of node labels with its coefficient
# (presumably (parent, child), i.e. Smoking -> Tar -> Cancer -- confirm against CBN).
# The list of key pairs is what gets handed to CBN to build the graph.
ll_endogenous_coeff_dict = {
    (S, T): 0.3,
    (T, C): 0.2,
}
ll_causal_graph = CBN(list(ll_endogenous_coeff_dict))
#nx.draw(nx.DiGraph(ll_causal_graph.edges()),with_labels=True)

# High-level linear model: a single pair Smoking_ / Cancer_.
hl_endogenous_coeff_dict = {
    (S_, C_): 0.6,
}
hl_causal_graph = CBN(list(hl_endogenous_coeff_dict))
#nx.draw(nx.DiGraph(hl_causal_graph.edges()),with_labels=True)

In [5]:
# Sample sizes configured for this experiment: entry 0 is the low-level count,
# entry 1 the high-level count.
num_llsamples, num_hlsamples = (params.n_samples[experiment][k] for k in (0, 1))

### Construct the empirical nominal distribution / environment

In [6]:
# Location / scale / shape of the low-level exogenous noise, three entries each
# (presumably ordered as Smoking, Tar, Cancer -- confirm against the sampler).
# ll_loc and ll_scale feed ops.MultivariateLaplace in the sampling cell; ll_shape
# is only referenced by the commented-out MultivariateGeneralizedNormal call.
ll_loc    = [0, 2, -1]    
ll_scale  = [1, 0.5, 2]
ll_shape  = [2, 2, 2] # NOTE(review): in the standard generalized normal, beta = 1 => Laplace, beta = 2 => Gaussian, beta --> inf => Uniform; confirm against ops

# hl_loc    = [0, 1]
# hl_scale  = [1, 1]
# hl_shape  = [2, 2]

# # ll_moments = (ll_loc, ll_scale)
# # hl_moments = (hl_loc, hl_scale)
# lenv      = ops.MultivariateGeneralizedNormal(ll_loc, ll_scale, ll_shape)
# henv      = ops.MultivariateGeneralizedNormal(hl_loc, hl_scale, hl_shape)

# # lenv      = ops.MultivariateLaplace(ll_loc, ll_scale)
# # henv      = ops.MultivariateLaplace(hl_loc, hl_scale)

# Dll_noise = lenv.sample(10000)
# Dhl_noise = henv.sample(10000)

# # ll_environment = mut.get_exogenous_distribution(Dll_noise)
# # hl_environment = mut.get_exogenous_distribution(Dhl_noise)

### Define the sets of relevant interventions and the (total) surjective and order-preserving function $\omega: I^{L} \to I^{H}$

In [7]:
# Low-level interventions. None is the observational (no-intervention) setting;
# it is later used as the key for the observational samples.
iota0 = None
iota1 = ops.Intervention({S:0})
iota2 = ops.Intervention({S:0, T:1})
iota3 = ops.Intervention({S:1})
iota4 = ops.Intervention({S:1, T:0})
iota5 = ops.Intervention({S:1, T:1})

# High-level interventions (None again is the observational setting).
eta0 = None
eta1 = ops.Intervention({S_:0})
eta2 = ops.Intervention({S_:1})

# omega maps every low-level intervention to its high-level counterpart.
omega = {   
            iota0: eta0,
            iota1: eta1,
            iota2: eta1,
            iota3: eta2,
            iota4: eta2,
            iota5: eta2
        }

# Deduplicate while keeping the declaration order above. list(set(...)) gives no
# guaranteed iteration order, so the order in which downstream loops consume the
# seeded RNG per intervention would not be reproducible. dict.fromkeys uses the
# same hash/equality deduplication as set but preserves insertion order.
Ill_relevant = list(dict.fromkeys(omega))
Ihl_relevant = list(dict.fromkeys(omega.values()))

### Sampling and Pair construction

In [8]:
# For every relevant low-level intervention: build the intervened linear model,
# draw a separate Laplace noise sample, and simulate the endogenous data from it.
# Both the noise and the resulting samples are stored per intervention.
Dll_noise   = {}
Dll_samples = {}
for iota in Ill_relevant:
    llcm      = lanm.LinearAddSCM(ll_causal_graph, ll_endogenous_coeff_dict, iota)
    lenv_iota = ops.MultivariateLaplace(ll_loc, ll_scale)  # new sampler object per intervention
    Dll_noise[iota]   = lenv_iota.sample(num_llsamples)
    Dll_samples[iota] = llcm.simulate(Dll_noise[iota], iota)

In [9]:
# Abduction on the observational low-level samples (iota0 is None, the
# no-intervention key); returns the noise plus location and scale estimates.
noise, loc, scale = mut.lan_abduction_laplace(
    Dll_samples[iota0],
    ll_causal_graph,
    ll_endogenous_coeff_dict,
)

In [10]:
# Inspect the estimated locations; the recorded output is close to ll_loc = [0, 2, -1].
loc

array([-1.35878524e-03,  2.00436547e+00, -1.00857800e+00])

In [14]:
# Ground-truth abstraction as a 2x3 integer matrix; it is applied to the
# low-level samples as `samples @ T.T` in the next cell.
# NOTE: this rebinds `T`, which earlier held the node label 'Tar'. Re-running the
# graph / intervention cells after this one would pick up the matrix instead.
T = np.array([
    [1, 2, 1],
    [0, 1, 0],
])

In [None]:
# Map the observational low-level samples through the abstraction matrix to get
# high-level observational data, re-estimate the high-level coefficients from it
# (this replaces the hand-specified hl_endogenous_coeff_dict), then run abduction.
data_observational_hl = Dll_samples[iota0] @ T.transpose()
hl_endogenous_coeff_dict = mut.get_coefficients(data_observational_hl, hl_causal_graph)
U_hl, hl_mu_hat, hl_Sigma_hat = mut.lan_abduction(
    data_observational_hl,
    hl_causal_graph,
    hl_endogenous_coeff_dict,
)

In [21]:
# Low-level data: one dataset per relevant intervention. Dll_noise is a dict keyed
# by intervention (filled in the sampling cell), so each model receives its own
# noise draw rather than the whole dict.
Dll_samples = {}
for iota in Ill_relevant:
    llcm              = lanm.LinearAddSCM(ll_causal_graph, ll_endogenous_coeff_dict, iota)
    Dll_samples[iota] = llcm.sample_settings(Dll_noise[iota])

# High-level data. `Dhl_noise` is not assigned by any active cell (its only
# definition is commented out), which raises NameError on a fresh kernel. The
# noise abducted from the abstracted observational data is used instead.
# NOTE(review): assumes U_hl is the intended high-level exogenous noise -- confirm.
Dhl_noise   = U_hl
Dhl_samples = {}
for eta in Ihl_relevant:
    hlcm             = lanm.LinearAddSCM(hl_causal_graph, hl_endogenous_coeff_dict, eta)
    Dhl_samples[eta] = hlcm.sample_settings(Dhl_noise)

# Pair each low-level dataset with the high-level dataset of its image under omega.
Ds = {iota: (Dll_samples[iota], Dhl_samples[omega[iota]]) for iota in Ill_relevant}

In [22]:
# Create the output folder first: joblib.dump does not create missing parent
# directories and would fail with FileNotFoundError on a fresh checkout.
os.makedirs(f"data/{experiment}", exist_ok=True)
# The file is a pickle; only load it back from a trusted location.
joblib.dump(Ds, f"data/{experiment}/Ds.pkl")

['data/synth1_gnd/Ds.pkl']

In [23]:
# LLmodels, Dll_samples = {}, {}
# for iota in Ill_relevant:

#     LLmodels[iota]      = lanm.LinearAddSCM(ll_causal_graph, ll_endogenous_coeff_dict, iota)
#     Dll_samples[iota]   = LLmodels[iota].sample_settings(Dll_noise)

# HLmodels, Dhl_samples = {}, {}
# for eta in Ihl_relevant:

#     HLmodels[eta]      = lanm.LinearAddSCM(hl_causal_graph, hl_endogenous_coeff_dict, eta)
#     Dhl_samples[eta]   = HLmodels[eta].sample_settings(Dhl_noise)


# pairs = mut.create_pairs(Ill_relevant, omega, LLmodels, HLmodels)

# Ds = {}
# for iota in Ill_relevant:
#     Ds[iota] = (Dll_samples[iota], Dhl_samples[omega[iota]])

In [24]:
# joblib.dump(pairs, f"data/{experiment}/pairs.pkl")
# joblib.dump(Ds, f"data/{experiment}/Ds.pkl")