In [1]:
import os

import joblib

import numpy as np
import networkx as nx

from src.CBN import CausalBayesianNetwork as CBN
import modularised_utils as mut
import Linear_Additive_Noise_Models as lanm
import operations as ops

import params

# Fix the seed of NumPy's global (legacy) random generator so that a
# Restart & Run All reproduces the same draws from np.random.
SEED = 0
np.random.seed(SEED)

### Define low-level DCM "LL" and high-level DCM "HL"

In [2]:
# Experiment identifier: it indexes the per-experiment entries of `params`
# and names the output folder under `data/`.
experiment = "little_lucas"

In [3]:
# Node labels used by the low-level model (eight variables).
Anx, PP, Sm, Gen = 'Anxiety', 'Peer Pressure', 'Smoking', 'Genetics'
LC, All, Cou, Fat = 'Lung Cancer', 'Allergy', 'Coughing', 'Fatigue'

# Node labels used by the high-level model (three variables). The trailing
# underscore keeps 'Genetics_' and 'Lung Cancer_' distinct from the
# low-level labels of the same name.
Env, Gen_, LC_ = 'Environment', 'Genetics_', 'Lung Cancer_'

In [4]:
# Linear coefficients of the low-level model, keyed by edge
# (presumably (parent, child) — confirm against CBN / LinearAddSCM).
# Insertion order is kept as-is because the key list is handed to CBN.
ll_endogenous_coeff_dict = {
    (Anx, Sm):  0.2,
    (PP, Sm):   0.2,
    (Sm, LC):   0.2,
    (Gen, LC):  0.2,
    (All, Cou): 0.4,
    (LC, Cou):  0.6,
    (LC, Fat):  0.1,
    (Cou, Fat): 0.3,
    (Anx, Fat): 0.1,
}
ll_causal_graph = CBN(list(ll_endogenous_coeff_dict))

# Linear coefficients of the high-level model.
# NOTE(review): every other coefficient in this notebook is a fraction below 1;
# the weight 8 on (Gen_, LC_) may be a typo for .8 — confirm before relying on it.
hl_endogenous_coeff_dict = {
    (Env, LC_):  0.6,
    (Gen_, LC_): 8,
}
hl_causal_graph = CBN(list(hl_endogenous_coeff_dict))

In [5]:
#nx.draw(nx.DiGraph(ll_causal_graph.edges()),with_labels=True)
#nx.draw(nx.DiGraph(hl_causal_graph.edges()),with_labels=True)

In [6]:
# Nominal first and second moments handed to the Gelbrich sampler below:
# a zero mean vector and a diagonal matrix per model.

# Low-level model: 8 variables.
ll_mu_hat    = np.array([0] * 8)
ll_Sigma_hat = np.diag([1, 2, 1, 2, 4, 5, 6, 1])

# High-level model: 3 variables.
hl_mu_hat    = np.array([0] * 3)
hl_Sigma_hat = np.diag([1, 1, 2])

# Alternative: draw the moments at random instead of fixing them.
# ll_mu_hat    = np.random.randn(8)
# ll_Sigma_hat = np.diag(np.random.rand(8))
#
# hl_mu_hat    = np.random.randn(3)
# hl_Sigma_hat = np.diag(np.random.rand(3))

# Each model gets a single-element list of (mean, covariance) pairs.
ll_moments = [(ll_mu_hat, ll_Sigma_hat)]
hl_moments = [(hl_mu_hat, hl_Sigma_hat)]

# Build the nominal environment of each model from its moment list.
# The helper's result is indexed with [0], i.e. only its first entry is kept
# (one entry is expected, since each moment list holds a single pair).
# Evaluation order is low-level first, then high-level.
lenv, henv = (
    mut.sample_distros_Gelbrich(moments)[0]
    for moments in (ll_moments, hl_moments)
)

In [7]:
# All settings come from `params`, keyed by experiment; within each entry,
# index 0 is assigned to the low-level model and index 1 to the high-level one.

# Radii of the Wasserstein balls: epsilon (low-level) and delta (high-level).
epsilon = params.radius[experiment][0]
delta   = params.radius[experiment][1]

# Number of environments for each model.
ll_num_envs = params.n_envs[experiment][0]
hl_num_envs = params.n_envs[experiment][1]

# Number of samples per environment; currently every environment of a model
# has the same number of samples.
num_llsamples = params.n_samples[experiment][0]
num_hlsamples = params.n_samples[experiment][1]

### Construct the empirical nominal distribution / environment

In [8]:
# Draw the low-level noise sample from the nominal environment. The result of
# `sample(...)` is indexed with [0], so only its first element is kept.
Dll_noise      = lenv.sample(num_llsamples)[0]
# Turn the drawn noise into an environment object via
# `mut.get_exogenous_distribution` (presumably an empirical distribution
# estimated from the sample — TODO confirm in modularised_utils).
ll_environment = mut.get_exogenous_distribution(Dll_noise)

# Same two steps for the high-level model. The statement order is kept as-is
# because the draws may consume shared random state.
Dhl_noise      = henv.sample(num_hlsamples)[0]
hl_environment = mut.get_exogenous_distribution(Dhl_noise)

Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md



In [9]:
# Persist, per model, the sampled noise together with the nominal moments
# (mean, covariance) defined earlier in the notebook.
#
# joblib.dump does not create missing parent folders, so make sure the
# per-experiment output directory exists first; otherwise a fresh checkout
# without `data/<experiment>/` fails with FileNotFoundError.
os.makedirs(f"data/{experiment}", exist_ok=True)

joblib.dump(
    value=(Dll_noise, ll_mu_hat, ll_Sigma_hat),
    filename=f"data/{experiment}/exogenous_LL.pkl",
)
joblib.dump(
    value=(Dhl_noise, hl_mu_hat, hl_Sigma_hat),
    filename=f"data/{experiment}/exogenous_HL.pkl",
)

['data/little_lucas/exogenous_HL.pkl']

### Define the sets of relevant interventions and the (total), surjective and order-preserving function $\omega: I^{L} \to I^{H}$

In [10]:
# Low-level interventions. `None` stands for "no intervention"
# (presumably the observational setting — confirm in lanm.LinearAddSCM).
iota0 = None
iota1 = ops.Intervention({Anx: 0})
iota2 = ops.Intervention({Gen: 1})
iota3 = ops.Intervention({All: 0})
iota4 = ops.Intervention({Anx: 0, PP: 0})
iota5 = ops.Intervention({Anx: 0, PP: 0, Sm: 0})
iota6 = ops.Intervention({Anx: 0, PP: 0, Sm: 1})

# High-level interventions.
eta0 = None
eta1 = ops.Intervention({Env: 0})
eta2 = ops.Intervention({Env: 1})
eta3 = ops.Intervention({Gen_: 0})
eta4 = ops.Intervention({Gen_: 1})

# omega maps each relevant low-level intervention to a high-level one.
# NOTE(review): eta2 (Env = 1) is never used as an image below, so it does not
# appear in Ihl_relevant; iota6 (Sm = 1) is mapped to eta1 (Env = 0) like
# iota5 (Sm = 0) — confirm that this is intended.
omega = {
    iota0: eta0,
    iota1: eta1,
    iota4: eta1,
    iota5: eta1,
    iota6: eta1,
    iota2: eta4,
    iota3: eta3,
}

# Relevant intervention sets, in a deterministic order.
# The previous list(set(...)) produced an arbitrary, run-dependent ordering
# (set iteration follows hash values), which leaked into the pickled lists and
# into the iteration order of the sampling loops. Dict keys are already unique
# and keep insertion order; dict.fromkeys de-duplicates the images with the
# same hash/equality semantics as set() while keeping first-seen order.
Ill_relevant = list(omega)
Ihl_relevant = list(dict.fromkeys(omega.values()))

In [11]:
# Persist each causal graph together with its list of relevant interventions,
# followed by the low-to-high intervention map.
joblib.dump(
    value=(ll_causal_graph, Ill_relevant),
    filename=f"data/{experiment}/LL.pkl",
)

joblib.dump(
    value=(hl_causal_graph, Ihl_relevant),
    filename=f"data/{experiment}/HL.pkl",
)

joblib.dump(value=omega, filename=f"data/{experiment}/omega.pkl")

['data/little_lucas/omega.pkl']

### Sampling and Pair construction

In [12]:
# For every relevant low-level intervention, build the linear additive-noise
# model under that intervention and evaluate it on the shared low-level noise.
Dll_samples = {
    iota: lanm.LinearAddSCM(
        ll_causal_graph, ll_endogenous_coeff_dict, iota
    ).sample_settings(Dll_noise)
    for iota in Ill_relevant
}

# Same for every relevant high-level intervention, on the high-level noise.
Dhl_samples = {
    eta: lanm.LinearAddSCM(
        hl_causal_graph, hl_endogenous_coeff_dict, eta
    ).sample_settings(Dhl_noise)
    for eta in Ihl_relevant
}

# Pair each low-level sample set with the high-level sample set of the
# intervention that omega assigns to it.
Ds = {
    iota: (Dll_samples[iota], Dhl_samples[omega[iota]])
    for iota in Ill_relevant
}

In [13]:
# Persist the paired (low-level, high-level) samples, keyed by low-level intervention.
joblib.dump(value=Ds, filename=f"data/{experiment}/Ds.pkl")

['data/little_lucas/Ds.pkl']