In [1]:
import numpy as np
import cvxpy as cp 
from src.CBN import CausalBayesianNetwork as CBN
import modularised_utils as mut
import Linear_Additive_Noise_Models as lanm
import operations as ops
from tqdm import tqdm

from scipy.stats import wasserstein_distance_nd

import params

# Seed NumPy's global RNG so the random initialisations further down are repeatable.
SEED = 0
np.random.seed(SEED)

In [2]:
# Name of the experiment to run. It is used below as the key into the `params`
# tables and is passed to the `mut` loaders.
experiment = 'synth1_gnd'
# Alternative experiment (disabled):
#experiment = 'synth1'

In [3]:
# Every `params` table is keyed by experiment name; index 0 is the low-level
# model's entry and index 1 is the high-level model's entry.

# Radii of the Wasserstein balls: epsilon (low-level) and delta (high-level).
epsilon         = params.radius[experiment][0]
delta           = params.radius[experiment][1]

# Number of environments generated for each model.
ll_num_envs     = params.n_envs[experiment][0]
hl_num_envs     = params.n_envs[experiment][1]

# Number of samples per environment (currently the same for every environment).
num_llsamples   = params.n_samples[experiment][0]
num_hlsamples   = params.n_samples[experiment][1]

In [5]:
# Load the stored samples once and split them, instead of calling the loader
# twice for the same experiment.
# NOTE(review): the `None` key presumably selects the observational
# (no-intervention) samples -- confirm against mut.load_samples.
_loaded_samples = mut.load_samples(experiment)[None]
Dll = _loaded_samples[0]  # low-level samples
Dhl = _loaded_samples[1]  # high-level samples

# Graph and intervention set for each model.
Gll, Ill = mut.load_model(experiment, 'LL')
Ghl, Ihl = mut.load_model(experiment, 'HL')

# Mapping from low-level interventions to high-level interventions
# (used below as HLmodels[omega[iota]]).
omega = mut.load_omega_map(experiment)

# Number of variables = number of columns in each sample matrix.
num_llvars      = Dll.shape[1]
num_hlvars      = Dhl.shape[1]

In [6]:
# Fit the coefficients of each model from its samples and graph, low-level first
# and then high-level, using one mixture component per variable.
ll_coeffs, hl_coeffs = [
    mut.get_mle_coefficients_gmm(samples, graph, weights=None, n_components=n_vars)
    for samples, graph, n_vars in ((Dll, Gll, num_llvars), (Dhl, Ghl, num_hlvars))
]

Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md



In [7]:
# Abduction: mut.lan_abduction is called once per model. Its three return values
# are unpacked as the estimated noise samples and, judging by the names, the
# noise mean and covariance.
U_ll_hat, mu_U_ll_hat, Sigma_U_ll_hat = mut.lan_abduction(Dll, Gll, ll_coeffs)
U_hl_hat, mu_U_hl_hat, Sigma_U_hl_hat = mut.lan_abduction(Dhl, Ghl, hl_coeffs)

# Build the environment sets from the estimated noise, using the configured radii
# as bounds. NOTE(review): presumably each environment is a perturbed copy of the
# noise samples within `bound` -- confirm against mut.generate_perturbed_datasets.
# The statement order is kept as is because these calls may consume the global RNG.
A_ll = mut.generate_perturbed_datasets(D = U_ll_hat, bound = epsilon, num_envs = ll_num_envs) #Low-level: A_epsilon
A_hl = mut.generate_perturbed_datasets(D = U_hl_hat, bound = delta, num_envs = hl_num_envs) #High-level A_delta

In [8]:
# One linear additive-noise SCM per intervention, keyed by the intervention itself.
LLmodels = {iota: lanm.LinearAddSCM(Gll, ll_coeffs, iota) for iota in Ill}

# Dhl_samples is created empty here and is not filled anywhere in this cell.
Dhl_samples = {}
HLmodels = {eta: lanm.LinearAddSCM(Ghl, hl_coeffs, eta) for eta in Ihl}

In [13]:
# Estimated exogenous noise of the low-level (U_L) and high-level (U_H) models.
U_L = U_ll_hat
U_H = U_hl_hat

# Rows are samples, columns are variables. Theta and Phi are added to U_L and U_H
# row by row, and the two sides are subtracted sample-wise, so both noise matrices
# must hold the same number of samples. Check it here instead of letting
# num_samples be silently overwritten and failing later with a broadcasting error.
if U_L.shape[0] != U_H.shape[0]:
    raise ValueError(
        f"U_L and U_H must have the same number of samples, "
        f"got {U_L.shape[0]} and {U_H.shape[0]}."
    )
num_samples, n = U_L.shape
m = U_H.shape[1]

# NOTE: these values override the epsilon/delta read from `params` in an earlier
# cell (the ones used to build A_ll and A_hl).
epsilon = 0.5  # Radius of the Wasserstein ball for the low-level model
delta   = 0.5 # Radius of the Wasserstein ball for the high-level model
alpha   = 0.0001 # Learning rate for ascent steps in Theta and Phi

# Initialize variables (draw order T, Theta, Phi is kept so the seeded results match)
T     = np.random.rand(m, n)            # abstraction map, shape (m, n)
Theta = np.random.rand(num_samples, n)  # perturbation added to U_L
Phi   = np.random.rand(num_samples, m)  # perturbation added to U_H

# Projection onto a Frobenius-norm ball
def project_onto_frobenius_ball(matrix, radius):
    """Return `matrix` projected onto the Frobenius ball of the given radius.

    If the Frobenius norm of `matrix` exceeds `radius`, a rescaled copy whose norm
    equals `radius` is returned; otherwise the input object itself is returned
    unchanged.
    """
    frob = np.linalg.norm(matrix, 'fro')
    return matrix * (radius / frob) if frob > radius else matrix

# Update function for T
def update_T(U_L, U_H, Theta, Phi):
    """Solve for the non-negative map T that minimises the mean squared error.

    For every intervention `iota` in `Ill` the residual is
    ``T @ Li @ (U_L + Theta).T - Hi @ (U_H + Phi).T``, where `Li` and `Hi` are the
    reduced forms of the low-level model and of the high-level model paired with
    it through `omega`. The objective is the sum of squared Frobenius norms of
    these residuals divided by `num_samples`.

    Reads the globals `m`, `n`, `num_samples`, `Ill`, `omega`, `LLmodels` and
    `HLmodels`.

    Args:
        U_L: low-level noise samples, one row per sample.
        U_H: high-level noise samples, one row per sample.
        Theta: perturbation added to `U_L` (same shape).
        Phi: perturbation added to `U_H` (same shape).

    Returns:
        The optimal T as a NumPy array of shape (m, n).

    Raises:
        RuntimeError: if the solver does not produce a value for T.
    """
    # The perturbed noise does not depend on the intervention, so build it once.
    ll_noise = U_L.T + Theta.T
    hl_noise = U_H.T + Phi.T

    T_var = cp.Variable((m, n), nonneg=True)
    total_error = 0
    for iota in Ill:
        Li = LLmodels[iota]._compute_reduced_form()
        Hi = HLmodels[omega[iota]]._compute_reduced_form()
        # sum_squares(X) equals the squared Frobenius norm of X and is the
        # canonical CVXPY form for a least-squares objective.
        total_error += cp.sum_squares(T_var @ (Li @ ll_noise) - Hi @ hl_noise)

    prob = cp.Problem(cp.Minimize(total_error / num_samples))
    prob.solve()

    # Without this check a failed solve would return None and only blow up later,
    # in the convergence test of the calling loop.
    if T_var.value is None:
        raise RuntimeError(f"update_T: solver returned no solution (status: {prob.status}).")
    return T_var.value

# Gradient ascent step for Theta
def ascent_step_Theta(U_L, U_H, T, Phi, Theta, epsilon, alpha):
    """Take one projected gradient-ascent step on Theta and return the new value.

    The objective is the sum over interventions of
    ``||T @ Li @ (U_L + Theta).T - Hi @ (U_H + Phi).T||_F^2`` divided by
    `num_samples`. The constant factor 2 of the exact gradient is left out, so it
    is effectively absorbed into `alpha`.

    Reads the globals `num_samples`, `Ill`, `omega`, `LLmodels` and `HLmodels`.

    Args:
        U_L, U_H: low-/high-level noise samples, one row per sample.
        T: current abstraction map.
        Phi: current perturbation of `U_H`.
        Theta: current perturbation of `U_L`; it is NOT modified.
        epsilon: radius parameter; the result is projected onto the Frobenius
            ball of radius ``sqrt(num_samples * epsilon**2)``.
        alpha: step size.

    Returns:
        The updated (and projected) Theta as a new array.
    """
    # Perturbed noise is the same for every intervention.
    ll_noise = U_L.T + Theta.T
    hl_noise = U_H.T + Phi.T

    gradient = np.zeros(Theta.shape, dtype=float)
    for iota in Ill:
        Li = LLmodels[iota]._compute_reduced_form()
        Hi = HLmodels[omega[iota]]._compute_reduced_form()
        TLi = T @ Li
        A = TLi @ ll_noise - Hi @ hl_noise

        gradient += (TLi.T @ A).T  # gradient wrt Theta (up to the factor 2)

    gradient /= num_samples
    # Build a new array instead of `Theta += ...`: the in-place form mutated the
    # caller's array, but only sometimes (the projection returns a fresh array).
    stepped = Theta + alpha * gradient
    return project_onto_frobenius_ball(stepped, np.sqrt(num_samples * epsilon**2))

# Gradient ascent step for Phi
def ascent_step_Phi(U_L, U_H, T, Theta, Phi, delta, alpha):
    """Take one projected gradient-ascent step on Phi and return the new value.

    The objective is the sum over interventions of ``||A_i||_F^2`` divided by
    `num_samples`, with ``A_i = T @ Li @ (U_L + Theta).T - Hi @ (U_H + Phi).T``.
    Phi enters through ``-Hi @ Phi.T``, so the gradient with respect to Phi is
    ``-2 * (Hi.T @ A_i).T``. The constant factor 2 is left out (absorbed into
    `alpha`), matching the Theta step.

    Reads the globals `num_samples`, `Ill`, `omega`, `LLmodels` and `HLmodels`.

    Args:
        U_L, U_H: low-/high-level noise samples, one row per sample.
        T: current abstraction map.
        Theta: current perturbation of `U_L`.
        Phi: current perturbation of `U_H`; it is NOT modified.
        delta: radius parameter; the result is projected onto the Frobenius ball
            of radius ``sqrt(num_samples * delta**2)``.
        alpha: step size.

    Returns:
        The updated (and projected) Phi as a new array.
    """
    # Perturbed noise is the same for every intervention.
    ll_noise = U_L.T + Theta.T
    hl_noise = U_H.T + Phi.T

    gradient = np.zeros(Phi.shape, dtype=float)
    for iota in Ill:
        Li = LLmodels[iota]._compute_reduced_form()
        Hi = HLmodels[omega[iota]]._compute_reduced_form()
        A = T @ Li @ ll_noise - Hi @ hl_noise

        # Negative sign and Hi transposed: the previous `(Hi @ A).T` pointed in a
        # descent direction, so the "ascent" step was shrinking the objective.
        gradient -= (Hi.T @ A).T

    gradient /= num_samples
    # Build a new array rather than mutating the caller's Phi in place.
    stepped = Phi + alpha * gradient
    return project_onto_frobenius_ball(stepped, np.sqrt(num_samples * delta**2))

# Main optimization loop: alternate an exact minimisation over T with one
# projected ascent step on Theta and on Phi, until all three stop moving.
max_iters = 100
tol = 1e-5

for iteration in tqdm(range(max_iters)):
    # Snapshots of the current iterates, used for the stopping test below.
    T_prev = T.copy()
    Theta_prev = Theta.copy()
    Phi_prev = Phi.copy()

    # Minimize wrt T
    T = update_T(U_L, U_H, Theta, Phi)

    # Maximize wrt Theta and Phi using gradient ascent
    Theta = ascent_step_Theta(U_L, U_H, T, Phi, Theta, epsilon, alpha)
    Phi   = ascent_step_Phi(U_L, U_H, T, Theta, Phi, delta, alpha)

    # Converged when every variable changed by less than tol in Frobenius norm.
    updates = ((T, T_prev), (Theta, Theta_prev), (Phi, Phi_prev))
    if all(np.linalg.norm(new - old, 'fro') < tol for new, old in updates):
        print(f"Converged in {iteration + 1} iterations.")
        break

# Final optimized values of T, Theta, and Phi
for label, value in (("Optimized T:", T), ("Optimized Theta:", Theta), ("Optimized Phi:", Phi)):
    print(label, value)

  0%|          | 0/100 [00:00<?, ?it/s]

  2%|▏         | 2/100 [00:03<02:27,  1.51s/it]

Converged in 3 iterations.
Optimized T: [[1.52829830e-02 4.73341307e-01 1.25829370e-11]
 [2.26566373e-02 5.95564466e-01 3.49693126e-12]]
Optimized Theta: [[0.35147087 0.47811262 0.37771377]
 [0.19655297 0.04600236 0.4856553 ]
 [0.24309754 0.21838081 0.27136206]
 ...
 [0.38433809 0.07658456 0.07209259]
 [0.27574949 0.16564704 0.40556579]
 [0.48183323 0.12395806 0.41800125]]
Optimized Phi: [[0.58498078 0.11461105]
 [0.12657994 0.23646488]
 [0.46489155 0.52252822]
 ...
 [0.17574303 0.17407436]
 [0.07207346 0.05444351]
 [0.16153937 0.13609068]]





In [14]:
# NOTE(review): disabled comparison of the learned T against the ground-truth map.
# As written it calls torch.norm, but torch is not among the imports at the top of
# this notebook and T is a NumPy array; np.linalg.norm(T - Tau, 'fro') would be the
# NumPy equivalent. The saved output under this cell reports a Frobenius distance
# of 0 although the two printed matrices differ, so treat that output as stale.
# Tau  = mut.load_T(experiment)
# #Tau = torch.from_numpy(Tau).float()
# diff = torch.norm(T-Tau, 'fro')

# print(f"Estimated T =  {T}, \n \nGround Truth T = {Tau} \n \nFrobenius Distance = {diff}")

Estimated T =  [[1.52829830e-02 4.73341307e-01 1.25829370e-11]
 [2.26566373e-02 5.95564466e-01 3.49693126e-12]], 
 
Ground Truth T = [[1 2 1]
 [0 1 0]] 
 
Frobenius Distance = 0


In [None]:
# Sanity check: push one hand-picked low-level point through the learned map T.
# The sample has three entries, so this only works when T has three columns.
x_sample = np.array([1.5, 2.5, 3.5])
mapped_point = T @ x_sample
print(f'{x_sample} maps to {mapped_point}')

In [None]:
#WITHOUT CVXPY
# Variant of the optimisation in which T is updated by a projected gradient step
# instead of a CVXPY solve.
U_L = U_ll_hat
U_H = U_hl_hat

# Rows are samples, columns are variables. Both noise matrices must hold the same
# number of samples because the residual subtracts them sample-wise. Check it
# here instead of letting num_samples be silently overwritten by the second unpack.
if U_L.shape[0] != U_H.shape[0]:
    raise ValueError(
        f"U_L and U_H must have the same number of samples, "
        f"got {U_L.shape[0]} and {U_H.shape[0]}."
    )
num_samples, n = U_L.shape
m = U_H.shape[1]

# Parameters
# NOTE: epsilon and delta override the values assigned in earlier cells.
epsilon = 1.0
delta = 1.0
alpha = 0.01  # Learning rate for ascent steps in Theta and Phi
learning_rate_T = 0.001  # Learning rate for descent step in T

# Initialize primal and dual variables (draw order T, Theta, Phi is kept)
T = np.random.rand(m, n)
Theta = np.random.rand(num_samples, n)
Phi = np.random.rand(num_samples, m)

# Projection onto a Frobenius ball.
# Note: this repeats, and therefore shadows, the definition given in the CVXPY cell.
def project_onto_frobenius_ball(matrix, radius):
    """Shrink `matrix` onto the Frobenius ball of radius `radius` if it lies outside.

    Matrices whose Frobenius norm does not exceed `radius` are returned as the very
    same object; larger ones are returned as a rescaled copy with norm `radius`.
    """
    magnitude = np.linalg.norm(matrix, 'fro')
    if not (magnitude > radius):
        return matrix
    scale = radius / magnitude
    return matrix * scale

# Projection onto the non-negative orthant
def project_onto_non_negative(matrix):
    """Return a copy of `matrix` with every negative entry replaced by zero."""
    clipped = np.maximum(matrix, 0)
    return clipped

# Projected gradient descent step for T
def descent_step_T(U_L, U_H, T, Theta, Phi, learning_rate_T):
    """Take one gradient-descent step on T and clip the result to be non-negative.

    For each intervention in `Ill` the residual is
    ``A = T @ Li @ (U_L + Theta).T - Hi @ (U_H + Phi).T`` and its contribution to
    the gradient is ``A @ (Li @ (U_L + Theta).T).T``. The sum is divided by
    `num_samples` before the step is taken. `T` itself is not modified.

    Reads the globals `num_samples`, `Ill`, `omega`, `LLmodels` and `HLmodels`.

    Returns:
        The updated T after projection onto the non-negative orthant.
    """
    # Perturbed noise terms are identical for every intervention.
    ll_noise = U_L.T + Theta.T
    hl_noise = U_H.T + Phi.T

    grad_T = np.zeros_like(T)
    for intervention in Ill:
        ll_mech = LLmodels[intervention].compute_mechanism()
        hl_mech = HLmodels[omega[intervention]].compute_mechanism()
        residual = T @ ll_mech @ ll_noise - hl_mech @ hl_noise

        # Gradient contribution of this intervention with respect to T
        grad_T += residual @ (ll_mech @ ll_noise).T

    grad_T /= num_samples
    stepped = T - learning_rate_T * grad_T  # descent step
    return project_onto_non_negative(stepped)  # keep T non-negative

# Gradient ascent step for Theta
def ascent_step_Theta(U_L, U_H, T, Phi, Theta, epsilon, alpha):
    """Take one projected gradient-ascent step on Theta and return the new value.

    The objective is the sum over interventions of
    ``||T @ Li @ (U_L + Theta).T - Hi @ (U_H + Phi).T||_F^2`` divided by
    `num_samples`, with `Li`/`Hi` obtained from `compute_mechanism()`. The
    constant factor 2 of the exact gradient is left out, so it is effectively
    absorbed into `alpha`.

    Reads the globals `num_samples`, `Ill`, `omega`, `LLmodels` and `HLmodels`.

    Args:
        U_L, U_H: low-/high-level noise samples, one row per sample.
        T: current abstraction map.
        Phi: current perturbation of `U_H`.
        Theta: current perturbation of `U_L`; it is NOT modified.
        epsilon: radius parameter; the result is projected onto the Frobenius
            ball of radius ``sqrt(num_samples * epsilon**2)``.
        alpha: step size.

    Returns:
        The updated (and projected) Theta as a new array.
    """
    # Perturbed noise is the same for every intervention.
    ll_noise = U_L.T + Theta.T
    hl_noise = U_H.T + Phi.T

    gradient = np.zeros(Theta.shape, dtype=float)
    for iota in Ill:
        Li = LLmodels[iota].compute_mechanism()
        Hi = HLmodels[omega[iota]].compute_mechanism()
        TLi = T @ Li
        A = TLi @ ll_noise - Hi @ hl_noise

        gradient += (TLi.T @ A).T  # gradient wrt Theta (up to the factor 2)

    gradient /= num_samples
    # Build a new array instead of `Theta += ...`: the in-place form mutated the
    # caller's array, but only sometimes (the projection returns a fresh array).
    stepped = Theta + alpha * gradient
    return project_onto_frobenius_ball(stepped, np.sqrt(num_samples * epsilon**2))

# Gradient ascent step for Phi
def ascent_step_Phi(U_L, U_H, T, Theta, Phi, delta, alpha):
    """Take one projected gradient-ascent step on Phi and return the new value.

    The objective is the sum over interventions of ``||A_i||_F^2`` divided by
    `num_samples`, with ``A_i = T @ Li @ (U_L + Theta).T - Hi @ (U_H + Phi).T``
    and `Li`/`Hi` obtained from `compute_mechanism()`. Phi enters through
    ``-Hi @ Phi.T``, so the gradient with respect to Phi is
    ``-2 * (Hi.T @ A_i).T``. The constant factor 2 is left out (absorbed into
    `alpha`), matching the Theta step.

    Reads the globals `num_samples`, `Ill`, `omega`, `LLmodels` and `HLmodels`.

    Args:
        U_L, U_H: low-/high-level noise samples, one row per sample.
        T: current abstraction map.
        Theta: current perturbation of `U_L`.
        Phi: current perturbation of `U_H`; it is NOT modified.
        delta: radius parameter; the result is projected onto the Frobenius ball
            of radius ``sqrt(num_samples * delta**2)``.
        alpha: step size.

    Returns:
        The updated (and projected) Phi as a new array.
    """
    # Perturbed noise is the same for every intervention.
    ll_noise = U_L.T + Theta.T
    hl_noise = U_H.T + Phi.T

    gradient = np.zeros(Phi.shape, dtype=float)
    for iota in Ill:
        Li = LLmodels[iota].compute_mechanism()
        Hi = HLmodels[omega[iota]].compute_mechanism()
        A = T @ Li @ ll_noise - Hi @ hl_noise

        # Negative sign and Hi transposed: the previous `(Hi @ A).T` pointed in a
        # descent direction, so the "ascent" step was shrinking the objective.
        gradient -= (Hi.T @ A).T

    gradient /= num_samples
    # Build a new array rather than mutating the caller's Phi in place.
    stepped = Phi + alpha * gradient
    return project_onto_frobenius_ball(stepped, np.sqrt(num_samples * delta**2))

# Main optimization loop: one projected descent step on T followed by one
# projected ascent step on Theta and on Phi per iteration.
max_iters = 100
tol = 1e-4

for iteration in range(max_iters):
    # Keep copies of the current iterates for the stopping test.
    T_prev = T.copy()
    Theta_prev = Theta.copy()
    Phi_prev = Phi.copy()

    # Gradient descent step for T
    T = descent_step_T(U_L, U_H, T, Theta, Phi, learning_rate_T)

    # Gradient ascent steps for Theta and Phi
    Theta = ascent_step_Theta(U_L, U_H, T, Phi, Theta, epsilon, alpha)
    Phi = ascent_step_Phi(U_L, U_H, T, Theta, Phi, delta, alpha)

    # Stop once none of the three variables moved by tol or more (Frobenius norm).
    changes = (T - T_prev, Theta - Theta_prev, Phi - Phi_prev)
    if all(np.linalg.norm(change, 'fro') < tol for change in changes):
        print(f"Converged in {iteration + 1} iterations.")
        break

# Final optimized values of T, Theta, and Phi
for label, value in (("Optimized T:", T), ("Optimized Theta:", Theta), ("Optimized Phi:", Phi)):
    print(label, value)


In [14]:
# NOTE(review): disabled legacy cell, kept for reference only. For every pair of
# low-level / high-level environments in A_ll x A_hl it samples a map T with
# mut.sample_stoch_matrix, averages wasserstein_distance_nd between the mapped
# low-level mechanism and the high-level mechanism over all interventions in Ill,
# and finally reports the sampled T and the environment pair with the largest
# average error.
# abstraction_errors             = {}
# abstraction_env_errors         = {}
# max_env_avg_interv_error_value = -np.inf
# max_env_avg_interv_error_key   = None

# for lenv in A_ll:
#     for henv in A_hl:
#         total_ui_error = 0
#         num_distros    = len(Ill)

#         T  = mut.sample_stoch_matrix(num_hlvars, num_llvars) # sample the abstraction map/matrix

#         for iota in Ill:
#             llcm   = LLmodels[iota]
#             hlcm   = HLmodels[omega[iota]]
#             llmech = llcm.compute_mechanism()
#             hlmech = hlcm.compute_mechanism()

#             lefthh = T @ (llmech @ lenv.T)
#             righthh = hlmech @ henv.T
#             #print(rig)
#             error = wasserstein_distance_nd(lefthh, righthh)
#             #error = mut.mat_jsd_distance(T@(llmech @ lenv.T), hlmech @ henv.T)
#             #error = mut.mat_ot_wasserstein_distance(T@(llmech @ lenv.T), hlmech @ henv.T)
#             #error  = mut.mat_wasserstein_distance(T@(llmech @ lenv.T), hlmech @ henv.T)
            
#             #print(error,'\n')
#             total_ui_error += error

#         avg_interv_error = total_ui_error/num_distros

#         if avg_interv_error > max_env_avg_interv_error_value:
#             max_env_avg_interv_error_value = avg_interv_error
#             max_env_avg_interv_error_key   = (lenv, henv)

#         abstraction_errors[str(T)] = avg_interv_error
#         #abstraction_env_errors['ll: '+str(ll_environment.means_)+' hl: '+str(hl_environment.means_)] = avg_interv_error

# max_tau   = max(abstraction_errors, key=abstraction_errors.get)
# max_error = abstraction_errors[max_tau]

# print(f"Abstraction: {max_tau}, Error: {max_error}")
# max_lenv = max_env_avg_interv_error_key[0]
# max_henv = max_env_avg_interv_error_key[1]