In [None]:
!pip install dcor

In [None]:
import os
import sys
import numpy as np
import warnings
import pandas as pd
import matplotlib
from scipy import special
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Absolute path of the directory four levels above the current working directory.
root_path = os.path.realpath('../../../..')

# If auto_causality is not importable as an installed package, fall back to a
# source checkout expected at <root_path>/auto-causality by adding it to sys.path.
try:
    import auto_causality  # noqa: F401  (import probe only)
except ModuleNotFoundError:
    sys.path.append(os.path.join(root_path, "auto-causality"))

from auto_causality import AutoCausality
from auto_causality.data_utils import preprocess_dataset

warnings.filterwarnings("ignore")

In [None]:
def iv_dgp_econml(n=5000, p=10, true_effect=10):
    """Generate a synthetic instrumental-variable dataset with imperfect compliance.

    A binary instrument ``Z`` is drawn at random. The treatment actually taken
    depends on ``Z`` through two compliance indicators, and a latent term ``nu``
    influences both compliance and the outcome but is not included in the
    returned frame.

    All draws use NumPy's global random state, so call ``np.random.seed(...)``
    beforehand if reproducible output is required.

    Args:
        n: Number of rows to generate.
        p: Number of covariate columns. Must be at least 4, because the outcome
            uses the covariate at column index 3.
        true_effect: Treatment effect. Either a callable that receives the
            ``(n, p)`` covariate array and returns the effect (per row or
            scalar), or a plain number applied to every row.

    Returns:
        pandas.DataFrame with covariate columns ``x1`` ... ``x{p}`` followed by
        ``y`` (outcome), ``treatment`` and ``Z`` (instrument).
    """
    X = np.random.normal(0, 1, size=(n, p))
    Z = np.random.binomial(1, 0.5, size=(n,))
    nu = np.random.uniform(0, 5, size=(n,))
    coef_Z = 0.8
    C = np.random.binomial(
        1, coef_Z * special.expit(0.4 * X[:, 0] + nu)
    )  # Compliers when recommended
    C0 = np.random.binomial(
        1, 0.006 * np.ones(X.shape[0])
    )  # Non-compliers when not recommended
    # Treatment taken: compliers follow Z == 1; a small fraction take it when Z == 0.
    T = C * Z + C0 * (1 - Z)

    # The default value is a number, not a function, so only call true_effect
    # when it is actually callable; otherwise use it as a constant effect.
    effect = true_effect(X) if callable(true_effect) else true_effect

    y = (
        effect * T
        + 2 * nu
        + 5 * (X[:, 3] > 0)
        + 0.8 * np.random.uniform(0, 1, size=(n,))
    )
    cov = [f"x{i}" for i in range(1, X.shape[1] + 1)]
    df = pd.DataFrame(X, columns=cov)

    df["y"] = y
    df["treatment"] = T
    df["Z"] = Z

    return df

In [None]:
# Dataset parameters
treatment = "treatment"
targets = ["y"]
instruments = ["Z"]
outcome = targets[0]

In [None]:
TRUE_EFFECT = 5.5
RANDOM_SEED = 42  # fixes both the synthetic draw and the train/test split


def LINEAR_EFFECT(X):
    """Heterogeneous treatment effect: TRUE_EFFECT * x0**2 * x1 for each row of X.

    Despite the name, the effect is non-linear in the covariates; the name is
    kept so that other cells referring to it keep working.
    For a constant effect, pass ``true_effect=TRUE_EFFECT`` to the generator instead.
    """
    return TRUE_EFFECT * ((X[:, 0] ** 2) * X[:, 1])


# iv_dgp_econml draws from NumPy's global random state, so seed it first to
# make the generated data identical across Restart & Run All.
np.random.seed(RANDOM_SEED)
synth_data = iv_dgp_econml(n=50000, p=15, true_effect=LINEAR_EFFECT)
data_df, features_X, features_W = preprocess_dataset(
    synth_data, treatment, targets, instruments
)
df_train, df_test = train_test_split(
    data_df, test_size=0.2, random_state=RANDOM_SEED
)

# Only the first two entries of features_X are kept for the test-set covariates.
X_test = df_test[features_X[:2]]
df_train.head()

In [None]:
# Names of the IV estimators of interest in this notebook.
estimator_list = [
    "SimpleIV",
    "DMLIV",
    "ForestDRIV",
    "IntentToTreatDRIV",
]

In [None]:
# Search restricted to the ForestDRIV and IntentToTreatDRIV estimators.
# NOTE(review): the name was previously spelled "ForestDriv", which does not
# match the "ForestDRIV" spelling used in `estimator_list`; assuming estimator
# names are matched case-sensitively, the forest estimator would have been
# skipped - confirm against AutoCausality's estimator-name matching.
ac_forestdriv = AutoCausality(
    estimator_list=["ForestDRIV", "IntentToTreatDRIV"],
    verbose=3,
    components_verbose=2,
    time_budget=14400,  # presumably seconds (4 h) - TODO confirm unit
    components_time_budget=300,
    propensity_model="auto"
)

# features_X[2:] and features_X[:2] are the 4th and 5th positional arguments -
# presumably common causes and effect modifiers; verify against AutoCausality.fit.
ac_forestdriv.fit(df_train, treatment, outcome, features_X[2:], features_X[:2], instruments)

In [None]:
# Separate search restricted to the IntentToTreatDRIV estimator only.
ac_itt = AutoCausality(
    propensity_model="auto",
    time_budget=10800,  # presumably seconds (3 h) - TODO confirm unit
    components_time_budget=1000,
    verbose=3,
    components_verbose=2,
    estimator_list=["IntentToTreatDRIV"],
)

# Same data and column roles as the previous fit: the first two entries of
# features_X go in the 5th positional slot, the remainder in the 4th.
ac_itt.fit(
    df_train,
    treatment,
    outcome,
    features_X[2:],
    features_X[:2],
    instruments,
)