In [3]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression


In [48]:
# Data Generation
def generate_data(n=1000, seed=0, beta1=1.05, alpha1=0.4, alpha2=0.3, binary_treatment=True, binary_cutoff=3.5):
    """Simulate a synthetic dataset of age, sodium, blood pressure and proteinurin.

    Generating process:
      * ``age``            ~ Normal(65, 5)
      * ``sodium``         = age / 18 + standard-normal noise
        (optionally thresholded into a 0/1 indicator)
      * ``blood_pressure`` = beta1 * sodium + 2 * age + standard-normal noise
      * ``proteinurin``    = alpha1 * sodium + alpha2 * blood_pressure
        + standard-normal noise

    Parameters
    ----------
    n : int
        Number of rows to simulate.
    seed : int
        Value passed to ``np.random.seed``; note this reseeds NumPy's
        *global* random state.
    beta1 : float
        Coefficient of ``sodium`` in the ``blood_pressure`` equation.
    alpha1 : float
        Coefficient of ``sodium`` in the ``proteinurin`` equation.
    alpha2 : float
        Coefficient of ``blood_pressure`` in the ``proteinurin`` equation.
    binary_treatment : bool
        When true, ``sodium`` is replaced by ``(sodium > cutoff)`` as integers.
    binary_cutoff : float or None
        Threshold used when ``binary_treatment`` is true. ``None`` means
        "use the mean of the simulated continuous sodium values".

    Returns
    -------
    pandas.DataFrame
        Columns ``blood_pressure``, ``sodium``, ``age``, ``proteinurin``
        (in that order), one row per simulated unit.
    """
    np.random.seed(seed)

    # The order of the random draws below (age, sodium noise, blood-pressure
    # noise, proteinurin noise) determines the simulated values for a seed.
    age = np.random.normal(65, 5, n)
    sodium = age / 18 + np.random.normal(size=n)

    if binary_treatment:
        # Fall back to the sample mean of continuous sodium when no cutoff is given.
        threshold = sodium.mean() if binary_cutoff is None else binary_cutoff
        sodium = (sodium > threshold).astype(int)

    blood_pressure = beta1 * sodium + 2 * age + np.random.normal(size=n)
    proteinurin = alpha1 * sodium + alpha2 * blood_pressure + np.random.normal(size=n)

    # Binary outcome flag; computed here but not part of the returned frame.
    hypertension = (blood_pressure >= 140).astype(int)

    return pd.DataFrame(
        dict(
            blood_pressure=blood_pressure,
            sodium=sodium,
            age=age,
            proteinurin=proteinurin,
        )
    )

In [37]:
def estimate_causal_effect(Xt, y, model=None, treatment_idx=0, regression_coef=False):
    """Fit ``model`` on ``(Xt, y)`` and return an estimate of the treatment effect.

    Parameters
    ----------
    Xt : pandas.DataFrame
        Feature matrix that includes the treatment column. A DataFrame is
        required when ``regression_coef`` is false, because the treatment
        column is looked up through ``Xt.columns``.
    y : array-like
        Outcome passed to ``model.fit``.
    model : estimator or None
        Object exposing ``fit(X, y)``, ``predict(X)`` and, when
        ``regression_coef`` is true, a ``coef_`` attribute. ``None`` (the
        default) creates a fresh ``LinearRegression`` for this call. A
        caller-supplied model is fitted in place.
    treatment_idx : int
        Position of the treatment column within ``Xt``.
    regression_coef : bool
        If true, return the fitted coefficient at ``treatment_idx`` instead
        of the prediction-based contrast.

    Returns
    -------
    float
        Either ``model.coef_[treatment_idx]`` or the mean difference between
        predictions with the treatment column set to 1 and set to 0.
    """
    if model is None:
        # Instantiate per call: a default of ``LinearRegression()`` in the
        # signature would be created once at definition time and the same
        # (re-fitted) estimator would then be shared by every call.
        model = LinearRegression()

    model.fit(Xt, y)

    if regression_coef:
        return model.coef_[treatment_idx]

    treatment_col = Xt.columns[treatment_idx]

    # Work on copies so the caller's frame is left untouched.
    Xt1 = Xt.copy()
    Xt1[treatment_col] = 1
    Xt0 = Xt.copy()
    Xt0[treatment_col] = 0

    return (model.predict(Xt1) - model.predict(Xt0)).mean()


In [51]:
# Simulate two 10M-row datasets with the same coefficients and the default seed;
# the only difference is whether sodium is thresholded into a 0/1 treatment.
binary_t_df = generate_data(n=10_000_000, binary_treatment=True, beta1=1.05, alpha1=0.4, alpha2=0.3)
continuous_t_df = generate_data(n=10_000_000, binary_treatment=False, beta1=1.05, alpha1=0.4, alpha2=0.3)

In [49]:
# Preview the first five rows of the binary-treatment dataset.
binary_t_df.head()

Unnamed: 0,blood_pressure,sodium,age,proteinurin
0,146.809261,1,73.820262,43.118187
1,133.14517,0,67.000786,37.669696
2,139.28964,0,69.89369,41.37032
3,153.894444,1,76.204466,47.021801
4,150.615711,1,74.33779,46.574317


In [50]:
# Show the first row of a small (n=100) continuous-treatment sample.
generate_data(n=100, binary_treatment=False, beta1=1.05, alpha1=0.4, alpha2=0.3).head(1)

Unnamed: 0,blood_pressure,sodium,age,proteinurin
0,153.554832,5.984276,73.820262,47.153633
