# `causalml` - Meta-Learner Example Notebook
This notebook only contains regression examples.

In [1]:
# Make the sibling `causalml` directory (resolved relative to the current working
# directory) the working directory. Presumably this is what lets the bare
# `inference.meta` imports in the import cell resolve — run it once per kernel.
import os

base_path = os.path.abspath(os.path.join(os.pardir, "causalml"))
os.chdir(base_path)

In [2]:
# Load (or reload) IPython's autoreload extension and switch it to mode 2, so edits
# to imported modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [3]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import train_test_split
import statsmodels.api as sm
from xgboost import XGBRegressor, XGBClassifier
import warnings

# from causalml.inference.meta import XGBTLearner, MLPTLearner
from inference.meta import BaseSRegressor, BaseTRegressor, BaseXRegressor, BaseRRegressor
from inference.meta import BaseSClassifier, BaseTClassifier, BaseXClassifier, BaseRClassifier
from inference.meta import LRSRegressor
from causalml.match import NearestNeighborMatch, MatchOptimizer, create_table_one
from causalml.propensity import ElasticNetPropensityModel
from causalml.dataset import *
from causalml.metrics import *

# Notebook-wide presentation settings.
# Silence every warning for the rest of the session (set first, so later calls are covered).
warnings.filterwarnings('ignore')
# Use the 'fivethirtyeight' matplotlib style for all figures.
plt.style.use('fivethirtyeight')
# Show pandas floats with four decimal places.
pd.set_option('display.float_format', '{:.4f}'.format)

# imports from package
import logging
from sklearn.dummy import DummyRegressor
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
import statsmodels.api as sm
from copy import deepcopy

logger = logging.getLogger('causalml')
logging.basicConfig(level=logging.INFO)

%matplotlib inline

# Single Treatment Case

### Generate synthetic data

In [4]:
# Generate synthetic data using mode 1.
# Seed NumPy's global RNG first so that Restart & Run All regenerates the same data
# (assumes synthetic_data draws from np.random's global state — TODO confirm).
np.random.seed(42)
y, X, treatment, tau, b, e = synthetic_data(mode=1, n=10000, p=8, sigma=1.0)

# Recode the 0/1 treatment flag into the string labels used by the learners below
# ('control' matches the `control_name` passed to every learner).
treatment = np.where(np.asarray(treatment) == 1, 'treatment_a', 'control')

## S-Learner

### ATE

In [5]:
# S-learner on an XGBoost base model; return_ci=False asks for the ATE point estimate only.
learner_s = BaseSRegressor(
    XGBRegressor(),
    control_name='control',
)
ate_s = learner_s.estimate_ate(
    X=X,
    treatment=treatment,
    y=y,
    return_ci=False,
)

INFO:causalml:Error metrics for group treatment_a
INFO:causalml:    RMSE   (Control): 1.1930
INFO:causalml:    RMSE (Treatment): 1.0938
INFO:causalml:   sMAPE   (Control): 0.6049
INFO:causalml:   sMAPE (Treatment): 0.7908
INFO:causalml:    Gini   (Control): 0.4568
INFO:causalml:    Gini (Treatment): 0.4024


In [6]:
ate_s

array([0.58329705])

In [7]:
learner_s._classes

{'treatment_a': 0}

### ATE w/ Confidence Intervals

In [8]:
# S-learner ATE with an interval: return_ci=True makes the call return the estimate
# plus lower/upper bounds, here driven by n_bootstraps=100 and bootstrap_size=5000.
alpha = 0.05
learner_s = BaseSRegressor(
    XGBRegressor(),
    ate_alpha=alpha,
    control_name='control',
)
ate_s, ate_s_lb, ate_s_ub = learner_s.estimate_ate(
    X=X,
    treatment=treatment,
    y=y,
    return_ci=True,
    n_bootstraps=100,
    bootstrap_size=5000,
)

INFO:causalml:Error metrics for group treatment_a
INFO:causalml:    RMSE   (Control): 1.1930
INFO:causalml:    RMSE (Treatment): 1.0938
INFO:causalml:   sMAPE   (Control): 0.6049
INFO:causalml:   sMAPE (Treatment): 0.7908
INFO:causalml:    Gini   (Control): 0.4568
INFO:causalml:    Gini (Treatment): 0.4024
INFO:causalml:11/100 bootstraps completed. (5s lapsed)
INFO:causalml:21/100 bootstraps completed. (10s lapsed)
INFO:causalml:31/100 bootstraps completed. (15s lapsed)
INFO:causalml:41/100 bootstraps completed. (20s lapsed)
INFO:causalml:51/100 bootstraps completed. (25s lapsed)
INFO:causalml:61/100 bootstraps completed. (31s lapsed)
INFO:causalml:71/100 bootstraps completed. (36s lapsed)
INFO:causalml:81/100 bootstraps completed. (42s lapsed)
INFO:causalml:91/100 bootstraps completed. (47s lapsed)


In [9]:
np.vstack((ate_s_lb, ate_s, ate_s_ub))

array([[0.35547776],
       [0.58329705],
       [0.79762868]])

### CATE

In [10]:
# Fresh S-learner; fit_predict fits on (X, treatment, y) and returns the CATE
# estimates for the same samples, without intervals (return_ci=False).
learner_s = BaseSRegressor(
    XGBRegressor(),
    control_name='control',
)
cate_s = learner_s.fit_predict(
    X=X,
    treatment=treatment,
    y=y,
    return_ci=False,
)

INFO:causalml:Error metrics for group treatment_a
INFO:causalml:    RMSE   (Control): 1.1930
INFO:causalml:    RMSE (Treatment): 1.0938
INFO:causalml:   sMAPE   (Control): 0.6049
INFO:causalml:   sMAPE (Treatment): 0.7908
INFO:causalml:    Gini   (Control): 0.4568
INFO:causalml:    Gini (Treatment): 0.4024


In [11]:
cate_s

array([[0.47578371],
       [0.83733249],
       [0.38391006],
       ...,
       [0.47776198],
       [0.80010033],
       [0.80671191]])

### CATE w/ Confidence Intervals

In [12]:
# S-learner CATE with per-sample lower/upper bounds from bootstrapping
# (n_bootstraps=100, bootstrap_size=5000).
alpha = 0.05
learner_s = BaseSRegressor(
    XGBRegressor(),
    ate_alpha=alpha,
    control_name='control',
)
cate_s, cate_s_lb, cate_s_ub = learner_s.fit_predict(
    X=X,
    treatment=treatment,
    y=y,
    return_ci=True,
    n_bootstraps=100,
    bootstrap_size=5000,
)

INFO:causalml:Error metrics for group treatment_a
INFO:causalml:    RMSE   (Control): 1.1930
INFO:causalml:    RMSE (Treatment): 1.0938
INFO:causalml:   sMAPE   (Control): 0.6049
INFO:causalml:   sMAPE (Treatment): 0.7908
INFO:causalml:    Gini   (Control): 0.4568
INFO:causalml:    Gini (Treatment): 0.4024
INFO:causalml:11/100 bootstraps completed. (5s lapsed)
INFO:causalml:21/100 bootstraps completed. (10s lapsed)
INFO:causalml:31/100 bootstraps completed. (15s lapsed)
INFO:causalml:41/100 bootstraps completed. (20s lapsed)
INFO:causalml:51/100 bootstraps completed. (25s lapsed)
INFO:causalml:61/100 bootstraps completed. (30s lapsed)
INFO:causalml:71/100 bootstraps completed. (35s lapsed)
INFO:causalml:81/100 bootstraps completed. (40s lapsed)
INFO:causalml:91/100 bootstraps completed. (45s lapsed)


In [13]:
cate_s

array([[0.47578371],
       [0.83733249],
       [0.38391006],
       ...,
       [0.47776198],
       [0.80010033],
       [0.80671191]])

In [14]:
cate_s_lb

array([[0.25054919],
       [0.56307748],
       [0.21265524],
       ...,
       [0.25970175],
       [0.60951762],
       [0.56977587]])

In [15]:
cate_s_ub

array([[0.73046933],
       [0.99335149],
       [0.66894425],
       ...,
       [0.66676621],
       [1.11278922],
       [0.97594565]])

## T-Learner

### ATE w/ Confidence Intervals

In [16]:
# T-learner on an XGBoost base model. With default arguments estimate_ate
# yields three values here: the ATE and its lower/upper bounds.
learner_t = BaseTRegressor(
    XGBRegressor(),
    control_name='control',
)
ate_t, ate_t_lb, ate_t_ub = learner_t.estimate_ate(
    X=X,
    treatment=treatment,
    y=y,
)

INFO:causalml:Error metrics for group treatment_a
INFO:causalml:    RMSE   (Control): 0.9302
INFO:causalml:    RMSE (Treatment): 0.9360
INFO:causalml:   sMAPE   (Control): 0.7880
INFO:causalml:   sMAPE (Treatment): 0.4533
INFO:causalml:    Gini   (Control): 0.4904
INFO:causalml:    Gini (Treatment): 0.5341


In [17]:
np.vstack((ate_t_lb, ate_t, ate_t_ub))

array([[0.57599011],
       [0.61318656],
       [0.65038301]])

### CATE

In [18]:
# Fresh T-learner; fit_predict returns the CATE estimates only (no intervals requested).
learner_t = BaseTRegressor(
    XGBRegressor(),
    control_name='control',
)
cate_t = learner_t.fit_predict(
    X=X,
    treatment=treatment,
    y=y,
)

INFO:causalml:Error metrics for group treatment_a
INFO:causalml:    RMSE   (Control): 0.9302
INFO:causalml:    RMSE (Treatment): 0.9360
INFO:causalml:   sMAPE   (Control): 0.7880
INFO:causalml:   sMAPE (Treatment): 0.4533
INFO:causalml:    Gini   (Control): 0.4904
INFO:causalml:    Gini (Treatment): 0.5341


In [19]:
cate_t

array([[0.85184968],
       [0.97164452],
       [0.38533342],
       ...,
       [0.39375031],
       [1.43344426],
       [1.27798486]])

### CATE w/ Confidence Intervals

In [20]:
# T-learner CATE with bootstrapped per-sample bounds
# (n_bootstraps=100, bootstrap_size=5000).
learner_t = BaseTRegressor(
    XGBRegressor(),
    control_name='control',
)
cate_t, cate_t_lb, cate_t_ub = learner_t.fit_predict(
    X=X,
    treatment=treatment,
    y=y,
    return_ci=True,
    n_bootstraps=100,
    bootstrap_size=5000,
)

INFO:causalml:Error metrics for group treatment_a
INFO:causalml:    RMSE   (Control): 0.9302
INFO:causalml:    RMSE (Treatment): 0.9360
INFO:causalml:   sMAPE   (Control): 0.7880
INFO:causalml:   sMAPE (Treatment): 0.4533
INFO:causalml:    Gini   (Control): 0.4904
INFO:causalml:    Gini (Treatment): 0.5341
INFO:causalml:10/100 bootstraps completed. (5s lapsed)
INFO:causalml:20/100 bootstraps completed. (10s lapsed)
INFO:causalml:30/100 bootstraps completed. (15s lapsed)
INFO:causalml:40/100 bootstraps completed. (20s lapsed)
INFO:causalml:50/100 bootstraps completed. (25s lapsed)
INFO:causalml:60/100 bootstraps completed. (29s lapsed)
INFO:causalml:70/100 bootstraps completed. (34s lapsed)
INFO:causalml:80/100 bootstraps completed. (39s lapsed)
INFO:causalml:90/100 bootstraps completed. (44s lapsed)


In [21]:
cate_t

array([[0.85184968],
       [0.97164452],
       [0.38533342],
       ...,
       [0.39375031],
       [1.43344426],
       [1.27798486]])

In [22]:
cate_t_lb

array([[-0.00668564],
       [ 0.63984069],
       [-0.03464244],
       ...,
       [-0.16005479],
       [ 0.34001098],
       [ 0.68983229]])

In [23]:
cate_t_ub

array([[1.68203196],
       [1.26681087],
       [0.94834577],
       ...,
       [0.67092188],
       [2.04236056],
       [1.70261656]])

## X-Learner

### ATE w/ Confidence Intervals

In [24]:
# X-learner on an XGBoost base model; `e` (from synthetic_data) is passed as `p`.
# estimate_ate yields the ATE together with its lower/upper bounds.
learner_x = BaseXRegressor(XGBRegressor(), control_name='control')
ate_x, ate_x_lb, ate_x_ub = learner_x.estimate_ate(X=X, p=e, treatment=treatment, y=y)

# Inspect the learner's treatment groups only now that `learner_x` exists: reading it
# in a separate cell before this one raised NameError on a fresh kernel.
# (`t_groups` is presumably populated during fitting — TODO confirm.)
learner_x.t_groups

INFO:causalml:Error metrics for group treatment_a
INFO:causalml:    RMSE   (Control): 0.9302
INFO:causalml:    RMSE (Treatment): 0.9360
INFO:causalml:   sMAPE   (Control): 0.7880
INFO:causalml:   sMAPE (Treatment): 0.4533
INFO:causalml:    Gini   (Control): 0.4904
INFO:causalml:    Gini (Treatment): 0.5341


In [26]:
np.vstack((ate_x_lb, ate_x, ate_x_ub))

array([[0.52829111],
       [0.56523823],
       [0.60218536]])

### CATE

### CATE w/ Confidence Intervals

In [27]:
# X-learner CATE with bootstrapped per-sample bounds
# (n_bootstraps=100, bootstrap_size=5000); `e` is supplied as `p`.
learner_x = BaseXRegressor(
    XGBRegressor(),
    control_name='control',
)
cate_x, cate_x_lb, cate_x_ub = learner_x.fit_predict(
    X=X,
    p=e,
    treatment=treatment,
    y=y,
    return_ci=True,
    n_bootstraps=100,
    bootstrap_size=5000,
)

INFO:causalml:Error metrics for group treatment_a
INFO:causalml:    RMSE   (Control): 0.9302
INFO:causalml:    RMSE (Treatment): 0.9360
INFO:causalml:   sMAPE   (Control): 0.7880
INFO:causalml:   sMAPE (Treatment): 0.4533
INFO:causalml:    Gini   (Control): 0.4904
INFO:causalml:    Gini (Treatment): 0.5341
INFO:causalml:10/100 bootstraps completed. (10s lapsed)
INFO:causalml:20/100 bootstraps completed. (20s lapsed)
INFO:causalml:30/100 bootstraps completed. (29s lapsed)
INFO:causalml:40/100 bootstraps completed. (39s lapsed)
INFO:causalml:50/100 bootstraps completed. (49s lapsed)
INFO:causalml:60/100 bootstraps completed. (58s lapsed)
INFO:causalml:70/100 bootstraps completed. (68s lapsed)
INFO:causalml:80/100 bootstraps completed. (78s lapsed)
INFO:causalml:90/100 bootstraps completed. (87s lapsed)


In [28]:
cate_x

array([[0.39778277],
       [0.72118205],
       [0.35821748],
       ...,
       [0.26484549],
       [1.54219508],
       [0.72743636]])

In [29]:
cate_x_lb

array([[ 0.02330786],
       [ 0.38030641],
       [ 0.03930511],
       ...,
       [-0.01951587],
       [ 0.86252127],
       [ 0.3520228 ]])

In [30]:
cate_x_ub

array([[1.16330491],
       [0.95974109],
       [0.67299869],
       ...,
       [0.68821538],
       [2.22851447],
       [1.26964058]])

## R-Learner

### ATE w/ Confidence Intervals

In [31]:
# R-learner on an XGBoost base model; `e` is supplied as `p`.
# estimate_ate yields the ATE together with its lower/upper bounds.
learner_r = BaseRRegressor(
    XGBRegressor(),
    control_name='control',
)
ate_r, ate_r_lb, ate_r_ub = learner_r.estimate_ate(
    X=X,
    p=e,
    treatment=treatment,
    y=y,
)

INFO:causalml:generating out-of-fold CV outcome estimates
INFO:causalml:training the treatment effect model for treatment_a with R-loss


In [32]:
np.vstack((ate_r_lb, ate_r, ate_r_ub))

array([[0.53253662],
       [0.53293695],
       [0.53333729]])

### CATE

In [33]:
# Fresh R-learner; fit_predict returns the CATE estimates only (no intervals requested).
learner_r = BaseRRegressor(
    XGBRegressor(),
    control_name='control',
)
cate_r = learner_r.fit_predict(
    X=X,
    p=e,
    treatment=treatment,
    y=y,
)

INFO:causalml:generating out-of-fold CV outcome estimates
INFO:causalml:training the treatment effect model for treatment_a with R-loss


In [34]:
cate_r

array([[0.68966258],
       [0.73078507],
       [0.43858752],
       ...,
       [0.18278325],
       [1.39403331],
       [0.74023998]])

### CATE w/ Confidence Intervals

In [35]:
# R-learner CATE with bootstrapped per-sample bounds. Note the smaller
# bootstrap_size=3000 here (the other single-treatment CI cells use 5000).
learner_r = BaseRRegressor(
    XGBRegressor(),
    control_name='control',
)
cate_r, cate_r_lb, cate_r_ub = learner_r.fit_predict(
    X=X,
    p=e,
    treatment=treatment,
    y=y,
    return_ci=True,
    n_bootstraps=100,
    bootstrap_size=3000,
)

INFO:causalml:generating out-of-fold CV outcome estimates
INFO:causalml:training the treatment effect model for treatment_a with R-loss
INFO:causalml:10/100 bootstraps completed. (14s lapsed)
INFO:causalml:20/100 bootstraps completed. (28s lapsed)
INFO:causalml:30/100 bootstraps completed. (42s lapsed)
INFO:causalml:40/100 bootstraps completed. (55s lapsed)
INFO:causalml:50/100 bootstraps completed. (68s lapsed)
INFO:causalml:60/100 bootstraps completed. (81s lapsed)
INFO:causalml:70/100 bootstraps completed. (93s lapsed)
INFO:causalml:80/100 bootstraps completed. (106s lapsed)
INFO:causalml:90/100 bootstraps completed. (119s lapsed)


In [36]:
cate_r

array([[0.74433327],
       [0.74781644],
       [0.47970974],
       ...,
       [0.21244851],
       [1.22534442],
       [0.80403864]])

In [37]:
cate_r_lb

array([[-0.32421049],
       [ 0.13208617],
       [-0.23789642],
       ...,
       [-0.43033188],
       [-0.03593009],
       [ 0.1587713 ]])

In [38]:
cate_r_ub

array([[1.8828503 ],
       [1.26567189],
       [1.13657081],
       ...,
       [0.72818757],
       [2.33916125],
       [1.8088673 ]])

# Multiple Treatment Case

### Generate synthetic data
Note: the treatment flag (`treatment_a` vs. `treatment_b`) is assigned at random AFTER the synthetic data have been generated, so it doesn't make sense to measure accuracy metrics here. A next step would be to include multiple treatments in the data-generating process (DGP) itself.

In [39]:
# Generate synthetic data using mode 1.
# Seed NumPy's global RNG first so that both the generated data and the random
# treatment_a/treatment_b split below are reproducible on Restart & Run All
# (assumes synthetic_data draws from np.random's global state — TODO confirm).
np.random.seed(42)
y, X, treatment, tau, b, e = synthetic_data(mode=1, n=10000, p=8, sigma=1.0)

# Split the treated units at random: a uniform draw > 0.2 gives 'treatment_a'
# (about 80% of treated units), otherwise 'treatment_b'; untreated units stay 'control'.
is_treated = np.asarray(treatment) == 1
goes_to_a = np.random.random(size=is_treated.shape) > 0.2
treatment = np.where(is_treated,
                     np.where(goes_to_a, 'treatment_a', 'treatment_b'),
                     'control')

# Map every group label (including 'control') to the same array `e` returned by
# synthetic_data; this dict is what the X- and R-learners receive as `p` below.
e = {group: e for group in np.unique(treatment)}

In [40]:
pd.Series(treatment).value_counts()

control        4791
treatment_a    4195
treatment_b    1014
dtype: int64

## S-Learner

### ATE

In [41]:
# Multi-treatment S-learner: point estimate of the ATE only (return_ci=False);
# the result holds one value per treatment group.
learner_s = BaseSRegressor(
    XGBRegressor(),
    control_name='control',
)
ate_s = learner_s.estimate_ate(
    X=X,
    treatment=treatment,
    y=y,
    return_ci=False,
)

INFO:causalml:Error metrics for group treatment_a
INFO:causalml:    RMSE   (Control): 1.0589
INFO:causalml:    RMSE (Treatment): 1.0821
INFO:causalml:   sMAPE   (Control): 0.5291
INFO:causalml:   sMAPE (Treatment): 0.7627
INFO:causalml:    Gini   (Control): 0.4638
INFO:causalml:    Gini (Treatment): 0.4939
INFO:causalml:Error metrics for group treatment_b
INFO:causalml:    RMSE   (Control): 1.0490
INFO:causalml:    RMSE (Treatment): 1.0245
INFO:causalml:   sMAPE   (Control): 0.5241
INFO:causalml:   sMAPE (Treatment): 0.6571
INFO:causalml:    Gini   (Control): 0.4539
INFO:causalml:    Gini (Treatment): 0.5434


In [42]:
ate_s

array([0.40026052, 0.19945888])

In [43]:
learner_s._classes

{'treatment_a': 0, 'treatment_b': 1}

### ATE w/ Confidence Intervals
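Note: for ATE estimates, the S-Learner is the only learner in this notebook that uses bootstrapping to get confidence intervals. (For CATE, every learner's confidence-interval cell passes bootstrap arguments.)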

In [44]:
# Multi-treatment S-learner ATE with bootstrapped lower/upper bounds
# (n_bootstraps=100, bootstrap_size=5000).
alpha = 0.05
learner_s = BaseSRegressor(
    XGBRegressor(),
    ate_alpha=alpha,
    control_name='control',
)
ate_s, ate_s_lb, ate_s_ub = learner_s.estimate_ate(
    X=X,
    treatment=treatment,
    y=y,
    return_ci=True,
    n_bootstraps=100,
    bootstrap_size=5000,
)

INFO:causalml:Error metrics for group treatment_a
INFO:causalml:    RMSE   (Control): 1.0589
INFO:causalml:    RMSE (Treatment): 1.0821
INFO:causalml:   sMAPE   (Control): 0.5291
INFO:causalml:   sMAPE (Treatment): 0.7627
INFO:causalml:    Gini   (Control): 0.4638
INFO:causalml:    Gini (Treatment): 0.4939
INFO:causalml:Error metrics for group treatment_b
INFO:causalml:    RMSE   (Control): 1.0490
INFO:causalml:    RMSE (Treatment): 1.0245
INFO:causalml:   sMAPE   (Control): 0.5241
INFO:causalml:   sMAPE (Treatment): 0.6571
INFO:causalml:    Gini   (Control): 0.4539
INFO:causalml:    Gini (Treatment): 0.5434
INFO:causalml:11/100 bootstraps completed. (10s lapsed)
INFO:causalml:21/100 bootstraps completed. (20s lapsed)
INFO:causalml:31/100 bootstraps completed. (30s lapsed)
INFO:causalml:41/100 bootstraps completed. (39s lapsed)
INFO:causalml:51/100 bootstraps completed. (49s lapsed)
INFO:causalml:61/100 bootstraps completed. (59s lapsed)
INFO:causalml:71/100 bootstraps completed. (68s 

In [45]:
np.vstack((ate_s_lb, ate_s, ate_s_ub))

array([[ 0.16878264, -0.02035488],
       [ 0.40026052,  0.19945888],
       [ 0.6049374 ,  0.4339356 ]])

### CATE

In [46]:
# Multi-treatment S-learner CATE without intervals; the result has one column
# per treatment group.
learner_s = BaseSRegressor(
    XGBRegressor(),
    control_name='control',
)
cate_s = learner_s.fit_predict(
    X=X,
    treatment=treatment,
    y=y,
    return_ci=False,
)

INFO:causalml:Error metrics for group treatment_a
INFO:causalml:    RMSE   (Control): 1.0589
INFO:causalml:    RMSE (Treatment): 1.0821
INFO:causalml:   sMAPE   (Control): 0.5291
INFO:causalml:   sMAPE (Treatment): 0.7627
INFO:causalml:    Gini   (Control): 0.4638
INFO:causalml:    Gini (Treatment): 0.4939
INFO:causalml:Error metrics for group treatment_b
INFO:causalml:    RMSE   (Control): 1.0490
INFO:causalml:    RMSE (Treatment): 1.0245
INFO:causalml:   sMAPE   (Control): 0.5241
INFO:causalml:   sMAPE (Treatment): 0.6571
INFO:causalml:    Gini   (Control): 0.4539
INFO:causalml:    Gini (Treatment): 0.5434


In [47]:
cate_s

array([[ 0.40849626,  0.22764063],
       [ 0.32321846,  0.07951379],
       [ 0.27658069,  0.11914802],
       ...,
       [ 0.53736389,  0.19888401],
       [ 0.46584475,  0.18498182],
       [ 0.51777351, -0.13787472]])

### CATE w/ Confidence Intervals

In [48]:
# Multi-treatment S-learner CATE with bootstrapped per-sample bounds
# (n_bootstraps=100, bootstrap_size=5000).
alpha = 0.05
learner_s = BaseSRegressor(
    XGBRegressor(),
    ate_alpha=alpha,
    control_name='control',
)
cate_s, cate_s_lb, cate_s_ub = learner_s.fit_predict(
    X=X,
    treatment=treatment,
    y=y,
    return_ci=True,
    n_bootstraps=100,
    bootstrap_size=5000,
)

INFO:causalml:Error metrics for group treatment_a
INFO:causalml:    RMSE   (Control): 1.0589
INFO:causalml:    RMSE (Treatment): 1.0821
INFO:causalml:   sMAPE   (Control): 0.5291
INFO:causalml:   sMAPE (Treatment): 0.7627
INFO:causalml:    Gini   (Control): 0.4638
INFO:causalml:    Gini (Treatment): 0.4939
INFO:causalml:Error metrics for group treatment_b
INFO:causalml:    RMSE   (Control): 1.0490
INFO:causalml:    RMSE (Treatment): 1.0245
INFO:causalml:   sMAPE   (Control): 0.5241
INFO:causalml:   sMAPE (Treatment): 0.6571
INFO:causalml:    Gini   (Control): 0.4539
INFO:causalml:    Gini (Treatment): 0.5434
INFO:causalml:11/100 bootstraps completed. (10s lapsed)
INFO:causalml:21/100 bootstraps completed. (20s lapsed)
INFO:causalml:31/100 bootstraps completed. (30s lapsed)
INFO:causalml:41/100 bootstraps completed. (40s lapsed)
INFO:causalml:51/100 bootstraps completed. (49s lapsed)
INFO:causalml:61/100 bootstraps completed. (59s lapsed)
INFO:causalml:71/100 bootstraps completed. (69s 

In [49]:
cate_s

array([[ 0.40849626,  0.22764063],
       [ 0.32321846,  0.07951379],
       [ 0.27658069,  0.11914802],
       ...,
       [ 0.53736389,  0.19888401],
       [ 0.46584475,  0.18498182],
       [ 0.51777351, -0.13787472]])

In [50]:
cate_s_lb

array([[ 0.10050442, -0.06904682],
       [ 0.19402468, -0.0149445 ],
       [ 0.09609928, -0.11995067],
       ...,
       [ 0.14240343,  0.03057296],
       [ 0.29008278,  0.01383644],
       [ 0.22229182, -0.44374911]])

In [51]:
cate_s_ub

array([[0.63702908, 0.4229527 ],
       [0.60949836, 0.32970906],
       [0.5453802 , 0.31526357],
       ...,
       [0.60835804, 0.37995659],
       [0.67825423, 0.34379383],
       [0.63276588, 0.38315574]])

## T-Learner

### ATE w/ Confidence Intervals

In [52]:
# Multi-treatment T-learner: ATE plus lower/upper bounds, one value per treatment group.
learner_t = BaseTRegressor(
    XGBRegressor(),
    control_name='control',
)
ate_t, ate_t_lb, ate_t_ub = learner_t.estimate_ate(
    X=X,
    treatment=treatment,
    y=y,
)

INFO:causalml:Error metrics for group treatment_a
INFO:causalml:    RMSE   (Control): 0.9823
INFO:causalml:    RMSE (Treatment): 0.9195
INFO:causalml:   sMAPE   (Control): 0.7765
INFO:causalml:   sMAPE (Treatment): 0.4508
INFO:causalml:    Gini   (Control): 0.5388
INFO:causalml:    Gini (Treatment): 0.5466
INFO:causalml:Error metrics for group treatment_b
INFO:causalml:    RMSE   (Control): 0.9903
INFO:causalml:    RMSE (Treatment): 0.8239
INFO:causalml:   sMAPE   (Control): 0.6674
INFO:causalml:   sMAPE (Treatment): 0.4277
INFO:causalml:    Gini   (Control): 0.5606
INFO:causalml:    Gini (Treatment): 0.7321


In [53]:
np.vstack((ate_t_lb, ate_t, ate_t_ub))

array([[0.38532091, 0.26594866],
       [0.42329493, 0.32114392],
       [0.46126894, 0.37633919]])

### CATE

In [54]:
# Multi-treatment T-learner CATE without intervals; one column per treatment group.
learner_t = BaseTRegressor(
    XGBRegressor(),
    control_name='control',
)
cate_t = learner_t.fit_predict(
    X=X,
    treatment=treatment,
    y=y,
)

INFO:causalml:Error metrics for group treatment_a
INFO:causalml:    RMSE   (Control): 0.9823
INFO:causalml:    RMSE (Treatment): 0.9195
INFO:causalml:   sMAPE   (Control): 0.7765
INFO:causalml:   sMAPE (Treatment): 0.4508
INFO:causalml:    Gini   (Control): 0.5388
INFO:causalml:    Gini (Treatment): 0.5466
INFO:causalml:Error metrics for group treatment_b
INFO:causalml:    RMSE   (Control): 0.9903
INFO:causalml:    RMSE (Treatment): 0.8239
INFO:causalml:   sMAPE   (Control): 0.6674
INFO:causalml:   sMAPE (Treatment): 0.4277
INFO:causalml:    Gini   (Control): 0.5606
INFO:causalml:    Gini (Treatment): 0.7321


In [55]:
cate_t

array([[0.35081124, 0.33266389],
       [0.24868739, 0.05720544],
       [0.28636992, 0.17268085],
       ...,
       [0.4305191 , 0.51081216],
       [0.09683752, 0.71547627],
       [0.43761277, 0.06787491]])

### CATE w/ Confidence Intervals

In [56]:
# Multi-treatment T-learner CATE with bootstrapped per-sample bounds
# (n_bootstraps=100, bootstrap_size=5000).
learner_t = BaseTRegressor(
    XGBRegressor(),
    control_name='control',
)
cate_t, cate_t_lb, cate_t_ub = learner_t.fit_predict(
    X=X,
    treatment=treatment,
    y=y,
    return_ci=True,
    n_bootstraps=100,
    bootstrap_size=5000,
)

INFO:causalml:Error metrics for group treatment_a
INFO:causalml:    RMSE   (Control): 0.9823
INFO:causalml:    RMSE (Treatment): 0.9195
INFO:causalml:   sMAPE   (Control): 0.7765
INFO:causalml:   sMAPE (Treatment): 0.4508
INFO:causalml:    Gini   (Control): 0.5388
INFO:causalml:    Gini (Treatment): 0.5466
INFO:causalml:Error metrics for group treatment_b
INFO:causalml:    RMSE   (Control): 0.9903
INFO:causalml:    RMSE (Treatment): 0.8239
INFO:causalml:   sMAPE   (Control): 0.6674
INFO:causalml:   sMAPE (Treatment): 0.4277
INFO:causalml:    Gini   (Control): 0.5606
INFO:causalml:    Gini (Treatment): 0.7321
INFO:causalml:10/100 bootstraps completed. (11s lapsed)
INFO:causalml:20/100 bootstraps completed. (20s lapsed)
INFO:causalml:30/100 bootstraps completed. (30s lapsed)
INFO:causalml:40/100 bootstraps completed. (40s lapsed)
INFO:causalml:50/100 bootstraps completed. (50s lapsed)
INFO:causalml:60/100 bootstraps completed. (60s lapsed)
INFO:causalml:70/100 bootstraps completed. (69s 

In [57]:
cate_t

array([[0.35081124, 0.33266389],
       [0.24868739, 0.05720544],
       [0.28636992, 0.17268085],
       ...,
       [0.4305191 , 0.51081216],
       [0.09683752, 0.71547627],
       [0.43761277, 0.06787491]])

In [58]:
cate_t_lb

array([[-0.08144539, -0.59744868],
       [ 0.01799162, -0.38125419],
       [-0.22445461, -0.29835402],
       ...,
       [ 0.02081216, -0.26958241],
       [ 0.02807788, -0.37620527],
       [-0.3921815 , -1.83219032]])

In [59]:
cate_t_ub

array([[1.00166508, 1.12106158],
       [0.87668523, 0.79276754],
       [0.97333835, 0.96414106],
       ...,
       [1.10926274, 1.15256835],
       [0.96282631, 1.43377033],
       [1.52587242, 1.20193017]])

## X-Learner

### ATE w/ Confidence Intervals

In [60]:
# Multi-treatment X-learner: ATE plus lower/upper bounds. `e` is now the
# per-group dict built in the data-generation cell and is passed as `p`.
learner_x = BaseXRegressor(
    XGBRegressor(),
    control_name='control',
)
ate_x, ate_x_lb, ate_x_ub = learner_x.estimate_ate(
    X=X,
    p=e,
    treatment=treatment,
    y=y,
)

INFO:causalml:Error metrics for group treatment_a
INFO:causalml:    RMSE   (Control): 0.9823
INFO:causalml:    RMSE (Treatment): 0.9195
INFO:causalml:   sMAPE   (Control): 0.7765
INFO:causalml:   sMAPE (Treatment): 0.4508
INFO:causalml:    Gini   (Control): 0.5388
INFO:causalml:    Gini (Treatment): 0.5466
INFO:causalml:Error metrics for group treatment_b
INFO:causalml:    RMSE   (Control): 0.9903
INFO:causalml:    RMSE (Treatment): 0.8239
INFO:causalml:   sMAPE   (Control): 0.6674
INFO:causalml:   sMAPE (Treatment): 0.4277
INFO:causalml:    Gini   (Control): 0.5606
INFO:causalml:    Gini (Treatment): 0.7321


In [61]:
np.vstack((ate_x_lb, ate_x, ate_x_ub))

array([[0.37562191, 0.2659991 ],
       [0.41337235, 0.32093644],
       [0.4511228 , 0.37587377]])

### CATE

In [62]:
# Multi-treatment X-learner CATE without intervals; one column per treatment group.
learner_x = BaseXRegressor(
    XGBRegressor(),
    control_name='control',
)
cate_x = learner_x.fit_predict(
    X=X,
    p=e,
    treatment=treatment,
    y=y,
)

INFO:causalml:Error metrics for group treatment_a
INFO:causalml:    RMSE   (Control): 0.9823
INFO:causalml:    RMSE (Treatment): 0.9195
INFO:causalml:   sMAPE   (Control): 0.7765
INFO:causalml:   sMAPE (Treatment): 0.4508
INFO:causalml:    Gini   (Control): 0.5388
INFO:causalml:    Gini (Treatment): 0.5466
INFO:causalml:Error metrics for group treatment_b
INFO:causalml:    RMSE   (Control): 0.9903
INFO:causalml:    RMSE (Treatment): 0.8239
INFO:causalml:   sMAPE   (Control): 0.6674
INFO:causalml:   sMAPE (Treatment): 0.4277
INFO:causalml:    Gini   (Control): 0.5606
INFO:causalml:    Gini (Treatment): 0.7321


In [63]:
cate_x

array([[ 0.52527982,  0.39769229],
       [ 0.42821503,  0.20122311],
       [ 0.34181523,  0.46731031],
       ...,
       [ 0.43081468,  0.73125267],
       [ 0.18535542,  0.24842119],
       [ 0.82043529, -0.24316168]])

### CATE w/ Confidence Intervals

In [65]:
# Multi-treatment X-learner CATE with bootstrapped per-sample bounds. This cell uses
# a much lighter bootstrap (n_bootstraps=20, bootstrap_size=1000) than the S/T cells.
learner_x = BaseXRegressor(
    XGBRegressor(),
    control_name='control',
)
cate_x, cate_x_lb, cate_x_ub = learner_x.fit_predict(
    X=X,
    p=e,
    treatment=treatment,
    y=y,
    return_ci=True,
    n_bootstraps=20,
    bootstrap_size=1000,
)

INFO:causalml:Error metrics for group treatment_a
INFO:causalml:    RMSE   (Control): 0.9823
INFO:causalml:    RMSE (Treatment): 0.9195
INFO:causalml:   sMAPE   (Control): 0.7765
INFO:causalml:   sMAPE (Treatment): 0.4508
INFO:causalml:    Gini   (Control): 0.5388
INFO:causalml:    Gini (Treatment): 0.5466
INFO:causalml:Error metrics for group treatment_b
INFO:causalml:    RMSE   (Control): 0.9903
INFO:causalml:    RMSE (Treatment): 0.8239
INFO:causalml:   sMAPE   (Control): 0.6674
INFO:causalml:   sMAPE (Treatment): 0.4277
INFO:causalml:    Gini   (Control): 0.5606
INFO:causalml:    Gini (Treatment): 0.7321
INFO:causalml:10/20 bootstraps completed. (6s lapsed)


In [66]:
learner_x._classes

{'treatment_a': 0, 'treatment_b': 1}

In [67]:
cate_x

array([[ 0.52527982,  0.39769229],
       [ 0.42821503,  0.20122311],
       [ 0.34181523,  0.46731031],
       ...,
       [ 0.43081468,  0.73125267],
       [ 0.18535542,  0.24842119],
       [ 0.82043529, -0.24316168]])

In [68]:
cate_x_lb

array([[-0.11367871, -0.23758274],
       [-0.06715126, -0.04743222],
       [-0.30945642, -0.76316142],
       ...,
       [-0.06676014, -0.19030679],
       [-0.38418343, -0.44362469],
       [-0.59307305, -1.251716  ]])

In [69]:
cate_x_ub

array([[1.06644071, 1.33659313],
       [1.14413551, 1.18298175],
       [0.95327085, 1.49227993],
       ...,
       [0.87317408, 1.02219927],
       [0.99992956, 0.78829483],
       [1.52405493, 1.59690447]])

## R-Learner

### ATE w/ Confidence Intervals

In [70]:
# Multi-treatment R-learner: ATE plus lower/upper bounds, one value per treatment group.
learner_r = BaseRRegressor(
    XGBRegressor(),
    control_name='control',
)
ate_r, ate_r_lb, ate_r_ub = learner_r.estimate_ate(
    X=X,
    p=e,
    treatment=treatment,
    y=y,
)

INFO:causalml:generating out-of-fold CV outcome estimates
INFO:causalml:training the treatment effect model for treatment_a with R-loss
INFO:causalml:training the treatment effect model for treatment_b with R-loss


In [71]:
np.vstack((ate_r_lb, ate_r, ate_r_ub))

array([[0.34231668, 0.14392736],
       [0.34272427, 0.14461389],
       [0.34313186, 0.14530041]])

### CATE

In [72]:
# Multi-treatment R-learner CATE without intervals; one column per treatment group.
learner_r = BaseRRegressor(
    XGBRegressor(),
    control_name='control',
)
cate_r = learner_r.fit_predict(
    X=X,
    p=e,
    treatment=treatment,
    y=y,
)

INFO:causalml:generating out-of-fold CV outcome estimates
INFO:causalml:training the treatment effect model for treatment_a with R-loss
INFO:causalml:training the treatment effect model for treatment_b with R-loss


In [73]:
cate_r

array([[ 0.47026676,  0.15366212],
       [ 0.30793339,  0.24790418],
       [ 0.44024304,  0.42022982],
       ...,
       [ 0.32796681,  0.56629455],
       [ 0.1415191 , -0.04005969],
       [ 0.31458521, -0.25984585]])

### CATE w/ Confidence Intervals

In [74]:
# Multi-treatment R-learner CATE with bootstrapped per-sample bounds
# (light bootstrap: n_bootstraps=20, bootstrap_size=1000).
learner_r = BaseRRegressor(
    XGBRegressor(),
    control_name='control',
)
cate_r, cate_r_lb, cate_r_ub = learner_r.fit_predict(
    X=X,
    p=e,
    treatment=treatment,
    y=y,
    return_ci=True,
    n_bootstraps=20,
    bootstrap_size=1000,
)

INFO:causalml:generating out-of-fold CV outcome estimates
INFO:causalml:training the treatment effect model for treatment_a with R-loss
INFO:causalml:training the treatment effect model for treatment_b with R-loss
INFO:causalml:10/20 bootstraps completed. (6s lapsed)


In [75]:
cate_r

array([[ 0.48988515,  0.20426157],
       [ 0.37352651,  0.18254063],
       [ 0.47350597,  0.45746055],
       ...,
       [ 0.38196987,  0.61016506],
       [ 0.12566453, -0.03671098],
       [-0.07329565, -0.29935843]])

In [76]:
cate_r_lb

array([[-0.50216097, -0.87867155],
       [-0.23306711, -0.28041615],
       [-0.4162818 , -0.51882144],
       ...,
       [-0.11726523, -0.46684388],
       [-0.39257423, -0.63865196],
       [-1.09914327, -1.30770688]])

In [77]:
cate_r_ub

array([[1.42674825, 1.39832048],
       [0.78051882, 0.5501123 ],
       [1.93342747, 1.03070981],
       ...,
       [1.05792255, 1.48453925],
       [0.89899277, 0.30263484],
       [1.55487377, 1.1365593 ]])

# Visualize

In [78]:
# Plot, for each treatment group, the distribution of per-sample CATE predictions
# from the four learners fitted above (uses the most recent cate_s/cate_t/cate_x/cate_r).

# Mapping of treatment-group name -> column index in the CATE arrays.
# It is defined here, before first use: reading `groups` in an earlier cell raised NameError.
groups = learner_r._classes

# Histogram opacity. Deliberately not called `alpha`, so the significance level
# `alpha` passed as `ate_alpha` in earlier cells is not overwritten.
hist_alpha = 1
linewidth = 2
bins = 30

# (label prefix, CATE array, line style) for every learner drawn in each figure.
# The S-learner is drawn as a histogram like the others: its CATE varies per sample,
# so a single vertical line at sample 0 would show an arbitrary value.
learner_cates = (
    ('T Learner', cate_t, 'solid'),
    ('X Learner', cate_x, 'solid'),
    ('R Learner', cate_r, 'solid'),
    ('S Learner', cate_s, 'dotted'),
)

for group, idx in sorted(groups.items(), key=lambda x: x[1]):
    # Explicit figure/axes: plt.axes() would add a new Axes on top of the histograms
    # instead of returning the one being drawn on.
    fig, ax = plt.subplots(figsize=(12, 8))
    for learner_name, cate, linestyle in learner_cates:
        ax.hist(cate[:, idx], alpha=hist_alpha, bins=bins,
                label='{} ({})'.format(learner_name, group),
                histtype='step', linewidth=linewidth, linestyle=linestyle)
    ax.set_title('Distribution of CATE Predictions for {}'.format(group))
    ax.set_xlabel('Individual Treatment Effect (ITE/CATE)')
    ax.set_ylabel('# of Samples')
    ax.legend()