In [1]:
import pandas as pd
import numpy as np
import pickle
from scipy.sparse import csr_matrix
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LassoCV, MultiTaskElasticNetCV, ElasticNetCV, LogisticRegression
from sklearn.metrics import f1_score, accuracy_score
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier
from xgboost import XGBClassifier, XGBRegressor

import seaborn as sns

from lightgbm import LGBMRegressor, LGBMClassifier
from sklearn.svm import SVC

In [2]:
import logging
logger = logging.getLogger('causalml')
logging.basicConfig(level=logging.INFO)

In [3]:
data_path='../Processed/final/'
save_path='../Processed/final/'

# Import data

In [4]:
(Y_train, T_train, X_train, W_train)=pickle.load(open(data_path+'YTXW_train.pkl','rb'))
(Y_test, T_test, X_test, W_test)=pickle.load(open(data_path+'YTXW_test.pkl','rb'))
(Y_val, T_val, X_val, W_val)=pickle.load(open(data_path+'YTXW_val.pkl','rb'))

In [5]:
le_dx=pickle.load(open(data_path+'le_dx.pkl','rb'))
le_patid=pickle.load(open(data_path+'le_patid.pkl','rb'))
selected_patient_feature=['age_onset','obs_win','female']+['race__'+c for c in ['A','B','H','U','W']]
rx2id = pickle.load(open(data_path+'drug_dict.pkl', 'rb'))

# Process Y,T,W,X

Several steps:
- Standardize age
- Combine demographics with dx information 
- Set treatment and control group
- Output a dataframe

In [6]:
from sklearn import preprocessing

def prepare(y, t, w, x, rx2id, target):
    patid_temp = list(w['patid'].unique())
    temp_le = preprocessing.LabelEncoder()
    temp_le.fit(list(patid_temp))
    w['row_idx'] = temp_le.transform(w['patid'])
    
    w_sparse = csr_matrix((w['log_count'], (w['row_idx'], w['phecode3'])))
    w = w_sparse.toarray()
    
    x_temp = np.concatenate((w, x.values), axis=1)
    
    treatment_train = [0] * len(t)
    temp_index = t.index
    idx = 0

    def get_classes(value):
        return [k for k, v in rx2id.items() if v == value]

    for i in temp_index:
        classes = t.loc[i, 'antiasthma']
        if (classes != target):
            treatment_train[idx] = 'control'
        else:
            treatment_train[idx] = 'treatment'
        idx += 1
        
    treatment = pd.DataFrame(treatment_train)
    treatment.index = temp_index
    treatment.columns = ['treatment']
    
    y = pd.DataFrame(y)
    feature_df = pd.DataFrame(x_temp)
    feature_df.index = y.index
    
    df = pd.concat([y, treatment, t, feature_df], axis=1)
    df.index = np.arange(0, len(df))
    return df

# Benchmark (Agonist)

In [7]:
target = 1
df_val0 = prepare(Y_val, T_val, W_val, X_val, rx2id, target)
df_test0 = prepare(Y_test, T_test, W_test, X_test, rx2id, target)
df_train0 = prepare(Y_train, T_train, W_train, X_train, rx2id, target)

In [8]:
x_train0 = df_train0.iloc[:, 5:]
x_test0 = df_test0.iloc[:, 5:]
x_val0 = df_val0.iloc[:, 5:]

In [9]:
from causalml.inference.meta import BaseXRegressor, BaseRRegressor, BaseSRegressor, BaseTRegressor
from causalml.metrics import plot_gain, auuc_score
from causalml.inference.meta import LRSRegressor
from causalml.inference.meta import XGBTRegressor, MLPTRegressor
from causalml.inference.meta import BaseSClassifier, BaseTClassifier, BaseXClassifier, BaseRClassifier
from causalml.feature_selection.filters import FilterSelect

from causalml.propensity import GradientBoostedPropensityModel
from causalml.propensity import compute_propensity_score

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
from sklearn.linear_model import ElasticNet

In [10]:
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

## Propensity score estimation

In [11]:
t = df_train0['treatment']

In [12]:
treatment = [0] * len(t)
for i in range(len(t)):
    if t[i] == 'control':
        treatment[i] = 0
    else:
        treatment[i] = 1

In [13]:
t_test = df_test0['treatment']
treatment_test = [0] * len(t_test)
for i in range(len(t_test)):
    if t_test[i] == 'control':
        treatment_test[i] = 0
    else:
        treatment_test[i] = 1 

In [14]:
t_val = df_val0['treatment']
treatment_val = [0] * len(t_val)
for i in range(len(t_val)):
    if t_val[i] == 'control':
        treatment_val[i] = 0
    else:
        treatment_val[i] = 1 

### Logistic regression

In [15]:
glm = LogisticRegression(max_iter = 3000)
glm.fit(x_train0, treatment)

LogisticRegression(max_iter=3000)

In [16]:
t_pred_p = glm.predict_proba(x_val0)

In [17]:
lr_probs = t_pred_p[:, 1]

In [18]:
lr_probs

array([0.17856534, 0.26521402, 0.18511665, ..., 0.18130233, 0.30193069,
       0.28136381])

In [19]:
roc_auc_score(treatment_val, lr_probs)

0.5062264061349985

### Random Forest

In [20]:
rf = RandomForestClassifier(max_depth=6, random_state=1105)
rf.fit(x_train0, treatment)

RandomForestClassifier(max_depth=6, random_state=1105)

In [21]:
t_pred_p = rf.predict_proba(x_val0)
lr_probs = t_pred_p[:, 1]
roc_auc_score(treatment_val, lr_probs)

0.5207210522567012

### XGBC

In [22]:
xgb = XGBClassifier(max_depth=6, random_state=1105, n_estimators=100)
xgb.fit(x_train0, treatment)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=0, num_parallel_tree=1,
              random_state=1105, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
              subsample=1, tree_method='exact', validate_parameters=1,
              verbosity=None)

In [23]:
t_pred_p = xgb.predict_proba(x_val0)
lr_probs = t_pred_p[:, 1]
roc_auc_score(treatment_val, lr_probs)

0.5316721669555308

### Gradient Boosting Method

In [24]:
gbc = GradientBoostingClassifier(max_depth = 6, random_state = 1105)

In [25]:
gbc.fit(x_train0, treatment)

GradientBoostingClassifier(max_depth=6, random_state=1105)

In [26]:
t_pred_p = gbc.predict_proba(x_val0)
lr_probs = t_pred_p[:, 1]
roc_auc_score(treatment_val, lr_probs)

0.5254803849410796

### Model with feature selection

In [27]:
glm_l1 = LogisticRegression(penalty='elasticnet', max_iter = 3000, solver='saga', l1_ratio=0.1)
glm_l1.fit(x_train0, treatment)

LogisticRegression(l1_ratio=0.1, max_iter=3000, penalty='elasticnet',
                   solver='saga')

In [28]:
t_pred_p = glm_l1.predict_proba(x_val0)

In [29]:
lr_probs = t_pred_p[:, 1]

In [30]:
roc_auc_score(treatment_val, lr_probs)

0.5059583165159034

## Propensity score matching

In [31]:
score = xgb.predict_proba(x_train0)[:, 1]
df_train0['score'] = score
df_train0['T'] = treatment

In [32]:
score = list(score)

In [33]:
df_train0

Unnamed: 0,adrd,treatment,patid,antiasthma,log_count,0,1,2,3,4,...,241,242,243,244,245,246,247,248,score,T
0,0,treatment,9722,1,1.945910,0.0,0.000000,0.000000,0.000000,0.000000,...,0.0,75.0,1.0,1.0,0.0,0.0,0.0,0.0,0.883606,1
1,0,control,1232,5,1.386294,0.0,0.000000,1.098612,1.098612,0.693147,...,0.0,72.0,0.0,0.0,0.0,0.0,0.0,1.0,0.117459,0
2,0,treatment,2220,1,3.583519,0.0,0.000000,0.000000,0.000000,0.000000,...,0.0,74.0,0.0,0.0,0.0,0.0,0.0,1.0,0.906682,1
3,0,control,1806,4,0.693147,0.0,0.000000,0.000000,0.000000,0.000000,...,0.0,83.0,1.0,0.0,0.0,1.0,0.0,0.0,0.076535,0
4,0,control,9768,2,1.386294,0.0,0.000000,0.000000,0.000000,0.000000,...,0.0,84.0,0.0,0.0,0.0,0.0,0.0,1.0,0.076240,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6766,0,control,9185,3,3.044522,0.0,0.000000,0.000000,0.000000,0.000000,...,0.0,90.0,0.0,0.0,0.0,0.0,0.0,1.0,0.123111,0
6767,1,treatment,9752,1,1.609438,0.0,0.000000,0.693147,0.000000,0.000000,...,0.0,83.0,1.0,0.0,0.0,0.0,0.0,1.0,0.609267,1
6768,0,treatment,3868,1,0.693147,0.0,0.000000,0.693147,0.000000,0.000000,...,0.0,88.0,0.0,0.0,0.0,1.0,0.0,0.0,0.323897,1
6769,0,treatment,6972,1,0.693147,0.0,0.000000,0.693147,0.000000,0.000000,...,0.0,89.0,0.0,0.0,0.0,0.0,0.0,1.0,0.465055,1


In [34]:
from causalml.match import NearestNeighborMatch, create_table_one

psm = NearestNeighborMatch(replace=False,
                           ratio=1,
                           random_state=42)

matched = psm.match(data=df_train0,
                    treatment_col='T',
                    score_cols=['score'])


In [35]:
sum(treatment)

1746

In [36]:
matched

Unnamed: 0,adrd,treatment,patid,antiasthma,log_count,0,1,2,3,4,...,241,242,243,244,245,246,247,248,score,T
1398,0,treatment,4992,1,2.708050,0.000000,0.0,0.000000,0.0,0.0,...,0.0,82.0,1.0,0.0,0.0,0.0,0.0,1.0,0.201304,1
5975,0,treatment,8502,1,1.609438,0.000000,0.0,1.098612,0.0,0.0,...,0.0,76.0,0.0,0.0,0.0,0.0,0.0,1.0,0.274878,1
1425,0,treatment,2809,1,1.098612,0.000000,0.0,0.000000,0.0,0.0,...,0.0,90.0,1.0,0.0,0.0,1.0,0.0,0.0,0.478050,1
1032,0,treatment,9158,1,3.737670,0.000000,0.0,0.000000,0.0,0.0,...,0.0,89.0,1.0,0.0,0.0,0.0,0.0,1.0,0.430783,1
2443,0,treatment,4366,1,3.258097,0.000000,0.0,0.000000,0.0,0.0,...,0.0,74.0,0.0,0.0,0.0,0.0,0.0,1.0,0.466876,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4222,0,control,10558,5,0.693147,0.000000,0.0,0.000000,0.0,0.0,...,0.0,80.0,0.0,0.0,1.0,0.0,0.0,0.0,0.212148,0
4803,0,control,3580,5,2.944439,0.000000,0.0,0.693147,0.0,0.0,...,0.0,76.0,1.0,0.0,1.0,0.0,0.0,0.0,0.302202,0
5429,1,control,9320,5,4.077537,0.000000,0.0,1.098612,0.0,0.0,...,0.0,89.0,1.0,0.0,0.0,0.0,0.0,1.0,0.158461,0
3571,1,control,10390,5,2.772589,1.386294,0.0,0.000000,0.0,0.0,...,0.0,88.0,1.0,0.0,1.0,0.0,0.0,0.0,0.302031,0


# Meta-learners

In [37]:
l = len(x_train0)
score = xgb.predict_proba(x_train0)
e = {t: score[0:l, t] for t in np.unique(treatment)}

In [38]:
pickle.dump(e, open(save_path+'ps1.pkl', 'wb'))

## S-learner

### Logistic regression based

In [39]:
learner_s = BaseSClassifier(learner=LogisticRegression(max_iter=3000), control_name='control')
learner_s.fit(X=x_train0,treatment=df_train0['treatment'],y=df_train0['adrd'])

In [40]:
cate_s = learner_s.predict(X=x_test0, treatment=df_test0['treatment'], y=df_test0['adrd'])
test_learner_s = BaseSClassifier(learner=LogisticRegression(max_iter=3000), control_name='control')
test_cate_s = test_learner_s.fit_predict(X=x_test0,
                treatment=df_test0['treatment'],
                y=df_test0['adrd'])

INFO:causalml:Error metrics for group treatment
INFO:causalml:     AUC   (Control):     0.5529
INFO:causalml:     AUC (Treatment):     0.5408
INFO:causalml:Log Loss   (Control):     0.3832
INFO:causalml:Log Loss (Treatment):     0.4028
INFO:causalml:Error metrics for group treatment
INFO:causalml:     AUC   (Control):     0.9443
INFO:causalml:     AUC (Treatment):     0.9365
INFO:causalml:Log Loss   (Control):     0.1873
INFO:causalml:Log Loss (Treatment):     0.2046


In [41]:
((cate_s-test_cate_s)**2).mean()**(1/2)

0.03615171232241213

In [42]:
cate_s.var()

5.605520083955194e-05

### Random Forest base learner

In [44]:
learner_srf = BaseSClassifier(learner=RandomForestClassifier(max_depth=6, random_state=1105), control_name='control')
learner_srf.fit(X=x_train0,treatment=df_train0['treatment'],y=df_train0['adrd'])

In [45]:
cate_srf = learner_srf.predict(X=x_test0, treatment=df_test0['treatment'], y=df_test0['adrd'])
test_learner_srf = BaseSClassifier(learner=RandomForestClassifier(max_depth=6, random_state=1105), control_name='control')
test_cate_srf = test_learner_srf.fit_predict(X=x_test0,
                treatment=df_test0['treatment'], y=df_test0['adrd'])

INFO:causalml:Error metrics for group treatment
INFO:causalml:     AUC   (Control):     0.5316
INFO:causalml:     AUC (Treatment):     0.5380
INFO:causalml:Log Loss   (Control):     0.3713
INFO:causalml:Log Loss (Treatment):     0.3882
INFO:causalml:Error metrics for group treatment
INFO:causalml:     AUC   (Control):     0.9832
INFO:causalml:     AUC (Treatment):     0.9797
INFO:causalml:Log Loss   (Control):     0.2432
INFO:causalml:Log Loss (Treatment):     0.2596


In [46]:
((cate_srf-test_cate_srf)**2).mean()**(1/2)

0.0026899213676390664

In [47]:
cate_srf.var()

3.0918430505870655e-07

### XGBoosting

In [48]:
learner_sxg = BaseSClassifier(learner=XGBClassifier(max_depth=6, random_state=1105, n_estimators=100), control_name='control')
learner_sxg.fit(X=x_train0,treatment=df_train0['treatment'],y=df_train0['adrd'])
cate_sxg = learner_sxg.predict(X=x_test0, treatment=df_test0['treatment'])
test_learner_sxg = BaseSClassifier(learner=XGBClassifier(max_depth=6, random_state=1105, n_estimators=100), control_name='control')
test_cate_sxg = test_learner_sxg.fit_predict(X=x_test0, treatment=df_test0['treatment'], y=df_test0['adrd'])
((cate_sxg-test_cate_sxg)**2).mean()**(1/2)

INFO:causalml:Error metrics for group treatment
INFO:causalml:     AUC   (Control):     1.0000
INFO:causalml:     AUC (Treatment):     1.0000
INFO:causalml:Log Loss   (Control):     0.0084
INFO:causalml:Log Loss (Treatment):     0.0091


0.01773468584328339

## T-learner

### Logistic regression

In [49]:
learner_t = BaseTClassifier(learner = LogisticRegression(max_iter = 3000), control_name='control')
learner_t.fit(X=x_train0, treatment=df_train0['treatment'], y=df_train0['adrd'])

In [50]:
cate_t = learner_t.predict(X=x_test0, treatment=df_test0['treatment'], y=df_test0['adrd'])

INFO:causalml:Error metrics for group treatment
INFO:causalml:     AUC   (Control):     0.5400
INFO:causalml:     AUC (Treatment):     0.5475
INFO:causalml:Log Loss   (Control):     0.3939
INFO:causalml:Log Loss (Treatment):     0.5024


In [51]:
test_learner_t = BaseTClassifier(learner = LogisticRegression(max_iter = 3000), control_name='control')
test_cate_t = test_learner_t.fit_predict(X=x_test0, treatment=df_test0['treatment'], y=df_test0['adrd'])

INFO:causalml:Error metrics for group treatment
INFO:causalml:     AUC   (Control):     0.9636
INFO:causalml:     AUC (Treatment):     0.9992
INFO:causalml:Log Loss   (Control):     0.1602
INFO:causalml:Log Loss (Treatment):     0.0691


In [52]:
((cate_t-test_cate_t)**2).mean()**(1/2)

0.3105545486061205

### Random Forest

In [53]:
learner_trf = BaseTClassifier(learner=RandomForestClassifier(max_depth=6, random_state=1105), control_name='control')
learner_trf.fit(X=x_train0,treatment=df_train0['treatment'],y=df_train0['adrd'])
cate_trf = learner_trf.predict(X=x_test0, treatment=df_test0['treatment'])
test_learner_trf = BaseTClassifier(learner=RandomForestClassifier(max_depth=6, random_state=1105), control_name='control')
test_cate_trf = test_learner_trf.fit_predict(X=x_test0,
                treatment=df_test0['treatment'], y=df_test0['adrd'])
((cate_trf-test_cate_trf)**2).mean()**(1/2)

INFO:causalml:Error metrics for group treatment
INFO:causalml:     AUC   (Control):     0.9861
INFO:causalml:     AUC (Treatment):     0.9965
INFO:causalml:Log Loss   (Control):     0.2268
INFO:causalml:Log Loss (Treatment):     0.2135


0.07343445152391198

### XGBoosting

In [54]:
learner_txg = BaseTClassifier(learner=XGBClassifier(max_depth=6, random_state=1105, n_estimators=100), control_name='control')
learner_txg.fit(X=x_train0,treatment=df_train0['treatment'],y=df_train0['adrd'])
cate_txg = learner_txg.predict(X=x_test0, treatment=df_test0['treatment'])
test_learner_txg = BaseTClassifier(learner=XGBClassifier(max_depth=6, random_state=1105, n_estimators=100), control_name='control')
test_cate_txg = test_learner_txg.fit_predict(X=x_test0, treatment=df_test0['treatment'], y=df_test0['adrd'])
((cate_txg-test_cate_txg)**2).mean()**(1/2)

INFO:causalml:Error metrics for group treatment
INFO:causalml:     AUC   (Control):     1.0000
INFO:causalml:     AUC (Treatment):     1.0000
INFO:causalml:Log Loss   (Control):     0.0069
INFO:causalml:Log Loss (Treatment):     0.0094


0.2959846988196666

## X-learner

In [55]:
l = len(x_train0)
score = xgb.predict_proba(x_train0)
e = {t: score[0:l, t] for t in np.unique(treatment)}

l_test = len(x_test0)
score_test = xgb.predict_proba(x_test0)
e_test = {t: score[0:l_test, t] for t in np.unique(treatment_test)}

treatment = np.array(treatment)
treatment_test = np.array(treatment_test)

### Logistic regression

In [56]:
learner_x = BaseXClassifier(outcome_learner=LogisticRegression(max_iter=3000),
                            effect_learner=ElasticNetCV(max_iter=3000, random_state=1105))
learner_x.fit(X=x_train0, treatment=treatment, y=df_train0['adrd'], p=e)
cate_x = learner_x.predict(X=x_test0, treatment=treatment_test, p=e_test)
test_learner_x = BaseXClassifier(outcome_learner=LogisticRegression(max_iter=3000), 
                                 effect_learner=ElasticNetCV(max_iter=3000, random_state=1105))
test_cate_x = test_learner_x.fit_predict(X=x_test0, treatment=treatment_test, y=df_test0['adrd'], p=e_test)
((cate_x-test_cate_x)**2).mean()**(1/2)

INFO:causalml:Error metrics for group 1
INFO:causalml:     AUC   (Control):     0.9636
INFO:causalml:     AUC (Treatment):     0.9992
INFO:causalml:Log Loss   (Control):     0.1602
INFO:causalml:Log Loss (Treatment):     0.0691


0.07263506576962021

### Random Forest

In [57]:
learner_xrf = BaseXClassifier(outcome_learner=RandomForestClassifier(random_state=1105, max_depth=6),
                            effect_learner=RandomForestRegressor(random_state=1105, max_depth=6))
learner_xrf.fit(X=x_train0, treatment=treatment, y=df_train0['adrd'], p=e)
cate_xrf = learner_xrf.predict(X=x_test0, treatment=treatment_test, p=e_test)
test_learner_xrf = BaseXClassifier(outcome_learner=RandomForestClassifier(random_state=1105, max_depth=6), 
                                 effect_learner=RandomForestRegressor(random_state=1105, max_depth=6))
test_cate_xrf = test_learner_xrf.fit_predict(X=x_test0, treatment=treatment_test, y=df_test0['adrd'], p=e_test)
((cate_xrf-test_cate_xrf)**2).mean()**(1/2)

INFO:causalml:Error metrics for group 1
INFO:causalml:     AUC   (Control):     0.9861
INFO:causalml:     AUC (Treatment):     0.9965
INFO:causalml:Log Loss   (Control):     0.2268
INFO:causalml:Log Loss (Treatment):     0.2135


0.11825156610781064

### XGB

In [58]:
learner_xgb = BaseXClassifier(outcome_learner=XGBClassifier(random_state=1105, max_depth=6, n_estimators =100),
                             effect_learner=XGBRegressor(random_state=1105, max_depth=6))
learner_xgb.fit(X=x_train0, treatment=treatment, y=df_train0['adrd'], p=e)
cate_xgb = learner_xgb.predict(X=x_test0, treatment=treatment_test, p=e_test)
test_learner_xgb = BaseXClassifier(outcome_learner=XGBClassifier(random_state=1105, max_depth=6, n_estimators =100),
                             effect_learner=XGBRegressor(random_state=1105, max_depth=6))
test_cate_xgb = test_learner_xgb.fit_predict(X=x_test0, treatment=treatment_test, y=df_test0['adrd'], p=e_test)
((cate_xgb-test_cate_xgb)**2).mean()**(1/2)

INFO:causalml:Error metrics for group 1
INFO:causalml:     AUC   (Control):     1.0000
INFO:causalml:     AUC (Treatment):     1.0000
INFO:causalml:Log Loss   (Control):     0.0069
INFO:causalml:Log Loss (Treatment):     0.0094


0.24008614713283347

## R-learner

### Logistic regression

In [59]:
learner_r = BaseRClassifier(outcome_learner=LogisticRegression(max_iter=3000),
                            effect_learner=ElasticNet(max_iter=3000, random_state=1105))
learner_r.fit(X=x_train0, treatment=treatment, y=df_train0['adrd'], p=e)
cate_r = learner_r.predict(X=x_test0, p=e_test)
test_learner_r = BaseRClassifier(outcome_learner=LogisticRegression(max_iter=3000), 
                                 effect_learner=ElasticNet(max_iter=3000, random_state=1105))
test_cate_r = test_learner_r.fit_predict(X=x_test0, treatment=treatment_test, y=df_test0['adrd'], p=e_test)
((cate_r-test_cate_r)**2).mean()**(1/2)

INFO:causalml:generating out-of-fold CV outcome estimates
INFO:causalml:training the treatment effect model for 1 with R-loss
INFO:causalml:generating out-of-fold CV outcome estimates
INFO:causalml:training the treatment effect model for 1 with R-loss


0.0265762851850157

### XGBoosting

In [60]:
x_train = np.array(x_train0)
x_test = np.array(x_test0)

In [None]:
learner_rgb = BaseRClassifier(outcome_learner=XGBClassifier(random_state=1105, max_depth=5, n_estimators =100),
                             effect_learner=XGBRegressor(random_state=1105, max_depth=5))
learner_rgb.fit(X=x_train, treatment=treatment, y=df_train0['adrd'], p=e)
cate_rgb = learner_rgb.predict(X=x_test, p=e_test)
test_learner_rgb = BaseRClassifier(outcome_learner=XGBClassifier(random_state=1105, max_depth=5, n_estimators =100),
                             effect_learner=XGBRegressor(random_state=1105, max_depth=5))
test_cate_rgb = test_learner_rgb.fit_predict(X=x_test, treatment=treatment_test, y=df_test0['adrd'], p=e_test)
((cate_rgb-test_cate_rgb)**2).mean()**(1/2)

INFO:causalml:generating out-of-fold CV outcome estimates
INFO:causalml:training the treatment effect model for 1 with R-loss
INFO:causalml:generating out-of-fold CV outcome estimates
INFO:causalml:training the treatment effect model for 1 with R-loss


## Evaluation

In [None]:
import matplotlib.pyplot as plt

In [None]:
data1 = [cate_srf.flatten(), cate_trf.flatten(), cate_xrf.flatten()] 

In [None]:
plt.hist(data1, bins, label = ['S-RF', 'T-RF', 'X-RF'])
plt.legend(loc='upper left')