In [1]:
import numpy as np
import sys

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

from bart_playground.bcf.bcf import BCF
from bart_playground.params import Tree
from bart_playground import *

In [2]:
# Tree-move proposal probabilities. Not referenced again in the visible cells.
proposal_probs = {"grow": 0.5, "prune": 0.5}

# Synthetic regression data from the project's DataGenerator.
n_samples = 640
generator = DataGenerator(n_samples=n_samples, n_features=2, noise=0.1, random_seed=42)
X, y = generator.generate(scenario="piecewise_flat")

# Two mutually exclusive treatment arms drawn from a dedicated RNG:
# arm 1 is a fair coin flip, arm 2 is a second fair coin flip that only
# counts for rows not already in arm 1. Rows in neither arm are controls.
z_rng = np.random.default_rng(0)
z1 = z_rng.binomial(1, 0.5, n_samples).astype(bool)
z2 = ~z1 & z_rng.binomial(1, 0.5, n_samples).astype(bool)
z = np.column_stack((z1, z2))

# Constant treatment effects: +0.5 for arm 1, -0.5 for arm 2.
y = y + 0.5 * z1 - 0.5 * z2

# Single-arm alternative kept for reference:
# z = z1.reshape(-1, 1)
# y = y + z[:, 0] * 0.5 - 0.5

# Split features, outcome and treatment indicators with the same shuffle.
X_train, X_test, y_train, y_test, z_train, z_test = train_test_split(
    X, y, z, random_state=42
)

# Print floats in fixed-point notation for the rest of the notebook.
np.set_printoptions(suppress=True)
print(y_train[:10])

[ 0.56517481 -0.04621271 -0.27779943  0.53019707  0.98857901  0.44933973
  0.77858126  0.43837069 -0.01191688  0.875094  ]


In [3]:
# First five training rows (all feature columns).
print(X_train[:5])

[[0.73489316 0.20240459]
 [0.74882078 0.80138943]
 [0.58106114 0.3468698 ]
 [0.97069802 0.89312112]
 [0.23855282 0.84940884]]


In [4]:
# Main BCF model that is profiled and evaluated below.
# The per-arm tree counts are derived from the number of treatment columns in
# z so the list length cannot drift from n_treat_arms when the treatment
# design changes (presumably BCF expects one entry per arm; confirm against
# the BCF signature).
bcf = BCF(
    n_treat_arms=z.shape[1],          # Number of treatment arms (columns of z)
    n_mu_trees=100,                   # Number of prognostic effect trees
    n_tau_trees=[50] * z.shape[1],    # Number of treatment effect trees, 50 per arm
    ndpost=100,                       # Posterior samples
    nskip=100,                        # Burn-in iterations
    random_state=42,
)


In [5]:
# Warm-up run on a throwaway model (one burn-in iteration, one posterior draw)
# so that one-time start-up cost (presumably JIT compilation; confirm) is paid
# before the profiled fit rather than showing up inside the profile.
# The per-arm tree counts follow the number of columns in z instead of being
# hard-coded for two arms.
# NOTE(review): this call passes ps=False while the profiled fit leaves `ps`
# at its default; confirm the warm-up exercises the same code path.
bcf2 = BCF(
    n_treat_arms=z.shape[1],
    n_mu_trees=100,
    n_tau_trees=[50] * z.shape[1],
    ndpost=1,
    nskip=1,
    random_state=42,
)
bcf2.fit(X_train, y_train, z_train, ps=False)

Iterations: 100%|██████████| 2/2 [00:07<00:00,  3.72s/it]


In [6]:
# Profile the full BCF fit: sort the report by cumulative time (-s cumtime),
# dump the raw stats to profile_bcf.prof (-D) and suppress the pager (-q).
# `ps` is left at its default here, unlike the warm-up call.
%prun -s cumtime -D profile_bcf.prof -q bcf.fit(X_train, y_train, z_train)

Iterations: 100%|██████████| 200/200 [00:20<00:00,  9.61it/s]


 
*** Profile stats marshalled to file 'profile_bcf.prof'.


In [7]:
# Turn the saved pstats profile into a Graphviz call graph, then render it to
# PNG. Both commands run in the shell, so gprof2dot and Graphviz `dot` must be
# available on PATH.
!gprof2dot -f pstats profile_bcf.prof -o profile_bcf.dot
!dot -Tpng profile_bcf.dot -o profile_bcf.png

In [7]:
# Inspect a single prognostic tree (index 70) from the last entry of the
# sampler trace: its printed structure, its split-variable array and its
# leaf-value array.
tree_sp: Tree = bcf.sampler.trace[-1].mu_trees[70]

for view in (tree_sp, tree_sp.vars, tree_sp.leaf_vals):
    print(view)

X_2 <= 0.563 (split, n = -2)
	Val: 0.000 (leaf, n = -2)
	Val: 0.004 (leaf, n = 144)
[ 2 -1 -1 -2]
[       nan 0.00033573 0.00425373        nan]


In [8]:
# Component-wise predictions on the test set. Later cells index the result as
# [0] (1-D values) and [1] (one column per treatment arm) -- presumably the
# prognostic and treatment-effect parts respectively; confirm against BCF.
bcf_result = bcf.predict_components(X_test, z_test)

In [9]:
# Show the first ten rows of each of the two predicted components.
for component_idx in (0, 1):
    print(bcf_result[component_idx][:10])

[ 0.1610919   0.15502469 -0.02513103  0.19560356 -0.0180001   0.16599144
  0.16963446  0.19157486  0.14897462 -0.00616356]
[[ 0.21043    -0.18655092]
 [ 0.21039799 -0.1943663 ]
 [ 0.22211409 -0.19810202]
 [ 0.21228725 -0.19436819]
 [ 0.20994748 -0.19439678]
 [ 0.21057743 -0.19571759]
 [ 0.21025034 -0.18745934]
 [ 0.21120382 -0.19546122]
 [ 0.21009583 -0.19184321]
 [ 0.20897425 -0.1953831 ]]


In [11]:
# Shape of the ten-row slice of the second component (rows x treatment arms).
print(np.shape(bcf_result[1][:10]))

(10, 2)


#### MLearner class

In [12]:
from sklearn import clone

def control_indices(z):
    """Return a boolean mask of the rows that belong to no treatment arm.

    Despite the name, the result is a mask (one bool per row), not an array
    of integer positions.

    Parameters
    ----------
    z : array-like, shape (n_rows, n_arms) or (n_rows,)
        Treatment indicators. Any non-zero / truthy entry counts as
        "treated", so boolean and 0/1 integer encodings give the same result.
        A 1-D input is treated as a single arm.

    Returns
    -------
    numpy.ndarray of bool, shape (n_rows,)
        True where every arm indicator of the row is falsy.
    """
    # Coerce first: with integer indicators, `bool | int` yields an int array
    # and `~` would then be a bitwise NOT (-1 / -2) instead of a logical one.
    z = np.asarray(z).astype(bool)
    if z.ndim == 1:
        z = z.reshape(-1, 1)
    return ~z.any(axis=1)

class MLearner:
    """Multi-arm outcome learner: one regressor per treatment arm plus one for controls.

    Each treatment arm gets its own clone of ``model_treated`` and the control
    group (rows that are in no arm) gets a clone of ``model_control``. Every
    sub-model is fitted and queried only on the rows of its own group.

    Parameters
    ----------
    n_treated_arms : int
        Number of treatment arms, i.e. the expected number of columns of ``z``.
    model_treated : estimator
        Template estimator with ``fit(X, y)`` / ``predict(X)``; cloned once per arm.
    model_control : estimator
        Template estimator for the control group; cloned once.
    """

    def __init__(self, n_treated_arms, model_treated, model_control):
        # clone() gives every group its own estimator object instead of
        # sharing the instance passed in by the caller.
        self.model_treated_list = [clone(model_treated) for _ in range(n_treated_arms)]
        self.model_control = clone(model_control)

    def _group_masks(self, z):
        """Return ``(control_mask, [arm_mask, ...])`` as boolean row masks for ``z``."""
        # Coerce to bool so 0/1 integer indicators act as masks rather than
        # being interpreted as integer (fancy) row indices.
        z = np.asarray(z).astype(bool)
        if z.ndim == 1:
            z = z.reshape(-1, 1)
        n_arms = len(self.model_treated_list)
        if z.ndim != 2 or z.shape[1] != n_arms:
            raise ValueError(
                f"z must have one column per treatment arm: expected {n_arms}, "
                f"got array of shape {z.shape}"
            )
        arm_masks = [z[:, arm] for arm in range(n_arms)]
        control_mask = ~z.any(axis=1)
        return control_mask, arm_masks

    def fit(self, X, y, z):
        """Fit each arm model on its treated rows and the control model on the rest.

        A row flagged in several arms is used to fit each of those arms.

        Returns
        -------
        self
            The fitted learner, so calls can be chained.

        Raises
        ------
        ValueError
            If ``z`` does not have one column per treatment arm.
        """
        control_mask, arm_masks = self._group_masks(z)

        # Treated arms first, then controls.
        for arm_model, arm_mask in zip(self.model_treated_list, arm_masks):
            arm_model.fit(X[arm_mask], y[arm_mask])
        self.model_control.fit(X[control_mask], y[control_mask])
        return self

    def predict(self, X, z):
        """Predict the outcome of every row with the model of the group it belongs to.

        Control rows are filled first and arms are then applied in column
        order, so for a row flagged in several arms the last such arm wins.
        Groups with no rows are skipped so sub-models never receive an empty
        input.

        Raises
        ------
        ValueError
            If ``z`` does not have one column per treatment arm.
        """
        control_mask, arm_masks = self._group_masks(z)
        preds = np.empty(len(X))

        if control_mask.any():
            preds[control_mask] = self.model_control.predict(X[control_mask])
        for arm_model, arm_mask in zip(self.model_treated_list, arm_masks):
            if arm_mask.any():
                preds[arm_mask] = arm_model.predict(X[arm_mask])
        return preds

In [13]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

# Template base learners for the MLearner baselines, from simplest to most
# flexible. The tree-based ones are seeded for reproducibility.
lr = LinearRegression()
dt = DecisionTreeRegressor(random_state=42)
rf = RandomForestRegressor(random_state=42)

#### Comparison

In [14]:
nta = z.shape[1]  # Number of treatment arms


def _fit_mlearner(base_model):
    """Build an MLearner that uses `base_model` for every group and fit it on the training split."""
    learner = MLearner(n_treated_arms=nta, model_treated=base_model, model_control=base_model)
    learner.fit(X_train, y_train, z_train)
    return learner


# One baseline per base learner, built and fitted in the same order as before.
tlearner_rf = _fit_mlearner(rf)
tlearner_lr = _fit_mlearner(lr)
tlearner_dt = _fit_mlearner(dt)

In [15]:
# Test-set mean squared error of the outcome predictions for every model.
models = {
    "bcf": bcf,
    "rf": tlearner_rf,
    "lr": tlearner_lr,
    "dt": tlearner_dt,
}
results = {
    model_name: mean_squared_error(y_test, model.predict(X_test, z_test))
    for model_name, model in models.items()
}
results

{'bcf': 0.019123583481006283,
 'rf': 0.021441280609918477,
 'lr': 0.05466893346335514,
 'dt': 0.03223509459291045}