In [1]:
import numpy as np
import sys
from pyinstrument import Profiler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from bart_playground import *
import bartz

In [2]:
# Build the synthetic regression problem shared by every later cell.
np.set_printoptions(suppress=True)  # print floats without scientific notation

# Passed to the BART sampler below; presumably the probability of proposing
# each tree move (grow / prune) -- confirm against bart_playground.
proposal_probs = dict(grow=0.5, prune=0.5)

generator = DataGenerator(n_samples=160, n_features=2, noise=0.1, random_seed=42)
X, y = generator.generate(scenario="piecewise_flat")

# Seeded split; the test fraction is left at train_test_split's default.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Peek at the first dozen training targets.
print(y_train[:12])

[ 0.50327821  0.60672224  0.26898966  0.55211673  0.50693811  0.66162097
 -0.64127659  0.65112284  0.03487759  0.23276531  0.44055996  0.38216964]


In [3]:
# Initial fit of the BART model on the training split only.
# The four pyinstrument lines are an optional profiling scaffold: uncomment
# all of them together to profile the fit.
# profiler = Profiler()
# profiler.start()
bart = DefaultBART(
    ndpost=50,
    nskip=100,
    n_trees=100,
    proposal_probs=proposal_probs,
)
bart.fit(X_train, y_train)
# profiler.stop()
# profiler.print()

Iterations: 100%|██████████| 150/150 [00:04<00:00, 32.28it/s]


In [4]:
# NOTE(review): this import would normally live in the notebook's first
# (imports) cell; it is kept here so the cell still runs on its own.
from tqdm import tqdm

# Sequential-update experiment: the held-out rows are fed to the already
# fitted model one at a time through `update_fit`.
# profiler = Profiler()
# profiler.start()
test_n = X_test.shape[0]
for i in tqdm(range(test_n - 1)):
    # A length-1 slice (instead of an integer index) keeps X 2-D and y 1-D.
    one_row = slice(i, i + 1)
    bart.update_fit(
        X_test[one_row, :],
        y_test[one_row],
        add_ndpost=10,
        add_nskip=5,
        quietly=True,
    )
# profiler.stop()
# profiler.print()
# The last row is added with more iterations and without `quietly`, and the
# call is left as the cell's final expression so its return value is shown.
bart.update_fit(X_test[-1:, :], y_test[-1:], add_ndpost=40, add_nskip=20)

100%|██████████| 39/39 [00:17<00:00,  2.18it/s]
Iterations: 100%|██████████| 60/60 [00:01<00:00, 32.76it/s]


<bart_playground.bart.DefaultBART at 0x263439158d0>

In [5]:
# Count the split nodes of every tree in the most recent draw.
# NOTE(review): judging by the tree printed further down in this notebook,
# `vars` holds a feature index (>= 0) for split nodes, -1 for leaves and -2
# for unused slots -- confirm against the Tree implementation.
arrays = [tree.vars for tree in bart.trace[-1].trees]
counts = np.array([np.count_nonzero(node_vars >= 0) for node_vars in arrays])

# Element-wise mask of the trees that have at least three split nodes.
deep_trees = counts >= 3

# First line: splits per tree; second line: indices of the "deep" trees.
print(counts, np.where(deep_trees), sep="\n")

[1 1 0 1 1 1 1 1 1 0 2 2 1 2 1 1 1 1 1 1 1 1 2 0 1 2 1 1 1 3 1 1 1 2 1 1 1
 1 1 2 2 1 1 1 1 1 1 1 0 1 0 1 2 3 1 2 1 1 1 1 2 2 0 1 3 1 2 1 1 0 1 2 1 2
 1 1 2 1 1 1 1 1 1 2 1 1 2 2 2 0 3 1 1 1 1 1 1 2 1 1]
(array([29, 53, 64, 90], dtype=int64),)


In [6]:
# Show the global (non-tree) parameters stored with the most recent draw.
latest_draw = bart.trace[-1]
print(latest_draw.global_params)

{'eps_sigma2': array([0.00762701])}


In [7]:
# Inspect one tree (index 72) of the most recent draw: its text rendering,
# then its raw `vars` array, then its `leaf_vals` array.
# NOTE(review): in the output saved with this notebook the root reports
# n = 120 while its two leaves report 70 + 90 = 160; worth checking whether
# the root count is refreshed when rows are added through `update_fit`.
tree_sp: Tree = bart.trace[-1].trees[72]

print(tree_sp, tree_sp.vars, tree_sp.leaf_vals, sep="\n")

X_0 <= 0.375 (split, n = 120)
	Val: 0.021 (leaf, n = 70)
	Val: -0.007 (leaf, n = 90)
[ 0 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2]
[        nan  0.02123494 -0.0067989          nan         nan         nan
         nan         nan         nan         nan         nan         nan
         nan         nan         nan         nan]


In [8]:
# Reference models, each trained on the training split only.
rf = RandomForestRegressor(random_state=42).fit(X_train, y_train)
lr = LinearRegression().fit(X_train, y_train)

# bartz receives the design matrices transposed relative to the
# (rows, features) layout used by the scikit-learn models above.
btz = bartz.BART.gbart(X_train.T, y_train, ntree=100, ndpost=200, nskip=100)
btpred_all = btz.predict(X_test.T)

# Collapse the first axis of the stacked predictions into one value per test
# row (presumably averaging over posterior draws -- confirm with bartz docs).
btpred = np.array(btpred_all).mean(axis=0)

Iteration 100/300 P_grow=0.55 P_prune=0.45 A_grow=0.36 A_prune=0.36 (burnin)
Iteration 200/300 P_grow=0.57 P_prune=0.43 A_grow=0.35 A_prune=0.37
Iteration 300/300 P_grow=0.57 P_prune=0.43 A_grow=0.39 A_prune=0.40


In [9]:
# Test-set mean squared error for every fitted model.
# NOTE(review): `bart` was updated with every (X_test, y_test) row through
# `update_fit` in an earlier cell, so its score below is in-sample, whereas
# rf, lr and btz only ever saw the training split. The numbers are therefore
# not a like-for-like comparison.
models = {"bart": bart, "rf": rf, "lr": lr, "btz": btz}

results = {
    model_name: mean_squared_error(
        y_test,
        # bartz predictions were already averaged into `btpred`; the other
        # models are asked for predictions on the spot.
        btpred if model_name == "btz" else model.predict(X_test),
    )
    for model_name, model in models.items()
}
results

{'bart': 0.012859467216255988,
 'rf': 0.022139023845392215,
 'lr': 0.048045521328019404,
 'btz': 0.02328283761397566}

In [10]:
# Side-by-side look at the first 12 training rows:
#   line 1 - the most recent draw evaluated on X_train,
#   line 2 - the training targets after the model's own preprocessing.
print(
    bart.sampler.trace[-1].evaluate(X_train)[:12],
    bart.preprocessor.transform_y(y_train)[:12],
    sep="\n",
)

[ 0.36285441  0.34324054  0.21345889  0.30186148  0.37323052  0.34360495
 -0.28949465  0.43301502 -0.0490685   0.16001756  0.2003634   0.38512668]
[ 0.34923863  0.42552948  0.17644883  0.38525745  0.35193784  0.46601776
 -0.49488025  0.4582753   0.0037892   0.14973307  0.30298339  0.25992003]


In [11]:
# Per-tree consistency check on the most recent draw: for each tree, the
# argument-free `evaluate()` restricted to the first `n_train` rows must match
# exactly what `evaluate(X_train)` returns.
# The check walks over the trees actually stored in the draw instead of a
# hard-coded `range(100)`, so it stays complete (and cannot raise IndexError)
# if the model is built with a different number of trees.
n_train = X_train.shape[0]
trees_agree = all(
    (tree.evaluate()[range(n_train), ] == tree.evaluate(X_train)).all()
    for tree in bart.trace[-1].trees
)
print("True" if trees_agree else "False")

True


In [12]:
# Same consistency check at the level of the whole draw: the argument-free
# `evaluate()` on its first `n_train` rows versus a fresh `evaluate(X_train)`,
# compared up to floating-point tolerance.
n_train = X_train.shape[0]
stored_fit = bart.trace[-1].evaluate()[range(n_train), ]
fresh_fit = bart.trace[-1].evaluate(X_train)
print("True" if np.allclose(stored_fit, fresh_fit) else "False")

True
