In [1]:
import shutil
import sys

import numpy as np
from pyinstrument import Profiler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import bartz

from bart_playground import *

In [2]:
# Configure NumPy printing first so the preview below is shown without
# scientific notation.
np.set_printoptions(suppress=True)

# Probabilities for the four tree-move types; passed to DefaultBART later on.
proposal_probs = dict(grow=0.4, prune=0.4, change=0.1, swap=0.1)

# Generate the "piecewise_flat" scenario with a fixed seed, then make a
# seeded train/test split so the numbers below are reproducible.
generator = DataGenerator(n_samples=160, n_features=2, noise=0.1, random_seed=42)
X, y = generator.generate(scenario="piecewise_flat")
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Preview the first twelve training targets.
print(y_train[:12])

[ 0.50327821  0.60672224  0.26898966  0.55211673  0.50693811  0.66162097
 -0.64127659  0.65112284  0.03487759  0.23276531  0.44055996  0.38216964]


In [3]:
# Fit the reference BART model: 100 trees, nskip=100 and ndpost=200
# (300 iterations in total), using the move probabilities defined earlier.
bart = DefaultBART(
    n_trees=100,
    nskip=100,
    ndpost=200,
    proposal_probs=proposal_probs,
)
bart.fit(X_train, y_train)

Iterations: 100%|██████████| 300/300 [00:08<00:00, 33.96it/s]


In [4]:
# Optional memory-footprint diagnostics, currently disabled. Uncommenting
# requires the third-party `pympler` package. Each line prints the size that
# `asizeof.asizeof` reports for part of the last (or second-to-last) trace
# entry; the first two subtract the size of a tree's `dataX` attribute.
# from pympler import asizeof
# 
# print(asizeof.asizeof(bart.trace[-2].trees) - asizeof.asizeof(bart.trace[-2].trees[0].dataX))
# print(asizeof.asizeof(bart.trace[-2].trees[0]) - asizeof.asizeof(bart.trace[-2].trees[0].dataX))
# print(asizeof.asizeof(bart.trace[-1].trees))
# print(asizeof.asizeof(bart.trace[-1].cache))
# print(asizeof.asizeof(bart.trace[-1].trees[0]))
# print(asizeof.asizeof(bart.trace[-1].trees[0].evals))
# print(asizeof.asizeof(bart.trace[-1].trees[0].node_indicators))
# print(asizeof.asizeof(bart.trace[-1].trees[0].n))

In [5]:
bart2 = DefaultBART(ndpost=400, nskip=100, n_trees=100, proposal_probs=proposal_probs)
%prun -s cumtime -D profile_bart.prof -q bart2.fit(X_train, y_train)
!gprof2dot -f pstats profile_bart.prof -o profile_bart.dot
!dot -Tpng profile_bart.dot -o profile_bart.png

Iterations: 100%|██████████| 500/500 [00:13<00:00, 36.68it/s]

 
*** Profile stats marshalled to file 'profile_bart.prof'.



'gprof2dot' 不是内部或外部命令，也不是可运行的程序
或批处理文件。
Error: dot: can't open profile_bart.dot: No such file or directory


In [6]:
# Collect the `vars` array of every tree in the last trace entry.
arrays = [t.vars for t in bart.trace[-1].trees]

# Per tree, count the non-negative entries of `vars`.
# NOTE(review): non-negative entries look like split-variable indices, which
# would make this the number of internal (split) nodes per tree; confirm.
counts = np.array([np.count_nonzero(tree_vars >= 0) for tree_vars in arrays])
print(counts)

# Boolean mask of trees with at least three such entries, then their indices.
deep_trees = counts >= 3
print(np.nonzero(deep_trees))

[1 2 1 2 1 0 2 3 1 1 1 2 1 1 1 2 0 0 2 2 1 2 2 2 2 1 1 3 3 1 1 1 2 1 1 1 3
 1 4 4 2 2 2 3 1 2 0 3 1 1 1 2 1 1 2 2 1 2 4 1 2 1 2 4 2 1 1 2 1 1 1 1 1 0
 3 1 2 2 4 1 3 2 1 1 1 1 1 5 1 1 1 3 1 0 3 2 1 1 2 1]
(array([ 7, 27, 28, 36, 38, 39, 43, 47, 58, 63, 74, 78, 80, 87, 91, 94],
      dtype=int64),)


In [7]:
# Print the `global_params` stored on the final entry of the sampler trace.
print(bart.sampler.trace[-1].global_params)

{'eps_sigma2': array([0.00564064])}


In [8]:
from bart_playground import visualize_tree

# Pick a single tree (index 90) from the last trace entry for inspection.
tree_sp: Tree = bart.sampler.trace[-1].trees[90]

# One item per line: the tree's printed form, its `vars` array and its
# `leaf_vals` array.
print(tree_sp, tree_sp.vars, tree_sp.leaf_vals, sep="\n")

X_0 <= 0.318 (split, n = 120)
	Val: -0.017 (leaf, n = 45)
	Val: -0.001 (leaf, n = 75)
[ 0 -1 -1 -2]
[        nan -0.0169546  -0.00132394         nan]


In [9]:
# scikit-learn baselines. `fit` returns the estimator itself, so construction
# and fitting can be chained.
rf = RandomForestRegressor(random_state=42).fit(X_train, y_train)
lr = LinearRegression().fit(X_train, y_train)

# bartz baseline with the same tree count and ndpost/nskip as `bart`.
# Both design matrices are transposed before being handed to bartz.
btz = bartz.BART.gbart(
    np.transpose(X_train),
    y_train,
    ntree=100,
    ndpost=200,
    nskip=100,
)
btpred_all = btz.predict(np.transpose(X_test))

# Average the stacked predictions over axis 0 to get one prediction per
# test point.
btpred = np.array(btpred_all).mean(axis=0)

Iteration 100/300 P_grow=0.55 P_prune=0.45 A_grow=0.36 A_prune=0.36 (burnin)
Iteration 200/300 P_grow=0.57 P_prune=0.43 A_grow=0.35 A_prune=0.37
Iteration 300/300 P_grow=0.57 P_prune=0.43 A_grow=0.39 A_prune=0.40


In [10]:
# Test-set MSE for every fitted model, keyed by a short model name.
models = {
    "bart": bart,
    "rf": rf,
    "lr": lr,
    "btz": btz,
}
results = {}
for model_name, model in models.items():
    # bartz predictions were already averaged into `btpred`; every other
    # model is scored through its own `predict`.
    results[model_name] = mean_squared_error(
        y_test,
        btpred if model_name == "btz" else model.predict(X_test),
    )
results

{'bart': 0.02295469515464727,
 'rf': 0.022139023845392215,
 'lr': 0.048045521328019404,
 'btz': 0.02328283761397566}

In [11]:
# Side-by-side preview (first twelve values each): the last trace entry
# evaluated on the training inputs, then the training targets after the
# preprocessor's `transform_y`.
print(
    bart.sampler.trace[-1].evaluate(X_train)[:12],
    bart.preprocessor.transform_y(y_train)[:12],
    sep="\n",
)

[ 0.28642207  0.35428549  0.31771414  0.36539222  0.40073081  0.36794919
 -0.35010413  0.39542453 -0.10834898  0.04257399  0.14588958  0.32570837]
[ 0.34923863  0.42552948  0.17644883  0.38525745  0.35193784  0.46601776
 -0.49488025  0.4582753   0.0037892   0.14973307  0.30298339  0.25992003]


In [12]:
# Reference point: MSE of a constant predictor that always outputs the
# test-set mean.
mean_squared_error(y_test, np.full_like(y_test, y_test.mean()))

0.10534048469161521

In [13]:
# Consistency check on the last trace entry: for every tree, `evaluate()`
# with no argument (presumably the values cached during fitting; confirm)
# must equal, element for element, a fresh `evaluate(X_train)`.
# Iterating over the trees themselves, rather than a hard-coded range(100),
# keeps the check covering every tree if `n_trees` is changed.
# The generator lets `all` stop at the first mismatch.
trees_consistent = all(
    (tree.evaluate() == tree.evaluate(X_train)).all()
    for tree in bart.sampler.trace[-1].trees
)
print("True" if trees_consistent else "False")

True


In [14]:
# Same consistency check at the level of the whole trace entry: its
# argument-free `evaluate()` should match `evaluate(X_train)` up to
# floating-point tolerance.
print(
    "True"
    if np.allclose(bart.trace[-1].evaluate(), bart.trace[-1].evaluate(X_train))
    else "False"
)

True
