In [1]:
import sys
import os
# Make the parent of the current working directory importable
# (presumably the repository root that holds bart_playground — confirm).
# Guarded so that re-running this cell does not pile up duplicate sys.path entries.
_parent_dir = os.path.dirname(os.getcwd())
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

In [2]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from bart_playground import DefaultBART, Tree, DataGenerator
import bartz

In [3]:
# Move-type weights handed to the BART sampler via `proposal_probs`.
proposal_probs = {
    "grow": 0.4,
    "prune": 0.4,
    "change": 0.1,
    "swap": 0.1,
}

# Print floats in fixed-point form instead of scientific notation.
np.set_printoptions(suppress=True)

# Synthetic data for the "linear" scenario, followed by a seeded train/test split
# (scikit-learn's default split sizes are used since none are passed).
generator = DataGenerator(n_samples=5000, n_features=40, noise=0.1, random_seed=42)
X, y = generator.generate(scenario="linear")
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Peek at the first dozen training targets.
print(y_train[:12])

[47.77161298 52.6198333  64.61658169 53.45362304 59.58934438 59.78809596
 54.26683888 62.27312519 59.07143414 46.83945454 58.58820983 66.35490222]


In [4]:
# First BART fit of the notebook: nskip + ndpost = 300 sampler iterations.
# NOTE(review): the original comment describes this run as "initialize numba" —
# presumably the first fit triggers JIT compilation; confirm.
bart = DefaultBART(
    ndpost=200,
    nskip=100,
    n_trees=100,
    proposal_probs=proposal_probs,
)
bart.fit(X_train, y_train)

Iterations: 100%|██████████| 300/300 [00:12<00:00, 24.02it/s]


In [5]:
import pandas as pd

# Per-move counters kept by the sampler, keyed by the column each one fills.
sampler = bart.sampler
move_counts = {
    "selected": sampler.move_selected_counts,
    "success": sampler.move_success_counts,
    "accepted": sampler.move_accepted_counts,
}

# One row per move type, one column per counter, plus three derived rates:
#   success_rate = success / selected
#   accept_rate  = accepted / success   (reflects the MH acceptance probability)
#   change_rate  = accepted / selected  (reflects how often a tree actually changes)
df = pd.DataFrame(
    {column: pd.Series(tally) for column, tally in move_counts.items()}
).assign(
    success_rate=lambda d: d["success"] / d["selected"],
    accept_rate=lambda d: d["accepted"] / d["success"],
    change_rate=lambda d: d["accepted"] / d["selected"],
)

print(df)

        selected  success  accepted  success_rate  accept_rate  change_rate
grow       11957    11957      1931      1.000000     0.161495     0.161495
prune      12104    11969      1628      0.988847     0.136018     0.134501
change      2905     2874       114      0.989329     0.039666     0.039243
swap        3034     2522       283      0.831246     0.112213     0.093276


In [6]:
bart2 = DefaultBART(ndpost=1000, nskip=100, n_trees=50, proposal_probs=proposal_probs)
%prun -s cumtime -D profile_bart.prof -q bart2.fit(X_train, y_train)
!gprof2dot -f pstats profile_bart.prof -o profile_bart.dot
!dot -Tpng profile_bart.dot -o output/profile_bart.png

Iterations: 100%|██████████| 1100/1100 [00:12<00:00, 88.56it/s] 


 
*** Profile stats marshalled to file 'profile_bart.prof'.


In [7]:
# `vars` array of every tree in the last draw of the trace.
arrays = [tree.vars for tree in bart.trace[-1].trees]
# Count the non-negative entries of each array; the tree printed later in this
# notebook shows split variables as indices >= 0 and other slots as negative values.
counts = np.array([np.count_nonzero(arr >= 0) for arr in arrays])
print(counts)
# Boolean mask of trees with at least 3 such entries, computed as one vectorized
# comparison instead of a Python loop over the array.
deep_trees = counts >= 3
# Indices of the trees flagged by the mask.
print(np.where(deep_trees))

[3 2 3 3 3 4 2 2 4 3 3 4 2 2 3 2 3 5 4 1 3 3 3 2 2 3 2 1 5 3 4 6 3 2 1 5 2
 1 3 3 4 5 3 3 2 2 4 5 3 5 3 2 3 5 2 6 3 3 3 4 3 3 1 2 3 2 4 3 2 2 3 3 4 6
 3 2 2 3 3 5 2 4 3 3 2 3 2 1 3 2 4 4 2 5 1 4 4 3 4 3]
(array([ 0,  2,  3,  4,  5,  8,  9, 10, 11, 14, 16, 17, 18, 20, 21, 22, 25,
       28, 29, 30, 31, 32, 35, 38, 39, 40, 41, 42, 43, 46, 47, 48, 49, 50,
       52, 53, 55, 56, 57, 58, 59, 60, 61, 64, 66, 67, 70, 71, 72, 73, 74,
       77, 78, 79, 81, 82, 83, 85, 88, 90, 91, 93, 95, 96, 97, 98, 99]),)


In [8]:
# Average length of the per-tree `vars` arrays collected in `arrays`.
np.mean(list(map(len, arrays)))

np.float64(14.72)

In [9]:
# Global parameters stored on the final entry of the sampler trace.
final_state = bart.sampler.trace[-1]
print(final_state.global_params)

{'eps_sigma2': array([0.00082771])}


In [10]:
from bart_playground import visualize_tree  # imported here but not called in this cell

# Pull tree number 50 out of the final sampler draw for inspection.
tree_sp: Tree = bart.sampler.trace[-1].trees[50]

# Show, in order: the tree's printed form, its `vars` array and its `leaf_vals` array.
for view in (tree_sp, tree_sp.vars, tree_sp.leaf_vals):
    print(view)

X_18 <= 0.551478803 (split, n = 3750)
	X_37 <= 0.687696576 (split, n = 2062)
		Val: -0.008518734 (leaf, n = 1450)
		Val: -0.003518621 (leaf, n = 612)
	X_3 <= 0.931643546 (split, n = 1688)
		Val: 0.000340087 (leaf, n = 1559)
		Val: 0.011936542 (leaf, n = 129)
[18 37  3 -1 -1 -1 -1 -2]
[        nan         nan         nan -0.00851873 -0.00351862  0.00034009
  0.01193654         nan]


In [11]:
# Reference models fit on the same training split: a seeded random forest and
# ordinary least squares. scikit-learn's fit() returns the estimator itself.
rf = RandomForestRegressor(random_state=42).fit(X_train, y_train)
lr = LinearRegression().fit(X_train, y_train)

# bartz is given the predictors transposed
# (presumably it expects features along the first axis — confirm).
btz = bartz.BART.gbart(
    np.transpose(X_train),
    y_train,
    ntree=100,
    ndpost=200,
    nskip=100,
)
btpred_all = btz.predict(np.transpose(X_test))
# Collapse axis 0 to a single prediction per test row
# (presumably axis 0 indexes posterior draws — confirm).
btpred = np.array(btpred_all).mean(axis=0)

INFO:2025-11-10 03:41:08,930:jax._src.xla_bridge:867: Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
INFO:jax._src.xla_bridge:Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory


Iteration 100/300 P_grow=0.53 P_prune=0.47 A_grow=0.11 A_prune=0.13 (burnin)
Iteration 200/300 P_grow=0.50 P_prune=0.50 A_grow=0.10 A_prune=0.08
Iteration 300/300 P_grow=0.50 P_prune=0.50 A_grow=0.06 A_prune=0.08


In [12]:
# Test-set MSE for every fitted model.
models = {"bart": bart, "rf": rf, "lr": lr, "btz": btz}

# bartz predictions were already averaged into `btpred`, so they are reused;
# every other model is asked to predict on X_test directly.
results = {
    name: mean_squared_error(
        y_test,
        btpred if name == "btz" else estimator.predict(X_test),
    )
    for name, estimator in models.items()
}
results

{'bart': 1.9039084165786992,
 'rf': 15.553392321882356,
 'lr': 0.010585269816872593,
 'btz': 1.7780111190121484}

In [13]:
# Side-by-side look at the first 12 training rows: the final draw evaluated on
# X_train versus the preprocessor-transformed training targets.
final_draw = bart.sampler.trace[-1]
print(final_draw.evaluate(X_train)[:12])
print(bart.preprocessor.transform_y(y_train)[:12])

[-0.2819172  -0.17924233  0.1719373  -0.15850334  0.0223403  -0.04055745
 -0.17597386  0.06062453 -0.04503344 -0.29411054 -0.00813914  0.23933122]
[-0.29961255 -0.17263682  0.14156011 -0.15079972  0.00989589  0.01510122
 -0.12950146  0.08018457 -0.00366827 -0.32402596 -0.016324    0.18708703]


In [14]:
# Reference error: MSE of predicting the test-set mean for every test row.
baseline_pred = np.full_like(y_test, y_test.mean())
mean_squared_error(y_test, baseline_pred)

32.83170985183459

In [15]:
# For every tree in the final sampler draw, compare the no-argument evaluate()
# with evaluate(X_train) element by element (exact equality).
# Iterates over the trees themselves rather than a hard-coded range(100), so the
# check stays correct if n_trees changes.
# NOTE(review): presumably the no-argument form returns values stored on the tree
# from training — confirm. Exact `==` also treats NaN entries as unequal.
last_draw_trees = bart.sampler.trace[-1].trees
per_tree_match = [
    (tree.evaluate() == tree.evaluate(X_train)).all()
    for tree in last_draw_trees
]
# Prints "True" only when every tree matches, "False" otherwise.
print(all(per_tree_match))

False


In [16]:
# Same comparison at the level of the whole final draw, with an absolute
# tolerance of 1e-6: no-argument evaluate() versus evaluate(X_train).
last_draw = bart.trace[-1]
print(np.allclose(last_draw.evaluate(), last_draw.evaluate(X_train), atol=1e-6))

False
