In [1]:
import numpy as np
import sys
from pyinstrument import Profiler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import os
sys.path.append(os.path.dirname(os.getcwd()))
from bart_playground import *

import bartz

In [2]:
# Proposal-move probabilities handed to the BART sampler: grow and prune are
# weighted equally.
proposal_probs = dict(grow=0.5, prune=0.5)

# Show floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

# Seeded synthetic data set, then a seeded train/test split that uses
# train_test_split's default proportions (no test_size is given).
generator = DataGenerator(
    n_samples=160,
    n_features=2,
    noise=0.1,
    random_seed=42,
)
X, y = generator.generate(scenario="piecewise_flat")
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Quick look at the first dozen training targets.
print(y_train[:12])

[ 0.50327821  0.60672224  0.26898966  0.55211673  0.50693811  0.66162097
 -0.64127659  0.65112284  0.03487759  0.23276531  0.44055996  0.38216964]


In [3]:
# Profile construction and fitting of the BART model with pyinstrument.
profiler = Profiler()
profiler.start()
try:
    bart = ChangeNumTreeBART(
        ndpost=200,
        nskip=100,
        n_trees=100,
        proposal_probs=proposal_probs,
    )
    bart.fit(X_train, y_train)
finally:
    # Stop the profiler even when the fit raises. Otherwise it stays active in
    # the kernel and a re-run of this cell may be unable to start a new one.
    profiler.stop()

# Text report of where the time went.
profiler.print()

Iterations: 100%|██████████| 300/300 [00:30<00:00,  9.71it/s]



  _     ._   __/__   _ _  _  _ _/_   Recorded: 21:05:46  Samples:  29992
 /_//_/// /_\ / //_// / //_'/ //     Duration: 30.921    CPU time: 29.672
/   _/                      v5.0.1

Profile at C:\Windows\Temp\ipykernel_25972\1745855184.py:2

30.920 <module>  C:\Windows\Temp\ipykernel_25972\1745855184.py:1
└─ 30.920 ChangeNumTreeBART.fit  bart_playground\bart.py:22
   └─ 30.916 NTreeSampler.run  bart_playground\samplers.py:69
      └─ 30.597 NTreeSampler.one_iter  bart_playground\samplers.py:224
         ├─ 14.600 NTreeSampler.log_mh_ratio  bart_playground\samplers.py:218
         │  ├─ 10.822 BARTLikelihood.trees_log_marginal_lkhd_ratio  bart_playground\priors.py:298
         │  │  └─ 10.620 BARTLikelihood.trees_log_marginal_lkhd  bart_playground\priors.py:256
         │  │     ├─ 2.771 [self]  bart_playground\priors.py
         │  │     ├─ 2.699 svd  <__array_function__ internals>:177
         │  │     │  └─ 2.584 svd  numpy\linalg\linalg.py:1463
         │  │     ├─ 2.307 Parameter

In [4]:
# Baseline regressors fitted on the same training split as the BART model.
# The forest is seeded (42, matching the data generation and split seeds) so
# its score is reproducible across Restart & Run All; an unseeded forest gives
# a different MSE on every run.
rf = RandomForestRegressor(random_state=42)
lr = LinearRegression()
rf.fit(X_train, y_train)
lr.fit(X_train, y_train)

# Reference BART implementation from bartz, with the same tree count and
# draw/burn-in settings as the bart_playground fit.
# NOTE(review): inputs are transposed before being passed, presumably because
# gbart expects predictors laid out as (n_features, n_samples); confirm.
btz = bartz.BART.gbart(np.transpose(X_train), y_train, ntree=100, ndpost=200, nskip=100)
btpred_all = btz.predict(np.transpose(X_test))

# Collapse axis 0 (presumably the posterior-draw axis; verify) into a single
# point prediction per test row.
btpred = np.mean(np.array(btpred_all), axis=0)

Iteration 100/300 P_grow=0.55 P_prune=0.45 A_grow=0.36 A_prune=0.36 (burnin)
Iteration 200/300 P_grow=0.57 P_prune=0.43 A_grow=0.35 A_prune=0.37
Iteration 300/300 P_grow=0.57 P_prune=0.43 A_grow=0.39 A_prune=0.40


In [5]:
# NOTE(review): the recorded output of this cell is
# "TypeError: list indices must be integers or slices, not list", so predict()
# currently fails for this fitted model. The cause is not visible in this
# notebook (presumably inside bart_playground); resolve it before relying on
# this call or on any later cell that calls bart.predict.
bart.predict(X_test)

TypeError: list indices must be integers or slices, not list

In [None]:

# Fitted models keyed by the label used in the results dict.
models = dict(bart=bart, rf=rf, lr=lr, btz=btz)

# Test-set mean squared error per model. The bartz entry reuses the averaged
# predictions computed earlier (btpred); every other model is asked to predict
# on X_test directly.
results = {}
for model_name, model in models.items():
    y_pred = btpred if model_name == "btz" else model.predict(X_test)
    results[model_name] = mean_squared_error(y_test, y_pred)
results