In [1]:
import numpy as np
import sys
from pyinstrument import Profiler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

sys.path.append("../")

from bart_playground import *

In [2]:
# One seed shared by data generation and the train/test split so that a
# Restart & Run All reproduces the same split and the same downstream scores.
SEED = 42

# Move probabilities handed to the BART sampler: grow and prune equally likely.
proposal_probs = {"grow" : 0.5,
                  "prune" : 0.5}

# Data set from the project's DataGenerator, "linear" scenario.
generator = DataGenerator(n_samples=200, n_features=2, noise=0.1, random_seed=SEED)
X, y = generator.generate(scenario="linear")

# Fix random_state: without it the split (and every MSE below) changes per run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=SEED)

# NOTE(review): ndpost/nskip presumably are kept draws / burn-in iterations
# (the fit log shows 200 iterations = 100 + 100) — confirm against DefaultBART.
bart = DefaultBART(ndpost=100, nskip=100, n_trees=200, proposal_probs=proposal_probs)

In [3]:
# Profile a single BART fit with pyinstrument and print the call tree.
profiler = Profiler()
profiler.start()
try:
    bart.fit(X_train, y_train)
finally:
    # Always stop the profiler, even if fit() raises; otherwise it stays
    # active and re-running this cell can fail when start() is called again.
    profiler.stop()
profiler.print()

Running iteration 0
Running iteration 10
Running iteration 20
Running iteration 30
Running iteration 40
Running iteration 50
Running iteration 60
Running iteration 70
Running iteration 80
Running iteration 90
Running iteration 100
Running iteration 110
Running iteration 120
Running iteration 130
Running iteration 140
Running iteration 150
Running iteration 160
Running iteration 170
Running iteration 180
Running iteration 190

  _     ._   __/__   _ _  _  _ _/_   Recorded: 15:06:19  Samples:  15206
 /_//_/// /_\ / //_// / //_'/ //     Duration: 15.422    CPU time: 71.631
/   _/                      v5.0.1

Profile at /var/folders/7s/13cq5lrs781dz7rrxxz25x5h0000gn/T/ipykernel_90157/1228367378.py:2

15.421 <module>  /var/folders/7s/13cq5lrs781dz7rrxxz25x5h0000gn/T/ipykernel_90157/1228367378.py:1
└─ 15.421 DefaultBART.fit  bart_playground/bart.py:23
   └─ 15.415 DefaultSampler.run  bart_playground/samplers.py:60
      └─ 15.394 DefaultSampler.one_iter  bart_playground/samplers.py:148
     

In [4]:
# Reference models fitted on the same training split as BART.
# The random forest is stochastic (bootstrap + feature sub-sampling), so pin
# its random_state to keep the reported score reproducible across runs.
rf = RandomForestRegressor(random_state=42)
lr = LinearRegression()
rf.fit(X_train, y_train)
lr.fit(X_train, y_train)

In [6]:
# Fitted models keyed by a short label, in the order they should be reported.
models = dict(bart=bart, rf=rf, lr=lr)

# Test-set mean squared error for each model, keyed by the same labels.
results = {}
for model_name, model in models.items():
    predictions = model.predict(X_test)
    results[model_name] = mean_squared_error(y_test, predictions)
results

{'bart': 2.269018691054929,
 'rf': 0.02940935627563496,
 'lr': 0.009467235483044994}

In [7]:
# Trivial baseline: MSE obtained by predicting the test-set mean for every row.
# NOTE(review): the recorded BART score (~2.269) is almost identical to this
# baseline (~2.273), which suggests BART's predictions carry no signal — investigate.
mean_prediction = np.full(y_test.shape, y_test.mean())
mean_squared_error(y_test, mean_prediction)

2.2726822997567653

In [18]:
# Inspect the entry at index 10 of the sampler trace and display what its
# evaluate() returns (an array, per the recorded output).
# NOTE(review): with nskip=100 this index is presumably still burn-in — confirm.
trace_entry = bart.trace[10]
trace_entry.evaluate()

array([ -9.94448785,  -1.14384066,   0.82995164,  -6.72625767,
       -28.43141706, -23.17601633, -20.75988906, -10.43971582,
        -2.26679646, -16.5006751 , -11.26486968, -23.31768236,
       -28.63411822, -28.4226942 , -27.08822768, -20.05680917,
       -18.27173574, -25.10129273,  -2.74696082,   3.1967124 ,
       -10.24063128, -21.61170533, -14.97358994,   0.42287353,
       -10.35074975,  -7.24391433,  -5.29268534,  -6.15168005,
       -20.2527401 , -18.96828459, -15.6054169 ,  -4.5578139 ,
        -9.47128732,  -5.55855012, -27.98774127,  -1.62883099,
       -26.96210983, -13.61791556,   2.09651552,  -9.91298106,
       -27.1297207 , -29.03421454,   2.07966529,  -9.2381045 ,
        -8.636059  ,  -3.75951425, -24.88873094, -28.57221152,
        -7.54968178, -20.13765812, -11.22425223, -25.46170964,
         0.95837828, -22.82417078,   1.99920847, -23.70635151,
       -12.36664547, -18.76126667,  -9.46798622,   3.89651125,
        -6.96970459,  -7.24112054, -11.60324616, -27.25