In [2]:
# Standard library
from pathlib import Path

# Third-party
import arviz as az
import cloudpickle as cpkl
import dill
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc as pm
import pymc_bart as pmb
import pymc_bart.utils as pmbu
import pytensor.tensor as pt

# Report the library versions the recorded outputs were produced with.
print(f"Running on PyMC v{pm.__version__}")
print(f"Running on PyMC-BART v{pmb.__version__}")

Running on PyMC v5.9.0
Running on PyMC-BART v0.5.1


In [3]:

# Read the bikes dataset from the local data directory; if that file is
# missing, read whatever pm.get_data returns for the same file name.
try:
    bikes = pd.read_csv(Path("..") / "data" / "bikes.csv")
except FileNotFoundError:
    bikes = pd.read_csv(pm.get_data("bikes.csv"))

# Predictor columns fed to the model.
features = ["hour", "temperature", "humidity", "workingday"]

# Full predictor table and response column.
X = bikes[features]
Y = bikes["count"]

# First ten rows of each, reused later as the prediction inputs.
xt = X[:10]
yt = Y[:10]

In [33]:
# Model definition: BART on the log of the counts, exponentiated to give the
# mean of a negative-binomial likelihood with dispersion `a`.
with pm.Model() as model_bikes:
    # Mutable container so the predictors can be swapped with pm.set_data later.
    xdata = pm.MutableData("xdata", X)
    a = pm.Exponential("a", 1)
    mu_ = pmb.BART("mu_", xdata, np.log(Y), m=20)
    mu = pm.Deterministic("mu", pm.math.exp(mu_))
    # The shape follows the first dimension of `xdata`, so it changes with the data.
    y = pm.NegativeBinomial(
        "y",
        mu=mu,
        alpha=a,
        observed=Y,
        shape=xdata.shape[0],
    )

# Short sampling run (100 tuning + 100 draws per chain), convergence checks skipped.
with model_bikes:
    idata_bikes = pm.sample(
        draws=100,
        tune=100,
        random_seed=99,
        compute_convergence_checks=False,
    )

idata_bikes

Only 100 samples in chain.
Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
>NUTS: [a]
>PGBART: [mu_]


Sampling 4 chains for 100 tune and 100 draw iterations (400 + 400 draws total) took 4 seconds.


# Save all_trees
- Pickling `all_trees` and loading it back works.

In [4]:
# Copy the trees stored on the op that produced `mu_` into a plain list.
# This needs `model_bikes` to exist in the current kernel; the NameError
# recorded below is what happens when the model cell has not been run first.
all_trees = list(model_bikes.mu_.owner.op.all_trees)


NameError: name 'model_bikes' is not defined

In [185]:
# Serialise the tree list to disk with cloudpickle.
with open('test_all_tree.pkl', 'wb') as file:
    cpkl.dump(all_trees, file)

In [5]:
# Read the tree list back from disk.
# NOTE: unpickling executes code from the file, so only load files written by this notebook.
with open('test_all_tree.pkl', 'rb') as file:
    all_tree2 = cpkl.load(file)

In [6]:
# Predict for the first ten rows directly from the reloaded trees, without the model.
# The generator is seeded (same seed as the pm.sample call) so the sampled
# values, and the numbers printed from them, are repeatable across runs.
rng = np.random.default_rng(99)
xt_2 = pt.constant(xt)
# NOTE(review): `_sample_posterior` is a private PyMC-BART helper; its signature
# may change between releases — confirm when upgrading.
smpl = pmbu._sample_posterior(all_tree2, xt_2, rng, size=400, shape=1)
# Average over the leading axis (presumably the 400 sampled draws).
mu_smpl = smpl.mean(0)
# `mu_smpl` is already a NumPy array, so exponentiate it directly rather than
# building and evaluating a PyTensor graph; the values are the same.
ex_mu = np.exp(mu_smpl)

In [39]:
# Round-trip the inference data through a cloudpickle file.
# The file is written only when it is missing, so this cell also works on a
# fresh run instead of depending on a file left over from an earlier session.
# (NetCDF alternative: idata_bikes.to_netcdf("test3.nc") / az.from_netcdf("test3.nc").)
idata_path = Path("test4.pkl")

if not idata_path.exists():
    with idata_path.open(mode="wb") as file:
        cpkl.dump(idata_bikes, file)

# NOTE: unpickling executes code from the file, so only load files written by this notebook.
with idata_path.open(mode="rb") as file:
    idata4 = cpkl.load(file)


In [47]:
# Posterior predictive for the first ten rows, using the in-memory trace.
with model_bikes:
    pm.set_data({"xdata": xt})
    post1 = pm.sample_posterior_predictive(
        idata_bikes,
        var_names=["mu", "y"],
    )

# Same prediction, using the trace that was reloaded from the pickle file.
with model_bikes:
    pm.set_data({"xdata": xt})
    post2 = pm.sample_posterior_predictive(
        idata4,
        var_names=["mu", "y"],
    )

# Note: works with the old model.

Sampling: [mu_, y]


Sampling: [mu_, y]


In [89]:
# Mean of `mu` over the first two axes (presumably chain and draw) for each
# predictive result, printed one after the other for comparison.
print(post1.posterior_predictive["mu"].values.mean(axis=(0, 1)))
print(post2.posterior_predictive["mu"].values.mean(axis=(0, 1)))

[29.89727173 49.11120318 50.47067189 46.83670032 42.78398617 38.61454919
 35.57313739 29.31785712 29.31785712 43.2423914 ]
[30.85734391 47.89703106 51.42968096 47.04174149 42.81633842 38.50513225
 35.5118569  30.09280733 30.09280733 44.21621381]


In [48]:
# Same comparison as the previous cell: mean of `mu` over the first two axes
# (presumably chain and draw) for the in-memory and the reloaded trace.
print(post1.posterior_predictive["mu"].values.mean(axis=(0, 1)))
print(post2.posterior_predictive["mu"].values.mean(axis=(0, 1)))

[26.88716445 46.02483416 53.45011577 44.66657441 40.84422293 36.94787841
 35.03789103 27.5035715  27.5035715  43.2269759 ]
[26.12314446 46.23819594 51.43980561 44.14335237 40.79874207 36.6990834
 35.00187807 27.35617572 27.35617572 41.85558153]


In [8]:
# Exponentiate the averaged tree samples with NumPy; the values recorded
# below match what print(ex_mu) shows.
np.exp(mu_smpl)

array([[27.6650067 ],
       [45.81105935],
       [55.01219474],
       [45.30494347],
       [42.86134523],
       [38.777871  ],
       [38.36400142],
       [30.37744653],
       [30.37744653],
       [45.244831  ]])

In [7]:
# Show the exponentiated mean prediction computed from the reloaded trees.
print(ex_mu)

[[27.6650067 ]
 [45.81105935]
 [55.01219474]
 [45.30494347]
 [42.86134523]
 [38.777871  ]
 [38.36400142]
 [30.37744653]
 [30.37744653]
 [45.244831  ]]
