-
Notifications
You must be signed in to change notification settings - Fork 11
/
beta_binomial_example.Rmd
124 lines (94 loc) · 3.09 KB
/
beta_binomial_example.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
---
title: "beta_binomial_example"
author: "Isaac Goldstein"
date: "2023-06-30"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(warning = FALSE)
knitr::opts_chunk$set(message = FALSE)
```
# Libraries
```{r message = FALSE, warning = FALSE}
library(tidyverse)
library(tidybayes)
library(rstan)
library(posterior)
set.seed(236)
#options(mc.cores = parallelly::availableCores())
rstan_options(auto_write = TRUE)
```
# Data
```{r}
rat_data = read.table("https://raw.githubusercontent.com/vnminin/sismid_mcmc_one/master/2023/code/rat_tumor.txt", header=TRUE)
num_obs = dim(rat_data)[1]
```
# Fitting the model using rstan
The following code fits the beta_binom.stan model to the rat tumor data. The posterior_object contains both the data, as well as parameters specifying the model priors. These are all treated as "data" by stan.
```{r message = FALSE, warning = FALSE}
posterior_object <- list(priors_only = FALSE,
num_obs = num_obs,
x = rat_data$x,
n = rat_data$n,
lambda_alpha = 0.1,
lambda_beta = 0.1)
chain = 4
iterations = 1000
seed = 236
thin = 1
posterior_fit <- stan(file = "https://raw.githubusercontent.com/vnminin/sismid_mcmc_one/main/2023/code/beta_binom.stan",
data = posterior_object,
seed = seed,
iter = iterations,
thin = thin,
chain = chain)
```
# Assessing MCMC Diagnostics
We can assess the traceplot for the scaled likelihood.
```{r}
traceplot(posterior_fit, pars = "lp__")
```
We can also summarise Rhat and effective sample size.
```{r}
summary <- posterior_fit %>%
as_draws() %>%
summarise_draws() %>%
summarise(
min_rhat = min(rhat),
max_rhat = max(rhat),
min_ess_bulk = min(ess_bulk),
max_ess_bulk = max(ess_bulk),
min_ess_tail = min(ess_tail),
max_ess_tail = max(ess_tail)
)
summary
```
# Visualizing the results
```{r fig.width = 10, fig.height = 6}
theta_draws <- posterior_fit %>% spread_draws(theta[i])
vis_theta <- theta_draws %>% filter(i %% 2 == 1) %>%
arrange(i) %>%
mutate(theta_lab = paste("Theta ", i))
theta_plot <- vis_theta %>%
ggplot(aes(x = fct_inorder(theta_lab), y = theta)) +
geom_boxplot() +
theme_bw() +
xlab("Theta") +
ylab("Value") +
theme(axis.text.x = element_text(angle = 90),
text = element_text(size = 20))
theta_plot
```
```{r}
fixed_vars <- posterior_fit %>% spread_draws(alpha, beta)
plot_vars <- fixed_vars %>% pivot_longer(-c(.chain, .iteration, .draw), names_to = "name")
fixed_plot <- plot_vars %>%
ggplot(aes(x=value)) +
geom_histogram() +
facet_wrap(vars(name)) +
theme_bw() +
xlab("Value") +
theme(text = element_text(size = 20))
fixed_plot
```