-
Notifications
You must be signed in to change notification settings - Fork 0
/
appendix.Rmd
109 lines (103 loc) · 5.21 KB
/
appendix.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
---
output:
md_document:
variant: markdown_strict+raw_tex
---
\clearpage
\normalsize
```{r appendix_setup, include=FALSE}
library(knitr); library(kableExtra)
opts_chunk$set(
echo = FALSE, message = FALSE, warning = FALSE,
dev = "png", dpi = 600
)
opts_knit$set(out.format = "latex")
options(knitr.table.format = "latex", scipen = 500, digits = 4)
library(here); library(magrittr); library(ggplot2)
source(here("summarizing.R"))
```
\section*{Appendix}
```{r appendix_model_summary}
search_engine_traffic_fit <- readr::read_rds(here("final", "fit_arma21r3ev3-longer.rds"))
posterior_summary <- broom::tidyMCMC(
search_engine_traffic_fit,
pars = c("delta0", "omega1", "limz", "gamma_mean", "gamma", "phi", "theta", "beta"),
estimate.method = "median", conf.int = TRUE, conf.method = "HPDinterval"
)
term_labels <- dplyr::data_frame(
term = c("delta0", "omega1", "limz", "gamma_mean",
paste0("gamma[", 1:12, "]"), paste0("phi[", 1:3, "]"), paste0("theta[", 1:2, "]"),
paste0("beta[", 1:7, "]")),
label = c("$\\delta_0$ (immediate impact)", "$\\omega_1$ (impact gradation)",
"$\\frac{\\delta_0}{1 - \\omega_1}$ (long-term impact)",
"$\\mu_\\gamma$ (daily average)",
paste0("$\\gamma_{", 1:12, "}$ (", month.name, " average)"),
paste0("$\\phi_", 1:3, "$"), paste0("$\\theta_", 1:2, "$"),
"$\\beta_1$ (linear trend over time)",
paste0("$\\beta_", 2:7, "$ (", c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"), ")")),
section = c(rep(1, 3), rep(2, 13), rep(4, 3), rep(5, 2), 6, rep(3, 6))
)
posterior_summary %>%
dplyr::left_join(term_labels, by = "term") %>%
dplyr::arrange(section) %>%
dplyr::select(-c(term, section)) %>%
refine_hpd(4) %>%
kable(
align = c("l", "r", "c"), booktabs = TRUE, linesep = "", escape = FALSE,
col.names = c("Parameter", "Estimate (Standard Error)", "95\\% HPD Interval"),
caption = "Estimates and confidence intervals for parameters in the model of desktop traffic to the Italian Wikipedia from all recognized search engines The model was fit to pageview counts \\emph{in millions}. For example, when reading the table, $\\beta_2 = 0.67$ indicates more 670K pageviews on average on Monday relative to Sunday."
) %>%
kable_styling(latex_options = c("striped", "hold_position")) %>%
group_rows(index = c(
"Intervention effect" = 3,
"Averages" = 13,
"Weekly seasonality (additive, relative to Sunday)" = 6,
"Autoregressive (AR) component" = sum(grepl("phi", posterior_summary$term)),
"Moving average (MA) component" = sum(grepl("theta", posterior_summary$term)),
"Other effects" = 1
))
```
\newpage
```{r appendix_marginal_likelihoods}
if (file.exists(here("fits_index.csv"))) fits <- readr::read_csv(here("fits_index.csv"))
delta0_hpd <- readr::read_csv(here("results", "delta0_hpd.csv")) %>%
refine_hpd %>% dplyr::select(-c(term, rhat))
mse <- readr::read_csv(here("results", "mse.csv")) %>%
dplyr::group_by(model) %>%
dplyr::summarize(mse = sprintf("%.3f / %.3f", mse_before, mse_after)) %>%
dplyr::left_join(delta0_hpd, by = "model")
marginal_likelihoods <- set_names(as.list(fits$lml_path), fits$model) %>%
purrr::map(readr::read_rds) %>%
purrr::map_df(~ dplyr::data_frame(logL = .x$logml), .id = "model") %>%
dplyr::mutate(post_prob = bridgesampling::post_prob(logL)) %>%
dplyr::select(-logL)
marginal_likelihoods %>%
dplyr::mutate(post_prob = sprintf("%.3f%%", 100 * post_prob)) %>%
dplyr::left_join(mse, by = "model") %>%
dplyr::mutate_all(dplyr::funs(replace(., is.na(.), "--"))) %>%
dplyr::mutate(
change = factor(dplyr::case_when(
grepl("levelling-off", model, fixed = TRUE) ~ 1,
grepl("Gompertz", model) ~ 2,
), 1:2, c("Gradual level-off", "Gompertz function")),
intercept = factor(dplyr::case_when(
grepl("levelling-off", model, fixed = TRUE) & grepl("(v2)", model, fixed = TRUE) ~ 1,
grepl("levelling-off", model) & grepl("(v3)", model, fixed = TRUE) ~ 2,
grepl("Gompertz", model) & grepl("(v2)", model, fixed = TRUE) ~ 3,
), 1:3, c("Month", "Day of year", "Month")),
model = sub("w\\/.*", "", model),
) %>%
dplyr::select(model, intercept, change, post_prob, mse, dplyr::everything()) %>%
dplyr::arrange(model, intercept, change) %>%
kable(
align = c("l", "l", "l", "r", "r", "r", "c"), booktabs = TRUE, linesep = "",
col.names = c("Time Series", "Intercept", "Change Model", "Pr(Model|Data)", "MSE (Before/After)", "Estimate (SE)", "95% CI"),
caption = "Comparison metrics -- posterior model probability (probability of model given data) and mean squared error (MSE) of predictions before and after intervention (smaller is better) -- of various models of desktop traffic from search engines, with point estimate, standard error (SE), and 95\\% credible interval from the fitted models for intervention effect $\\delta_0$. These were quickly fitted to aid model selection, so the estimates in the final model differ due to more extensive MCMC sampling for final inference."
) %>%
kable_styling(latex_options = c("striped", "hold_position")) %>%
add_header_above(c(
"Model Specification" = 3, "Model Metrics" = 2,
"Intervention Effect ($\\\\delta_0$)" = 2
), escape = FALSE) %>%
landscape()
```