appendix.Rmd

---
output:
  md_document:
    variant: markdown_strict+raw_tex
---
\clearpage

\normalsize

```{r appendix_setup, include=FALSE}
library(knitr); library(kableExtra)
opts_chunk$set(
  echo = FALSE, message = FALSE, warning = FALSE,
  dev = "png", dpi = 600
)
opts_knit$set(out.format = "latex")
options(knitr.table.format = "latex", scipen = 500, digits = 4)
library(here); library(magrittr); library(ggplot2)
source(here("summarizing.R"))
```

\section*{Appendix}

```{r appendix_model_summary}
search_engine_traffic_fit <- readr::read_rds(here("final", "fit_arma21r3ev3-longer.rds"))
posterior_summary <- broom::tidyMCMC(
  search_engine_traffic_fit,
  pars = c("delta0", "omega1", "limz", "gamma_mean", "gamma", "phi", "theta", "beta"),
  estimate.method = "median", conf.int = TRUE, conf.method = "HPDinterval"
)
term_labels <- dplyr::data_frame(
  term = c("delta0", "omega1", "limz", "gamma_mean",
           paste0("gamma[", 1:12, "]"), paste0("phi[", 1:3, "]"), paste0("theta[", 1:2, "]"),
           paste0("beta[", 1:7, "]")),
  label = c("$\\delta_0$ (immediate impact)", "$\\omega_1$ (impact gradation)",
            "$\\frac{\\delta_0}{1 - \\omega_1}$ (long-term impact)",
            "$\\mu_\\gamma$ (daily average)",
            paste0("$\\gamma_{", 1:12, "}$ (", month.name, " average)"),
            paste0("$\\phi_", 1:3, "$"), paste0("$\\theta_", 1:2, "$"),
            "$\\beta_1$ (linear trend over time)",
            paste0("$\\beta_", 2:7, "$ (", c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"), ")")),
  section = c(rep(1, 3), rep(2, 13), rep(4, 3), rep(5, 2), 6, rep(3, 6))
)
posterior_summary %>%
  dplyr::left_join(term_labels, by = "term") %>%
  dplyr::arrange(section) %>%
  dplyr::select(-c(term, section)) %>%
  refine_hpd(4) %>%
  kable(
    align = c("l", "r", "c"), booktabs = TRUE, linesep = "", escape = FALSE,
    col.names = c("Parameter", "Estimate (Standard Error)", "95\\% HPD Interval"),
    caption = "Estimates and confidence intervals for parameters in the model of desktop traffic to the Italian Wikipedia from all recognized search engines The model was fit to pageview counts \\emph{in millions}. For example, when reading the table, $\\beta_2 = 0.67$ indicates more 670K pageviews on average on Monday relative to Sunday."
  ) %>%
  kable_styling(latex_options = c("striped", "hold_position")) %>%
  group_rows(index = c(
    "Intervention effect" = 3,
    "Averages" = 13,
    "Weekly seasonality (additive, relative to Sunday)" = 6,
    "Autoregressive (AR) component" = sum(grepl("phi", posterior_summary$term)),
    "Moving average (MA) component" = sum(grepl("theta", posterior_summary$term)),
    "Other effects" = 1
  ))
```

\newpage

```{r appendix_marginal_likelihoods}
if (file.exists(here("fits_index.csv"))) fits <- readr::read_csv(here("fits_index.csv"))
delta0_hpd <- readr::read_csv(here("results", "delta0_hpd.csv")) %>%
  refine_hpd %>% dplyr::select(-c(term, rhat))
mse <- readr::read_csv(here("results", "mse.csv")) %>%
  dplyr::group_by(model) %>%
  dplyr::summarize(mse = sprintf("%.3f / %.3f", mse_before, mse_after)) %>%
  dplyr::left_join(delta0_hpd, by = "model")
marginal_likelihoods <- set_names(as.list(fits$lml_path), fits$model) %>%
  purrr::map(readr::read_rds) %>%
  purrr::map_df(~ dplyr::data_frame(logL = .x$logml), .id = "model") %>%
  dplyr::mutate(post_prob = bridgesampling::post_prob(logL)) %>%
  dplyr::select(-logL)
marginal_likelihoods %>%
  dplyr::mutate(post_prob = sprintf("%.3f%%", 100 * post_prob)) %>%
  dplyr::left_join(mse, by = "model") %>%
  dplyr::mutate_all(dplyr::funs(replace(., is.na(.), "--"))) %>%
  dplyr::mutate(
    change = factor(dplyr::case_when(
      grepl("levelling-off", model, fixed = TRUE) ~ 1,
      grepl("Gompertz", model) ~ 2,
    ), 1:2, c("Gradual level-off", "Gompertz function")),
    intercept = factor(dplyr::case_when(
      grepl("levelling-off", model, fixed = TRUE) & grepl("(v2)", model, fixed = TRUE) ~ 1,
      grepl("levelling-off", model) & grepl("(v3)", model, fixed = TRUE) ~ 2,
      grepl("Gompertz", model) & grepl("(v2)", model, fixed = TRUE) ~ 3,
    ), 1:3, c("Month", "Day of year", "Month")),
    model = sub("w\\/.*", "", model),
  ) %>%
  dplyr::select(model, intercept, change, post_prob, mse, dplyr::everything()) %>%
  dplyr::arrange(model, intercept, change) %>%
  kable(
    align = c("l", "l", "l", "r", "r", "r", "c"), booktabs = TRUE, linesep = "",
    col.names = c("Time Series", "Intercept", "Change Model", "Pr(Model|Data)", "MSE (Before/After)", "Estimate (SE)", "95% CI"),
    caption = "Comparison metrics -- posterior model probability (probability of model given data) and mean squared error (MSE) of predictions before and after intervention (smaller is better) -- of various models of desktop traffic from search engines, with point estimate, standard error (SE), and 95\\% credible interval from the fitted models for intervention effect $\\delta_0$. These were quickly fitted to aid model selection, so the estimates in the final model differ due to more extensive MCMC sampling for final inference."
  ) %>%
  kable_styling(latex_options = c("striped", "hold_position")) %>%
  add_header_above(c(
    "Model Specification" = 3, "Model Metrics" = 2,
    "Intervention Effect ($\\\\delta_0$)" = 2
  ), escape = FALSE) %>%
  landscape()
```