# Make Topic Browser

Create a DFR topic browser site.
Data exploration can be done through live online browsing or download.

_This is an R notebook, not a Python notebook!_

-  v1 Andrew Goldstone
-  v2 Lindsay Thomas
-  v3 Jeremy Douglass
-  ...
-  v3.4 2016-10-18 renamed for modular workflow
-  v3.5 2016-10-24 changed input to metadata-dfrb.csv
-  v3.5 2016-10-25 added dfrb.min.js swap, removed comment on "Not all expected metadata columns are present" warning now that columns are reordered and renamed during clean stage
-  v3.6 2016-11-07 user parameters for model input and browser output
-  v3.7 2017-10-31 replace dfrb.min.js from safe template source area
-  v3.8 2017-10-31 relocate all caches

This could be revised according to the Ode to Here: https://gist.github.com/jennybc/362f52446fe1ebc4c49f

## SETTINGS

In [None]:
## Root folder for cached pipeline artifacts. The metadata and model
## directories below are derived from it, so relocating the caches only
## requires changing this one value.
caches              <- "caches"

## Metadata spreadsheets. The notebook reads metadataFileReorder
## (metadata-dfrb.csv) when attaching metadata to the model.
metadataDir         <- file.path(caches, "metadata")
metadataFile        <- file.path(metadataDir, "metadata.csv")
metadataFileReorder <- file.path(metadataDir, "metadata-dfrb.csv")

## MALLET model files. The file names are relative to modelDir; the notebook
## joins modelDir with modelState and modelFile when loading the model.
modelDir            <- file.path(caches, "model")
modelFile           <- "topics.mallet"
modelState          <- "topic-state.gz"
modelKeys           <- "keys.txt"
modelComposition    <- "composition.txt"
modelCounts         <- "topic_counts.txt"

## Browser output: custom script copied over <dfbOutputDir>/js/dfb.min.js,
## the folder the browser is exported to, and the zip archive built from it.
dfbScript           <- "scripts/dfrbrowser/js/dfb.min.js.custom"
dfbOutputDir        <- "browser"
dfbZipFile          <- "browser.zip"

# these settings could also be externalized as a file, e.g.
#    config.R
# and then loaded with:
#    source("config.R")

# to access shared YAML settings, see:
#    https://stackoverflow.com/a/5276466/7207622

## RUN

In [None]:
## Raise the Java heap ceiling to 2 GB for the Java-backed packages.
## This has to run before those packages are loaded below.
## NOTE(review): may be unnecessary when dfrtopics is not used for modeling.

java_heap_flag <- "-Xmx2g"
options(java.parameters = java_heap_flag)

In [None]:
## The working directory must be the folder that holds the required files.
## When the notebook already runs from the project folder nothing needs to
## change; otherwise uncomment and adapt:
# setwd("/home/jovyan/work/write/projects/MY_PROJECT_NAME_HERE/")

## Print the directory contents, one entry per line, as a sanity check.
project_files <- list.files()
cat(project_files, sep = "\n")

In [None]:
## include required packages

library("rJava")

library("dplyr")
library("ggplot2")
library("lubridate")
library("stringr")
library("readr")

library("mallet")
library("dfrtopics")

**Expected Warning**: "Attaching package: ‘dplyr’..." 

In [None]:
## bigtabulate is optional -- causes errors in load_from_mallet_state

# library("bigtabulate")

## Topics

In [None]:
## Load the topic model from the MALLET state and instances files.
## The defaults are created with the project; change modelState / modelFile
## in SETTINGS when running new models.

## Workaround: dfrtopics runs a Python helper written in python2 syntax, so
## the python2 environment is put first on PATH for the duration of the load.
## The real PATH is saved and restored afterwards (even if loading fails)
## instead of being overwritten with a hard-coded value.
python2_bin   <- "/opt/conda/envs/python2/bin"
original_path <- Sys.getenv("PATH")

Sys.setenv(PATH = paste(python2_bin, original_path, sep = .Platform$path.sep))
system("python --version")

m <- tryCatch(
  load_from_mallet_state(
    mallet_state_file = file.path(modelDir, modelState),
    instances_file = file.path(modelDir, modelFile)
  ),
  finally = {
    ## workaround end: put the original PATH back
    Sys.setenv(PATH = original_path)
    system("python --version")
  }
)


## -----------------------------

## for debugging
# Sys.getenv()
# system("python --version")

## The error:
## File "/opt/conda/lib/R/library/dfrtopics/python/simplify_state.py", line 25
##    print "{},{},{},{}".format(doc,typeindex,topic,
## SyntaxError: invalid syntax

## This is a script that uses python2 syntax when default env is python3
## Solution: switch env with conda -- in container Terminal (or from Jupyter Terminal):
#      source activate python2

**Expected Warning**: bigtabulate

## Metadata

In [None]:
## Read the metadata spreadsheet into md.
## The default file is created with the project; point metadataFileReorder
## at another file when mapping different metadata.
## The first row supplies the column names, and col_types is a string of
## eight "c" codes, i.e. eight columns all read as character.

md <- read_csv(
  metadataFileReorder,
  col_names = TRUE,
  col_types = paste(rep("c", 8), collapse = "")
)

In [None]:
## Replace the model object's metadata element with the table read into md.

m[["metadata"]] <- md

In [None]:
## Write the browser files for model m into dfbOutputDir, overwriting any
## earlier export and including the supporting files.

export_browser_data(
  m,
  out_dir = dfbOutputDir,
  supporting_files = TRUE,
  overwrite = TRUE
)

In [None]:
## Replace the exported browser JavaScript with the WE1S custom build.

## Backing up the old js works on the command line, but not from R system():
# system("mv dfb.min.js dfb.min.js_$(date +%Y%m%d%H%M%S)")

## Copy with file.copy() rather than a shell-built `cp` command, so paths
## containing spaces work and a failed copy stops the notebook instead of
## passing silently.
dfb_js_dir    <- file.path(dfbOutputDir, "js")
dfb_js_target <- file.path(dfb_js_dir, "dfb.min.js")

if (!file.copy(from = dfbScript, to = dfb_js_target, overwrite = TRUE)) {
  stop("Could not copy ", dfbScript, " to ", dfb_js_target, call. = FALSE)
}

## Show the dfb* files now present in the js folder.
list.files(path = dfb_js_dir, pattern = "dfb.*")

In [None]:
## don't need to set working directory output if we are in a project folder
# setwd("/home/jovyan/work/write/projects/thisproject/")

In [None]:
## Archive the exported browser folder into dfbZipFile.
browser_folder <- paste0(dfbOutputDir, "/")
zip(zipfile = dfbZipFile, files = browser_folder)

In [None]:
## Generate an HTML menu with live browsing and download links
## based on the current working directory.

project_name <- basename(getwd())

## Path of the project relative to the "write" folder, i.e. everything
## between the first and any second "/write/" in the working directory.
## It is NA when the working directory is not under a "write" folder.
project_reldir <- strsplit(getwd(), "/write/", fixed = TRUE)[[1]][2]

if (is.na(project_reldir)) {
  ## Do not emit a broken ".../NA/browser/" link.
  warning(
    "Working directory is not under a 'write/' folder; ",
    "the live browser link cannot be built.",
    call. = FALSE
  )
  live_item <- paste0(
    "    <li>Live link unavailable: the working directory is not under ",
    "a 'write/' folder.</li>"
  )
} else {
  live_item <- paste0(
    "    <li><a href='http://harbor.english.ucsb.edu:10001/",
    project_reldir, "/", dfbOutputDir,
    "/' target='_blank'>Browser LIVE</a></li>"
  )
}

## paste0() keeps stray spaces out of the href, and the visible file name
## follows dfbZipFile instead of a hard-coded "browser.zip".
download_item <- paste0(
  "    <li><a href='", dfbZipFile, "' target='new'>Download ",
  dfbZipFile, "</a></li>"
)

menu_html <- paste(
  c(
    "<h2>Live</h2>",
    "<p>To view the browser live:</p>",
    "  <ul>",
    live_item,
    "  </ul>",
    "<h2>Download</h2>",
    "<p>To download and view the browser through a webserver hosted on your local machine:</p>",
    "  <ol>",
    download_item,
    paste0("    <li>Unzip ", dfbZipFile, "</li>"),
    "    <li>Open a shell/terminal, and navigate to the browser directory</li>",
    "    <li>On Linux / OSX, launch local webserver by running:<br><code>./bin/server</code></li>",
    "    <li>View from your local webserver: <a href='http://localhost:8888/' target='_blank'>http://localhost:8888/</a></li>",
    "  </ol>"
  ),
  collapse = "\n"
)

IRdisplay::display_html(data = menu_html)