<a href="https://colab.research.google.com/github/seismic2020/WG1-P4/blob/master/SEISMIC_AP_Shiny_app.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# SEISMIC AP Shiny App 
# Load required packages #
if (!require("pacman")) install.packages("pacman")
library(pacman)
pacman::p_load("tidyverse", "dplyr", "shiny", "gridExtra")

In [0]:
# Import raw dataset (change the .csv name to your own filename if needed) #
# Imported .csv must have the following columns:
#   COURSE: A string labeling the course sequence (e.g., the name of the first course in the sequence)
#   AP: A number from 0-5 indicating the score on the AP exam associated with that COURSE, with 0 meaning the student did not take the exam
#   COURSE2.GRADE: A numeric grade (0-4) in the second course in the sequence
#   COURSE2.GRADE.REG: The numeric grade in the second course in the sequence fitted with a linear regression
#   SKIP: A string indicating whether or not the student skipped the first course in the sequence

df_shiny_full <- read.csv("/Users/thicn/Documents/AP Skip/SEISMIC_AP_SHINY2.csv") %>%
# Create a deidentified ID variable
    mutate(id = rownames(df_shiny_full))

In [0]:
#   ==== Build Shiny App ====
# Define UI ####
ui <- fluidPage(
  titlePanel("Grades by AP Score"),
  sidebarLayout(
    sidebarPanel(
      #Select course to plot
      selectInput(inputId = "COURSE", 
                  label = "Select a course:",
                  choices = unique(df_shiny_full$COURSE), 
                  selected = unique(df_shiny_full$COURSE)[1]),
      #Select y min and max for the uncontrolled model
      sliderInput("y.range.uncontrolled", "Uncontrolled Model: Y Limits", min=1, max=4, step=0.25, value=c(1,4)),
      #Select y min and max for the Fitted model
      sliderInput("y.range.fitted", "Fitted Model: Y Limits", min=1, max=4, step=0.25, value=c(1,4))
    ),
  mainPanel(tabPanel(
    "Plot",
    fluidRow(
      plotOutput("linePlotUncontrolled"),
      plotOutput("linePlotFitted"),
      plotOutput("histogram")
    )
  )))
)

# Define Server ####
server <- function(input, output) {
  #Subset data based on the given course sequence
  #though we'll let ggplot do most of the summary later, we'll precalculate some of the means and standard errors to use for error bars
  chartData <- reactive({
    df_shiny_full %>%
      filter(COURSE == input$COURSE) %>%
      group_by(AP, SKIP) %>%
      mutate(n = n()) %>%
      mutate(COURSE2.GRADE.mean = mean(COURSE2.GRADE, na.rm = TRUE),
             COURSE2.GRADE.sd = sd(COURSE2.GRADE, na.rm = TRUE),
             COURSE2.GRADE.n = sum(!is.na(COURSE2.GRADE)),
             COURSE2.GRADE.se = COURSE2.GRADE.sd / sqrt(COURSE2.GRADE.n),
             COURSE2.GRADE.REG.mean = mean(COURSE2.GRADE.REG, na.rm = TRUE),
             COURSE2.GRADE.REG.sd = sd(COURSE2.GRADE.REG, na.rm = TRUE),
             COURSE2.GRADE.REG.n = sum(!is.na(COURSE2.GRADE.REG)),
             COURSE2.GRADE.REG.se = COURSE2.GRADE.REG.sd / sqrt(COURSE2.GRADE.REG.n))
  })
  
  #Get a string based on the chosen course to use in figure labels
  #Modify this to match the strings in your COURSE variable
  subj.label <- reactive({
    case_when(
      #input$COURSE == "BIO1" ~ "Biology",
      input$COURSE == "GCHEM1" ~ "Chemistry",
      input$COURSE == "PHYS1"~ "Physics"
      )
    })
  
  output$linePlotUncontrolled <- renderPlot({
    ggplot(data=chartData(), aes(y = COURSE2.GRADE, x = AP, color = SKIP, fill = SKIP, na.omit = TRUE)) +
      geom_point(stat = 'summary', fun.y = 'mean', size=3) +
      geom_smooth(stat = 'summary', method = 'loess') + #use shaded fill to indicate the size of the standard error
      geom_errorbar(aes(ymin = COURSE2.GRADE.mean - COURSE2.GRADE.se, ymax = COURSE2.GRADE.mean + COURSE2.GRADE.se), width=0.1) + #add standard error bars
      #on y and x axes: sec.axis = dup_axis(labels = NULL) adds tick marks to the opposite side of the bounding box without adding duplicated labels
      #x axis: since non-AP takers are coded as 0, manually relabel the tick marks to replace "0" with "Didn't Take"
      scale_x_continuous(sec.axis = dup_axis(labels = NULL), labels=c("Didn't Take", "1", "2", "3", "4", "5")) +
      #y-axis: set breaks to every 0.5 grade points
      scale_y_continuous(sec.axis = dup_axis(labels = NULL), breaks=seq(0, 4, by=0.5)) +
      #use coord_cartesian to limit the display without cutting out error bars that may fall outside the chosen range
      coord_cartesian(ylim=c(input$y.range.uncontrolled[1], input$y.range.uncontrolled[2])) +
      theme_classic() +
      theme(
        panel.border = element_rect(color = "black", fill=NA), #put a border around the whole plot
        axis.title.x.top = element_blank(), #no x axis title on top
        axis.title.y.right = element_blank(), #no y axis title on the right
        legend.position=c(0.25, 0.85), #position the legend inside the plot (may want to make this adjustable with Shiny input options)
        legend.background = element_blank(),
        legend.box.background = element_rect(color = "black") #black rectangle surrounding the legend
      ) +
      #construct labels using the subj.label() defined earlier
      labs(x = "AP Score", y = paste("Mean Grade in", subj.label(), "2"), title= paste(subj.label(), "Uncontrolled Model"))
  })
  
  output$linePlotFitted <- renderPlot({
    ggplot(data=chartData(), aes(y = COURSE2.GRADE.REG, x = AP, color = SKIP, fill = SKIP, na.omit = TRUE)) +
      geom_point(stat = 'summary', fun.y = 'mean', size=3) +
      geom_errorbar(aes(ymin = COURSE2.GRADE.REG.mean - COURSE2.GRADE.REG.se, ymax = COURSE2.GRADE.REG.mean + COURSE2.GRADE.REG.se), width=0.1) +
      geom_smooth(stat = 'summary', method = 'loess') +
      scale_x_continuous(sec.axis = dup_axis(labels = NULL), breaks=seq(0, 5, by=1), labels=c("Didn't Take", "1", "2", "3", "4", "5")) +
      scale_y_continuous(sec.axis = dup_axis(labels = NULL), breaks=seq(0, 4, by=0.5)) +
      coord_cartesian(ylim=c(input$y.range.fitted[1], input$y.range.fitted[2])) +
      theme_classic() +
      theme(
        panel.border = element_rect(color = "black", fill=NA),
        axis.title.x.top = element_blank(),
        axis.title.y.right = element_blank(),
        legend.position=c(0.25, 0.85),
        legend.background = element_blank(),
        legend.box.background = element_rect(color = "black")
      ) +
      labs(x = "AP Score", y = paste("Mean Grade in", subj.label(), "2"), title= paste(subj.label(), "Fitted Model"))
  })
  
  output$histogram <- renderPlot({
    ggplot(data=chartData() %>% filter(AP != 0), aes(x = AP, color = SKIP, fill = SKIP, na.omit = TRUE)) +
      geom_histogram(stat='count', position = position_dodge(preserve = "single")) +
      scale_x_continuous(sec.axis = dup_axis(labels = NULL)) +
      scale_y_continuous(sec.axis = dup_axis(labels = NULL)) +
      theme_classic() +
      theme(
        panel.border = element_rect(color = "black", fill=NA),
        axis.title.x.top = element_blank(),
        axis.title.y.right = element_blank(),
        legend.position=c(0.25, 0.85),
        legend.background = element_blank(),
        legend.box.background = element_rect(color = "black")
      ) +
      labs(x = "AP Score", y = "Number of Students", title= paste("Histogram of AP", subj.label(), "Scores"))
  })
}
  
  # Launch Shiny App ####
  shinyApp(ui = ui, server = server) # The link in the output wouldn't run anymore since I paused analyses on my kernel
  # but if you run the codes in your local computer it should take you to a Shiny app with your own data.


Listening on http://127.0.0.1:5056

“Ignoring unknown parameters: fun.y”
“Removed 136 rows containing non-finite values (stat_summary).”
No summary function supplied, defaulting to `mean_se()`

“Removed 136 rows containing non-finite values (stat_summary).”
No summary function supplied, defaulting to `mean_se()`

“Ignoring unknown parameters: fun.y”
“Removed 281 rows containing non-finite values (stat_summary).”
No summary function supplied, defaulting to `mean_se()`

“Removed 281 rows containing non-finite values (stat_summary).”
No summary function supplied, defaulting to `mean_se()`

“Ignoring unknown parameters: binwidth, bins, pad”
“Ignoring unknown parameters: fun.y”
“Removed 136 rows containing non-finite values (stat_summary).”
No summary function supplied, defaulting to `mean_se()`

“Removed 136 rows containing non-finite values (stat_summary).”
No summary function supplied, defaulting to `mean_se()`

“Ignoring unknown parameters: fun.y”
“Removed 136 rows containing non-finit