In [2]:
# AP Skip
# Linear reg model for shiny app
# Feb 27, 2020
# Code adapted from Eben's and Kyle's
######################################
# Load library
library(tidyverse)
library(dplyr)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.0 ──

[32m✔[39m [34mggplot2[39m 3.3.0     [32m✔[39m [34mpurrr  [39m 0.3.3
[32m✔[39m [34mtibble [39m 2.1.3     [32m✔[39m [34mdplyr  [39m 0.8.5
[32m✔[39m [34mtidyr  [39m 1.0.2     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.3.1     [32m✔[39m [34mforcats[39m 0.4.0

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



In [3]:
# Load dataset
setwd("/Users/thicn/Documents/AP Skip/") # change to your local directory
df_ind_chem <- read.csv("chemSelect_unique.csv")
df_ind_phys <- read.csv("physSelect_unique.csv")

In [4]:
# Start with CHEM
df_viz_gchem <- df_ind_chem %>%
  mutate(SKIP = case_when(
   skipped_course==0 ~ "NoSkip",
   skipped_course==1 ~ "Skip"
 )) %>%
 filter(!is.na(SKIP))

lm_chem <- lm(numgrade_2 ~ apscore + factor(firstgen) + factor(lowincomeflag) + factor(gender) + factor(ethniccode_cat) +
               hsgpa_z + scale(mathsr) + scale(englsr) + factor(crs_term), 
              df_viz_gchem, na.action=na.exclude)

df_viz_chem_shiny <- df_viz_gchem %>%
  mutate(AP = apscore) %>%
  mutate(COURSE2.GRADE = numgrade_2) %>%
  mutate(COURSE2.GRADE.REG =  fitted(lm_chem)) %>%
  mutate(COURSE = "GCHEM1") %>%
  dplyr::select(AP, COURSE, 
                COURSE2.GRADE, COURSE2.GRADE.REG, SKIP)

head(df_viz_chem_shiny)

AP,COURSE,COURSE2.GRADE,COURSE2.GRADE.REG,SKIP
0,GCHEM1,2.3,2.095493,NoSkip
3,GCHEM1,2.0,2.576573,NoSkip
0,GCHEM1,3.3,2.762103,NoSkip
0,GCHEM1,1.7,2.767599,NoSkip
0,GCHEM1,2.0,2.646307,NoSkip
0,GCHEM1,3.7,2.898258,NoSkip


In [6]:
#PHYSICS
df_viz_phys <- df_ind_phys %>%
  mutate(SKIP = case_when(
    skipped_course == 0 ~ "NoSkip",
    skipped_course == 1 ~ "Skip"
  )) %>%
  filter(!is.na(SKIP))

lm_phys <- lm(numgrade_2 ~ apscore + factor(firstgen) + factor(lowincomeflag) + factor(gender) + factor(ethniccode_cat) +
               hsgpa_z + scale(mathsr) + scale(englsr) + factor(crs_term), 
              df_viz_phys, na.action = na.exclude)

df_viz_phys_shiny <- df_viz_phys %>%
  mutate(AP = apscore) %>%
  mutate(COURSE2.GRADE = numgrade_2) %>%
  mutate(COURSE2.GRADE.REG =  fitted(lm_phys)) %>%
  mutate(COURSE = "PHYS1") %>%
  dplyr::select(AP, COURSE, 
                COURSE2.GRADE, COURSE2.GRADE.REG, SKIP)

head(df_viz_phys_shiny)

# Change all NA under AP score to 0
df_viz_phys_shiny$AP <- ifelse(is.na(df_viz_phys_shiny$AP), 0, df_viz_phys_shiny$AP)
head(df_viz_phys_shiny)

AP,COURSE,COURSE2.GRADE,COURSE2.GRADE.REG,SKIP
,PHYS1,3.7,,NoSkip
4.0,PHYS1,4.0,3.829205,NoSkip
,PHYS1,4.0,,NoSkip
,PHYS1,3.0,,NoSkip
,PHYS1,4.0,,NoSkip
,PHYS1,2.7,,NoSkip


AP,COURSE,COURSE2.GRADE,COURSE2.GRADE.REG,SKIP
0,PHYS1,3.7,,NoSkip
4,PHYS1,4.0,3.829205,NoSkip
0,PHYS1,4.0,,NoSkip
0,PHYS1,3.0,,NoSkip
0,PHYS1,4.0,,NoSkip
0,PHYS1,2.7,,NoSkip


In [7]:
#Combine Disciplines
df_SEISMIC_AP_SHINY <- bind_rows(df_viz_chem_shiny, df_viz_phys_shiny) %>%
  group_by(AP, COURSE) %>%
  dplyr::mutate(n.AP = n()) %>%
  ungroup

head(df_SEISMIC_AP_SHINY)
write.csv(df_SEISMIC_AP_SHINY, file = "SEISMIC_AP_SHINY2.csv")

AP,COURSE,COURSE2.GRADE,COURSE2.GRADE.REG,SKIP,n.AP
0,GCHEM1,2.3,2.095493,NoSkip,7111
3,GCHEM1,2.0,2.576573,NoSkip,640
0,GCHEM1,3.3,2.762103,NoSkip,7111
0,GCHEM1,1.7,2.767599,NoSkip,7111
0,GCHEM1,2.0,2.646307,NoSkip,7111
0,GCHEM1,3.7,2.898258,NoSkip,7111
