In [None]:
# Syntax in R as usual
# Variable names convention: https://docs.google.com/spreadsheets/d/1SzU4PcIEUsAGnKKyAcugHO2O2aZW29sf9a_cC-FAElk/edit#gid=1679989021

In [1]:
## Load libraries
install.packages("pacman") # pacman helps load several packages at once

Updating HTML index of packages in '.Library'
Making 'packages.html' ... done


In [24]:
pacman::p_load("haven", # loading .dta file
               "tidyverse", "dplyr", "data.table", "psych") # data wrangling/descriptive stats

Updating HTML index of packages in '.Library'
Making 'packages.html' ... done

tidyverse installed
Updating HTML index of packages in '.Library'
Making 'packages.html' ... done

dplyr installed
“Failed to install/load:
tidyverse, dplyr”

In [None]:
## Data pre-processing from full dataset
# This step may vary by institutions. 

# UCI
ucifull <- read_dta("/Volumes/OS X/apchem.dta") # your dataset
# Exclude students who entered before 2013
ucifull <- subset(ucifull, year%in%c('2013', '2014', '2015', '2016', '2017'))

# Drop transfer
uciap <- subset(ucifull, transferflg==0)

# Drop nondegree students
uciap <- subset(uciap,nondegreeflg==0)

# Flag international students as international; NR-Aliens==1; Others==0
# for UCI international students had to turn in toefl or ielts scores; used this as a proxy
uciap$international <- ifelse(!is.na(uciap$toeflsr) | !is.na(uciap$ieltssr, 1, 0)

# Standardize continuous demographic var (i.e., hsgpa)
uciap$hsgpa_z <- c(scale(uciap$hsgpa, center=T, scale=T))

# Generate URM (Underrepresented Minorities) code
# Var list recode it as 3 levels (0: white; 2: asian; 1: black, latino, etc. AP project coded it as dichotomous 0/1)                              
# ethniccode: 1. Hispanic/Latino; 2. American Indian/Alaska Native; 3. Asian; 4. Black or African American; 5. Native Hawaiian or Pacific Islander; 6. White
uciap$ethniccode_cat <- 0
uciap$ethniccode_cat <- ifelse(uciap$ethniccode%in%c(1, 2, 4, 5), 1, 
                               ifelse(uciap$ethniccode==3, 2, 0)) # asian = 2, white=0
uciap$ethniccode_cat2 <- ifelse(uciap$ethniccode_cat%in%c(0, 2), 0, 1) # white/asian = 0, non-white = 1
                              

In [None]:
# Create cohort var: get the lowest value of year = first year entering institution from "data.table"
# Var "year" = year of taking courses
cohort <- unique(setDT(uciap)[order(year)], by="new_studentid")
names(cohort)[names(cohort)=="year"] <- "cohort" # change var name from "year" to "cohort"
# Make new dataframe that consists of the unique student ID and the cohort # (first year entering)
cohort2 <- cohort$cohort
cohort2 <- data.frame(cbind(cohort2, cohort$new_studentid))
colnames(cohort2) <- c("cohort", "new_studentid")

# Order dataset by studentid
uciap_order <- apfull[order(new_studentid),]

# Merge datasets where "new_studentid" in cohort2 matches with multiple in the overall dataset
uciap <- merge(cohort2, uciap_order, by.x="new_studentid", by.y="new_studentid")

In [None]:
# Create flags for AP subjects and scores
# Chemistry. AP codes in dataset range from 1-20
for (i in 1:20) {
  to_replace <- uciap[[paste0("apexam", i, "cd")]]%in%c(25)
  uciap[to_replace, "apchem"] <- 1
}

# Replace the "NA" in "apchem" flag with 0
uciap$apchem[is.na(uciap$apchem)] <- 0

# Create "apchemsr" - AP Chem score and "apchemyr" - year students took AP Chem
for (i in 1:20) {
  to_replace <- uciap[[paste0("apexam", i, "cd")]]%in%c(25)
  uciap[to_replace, "apchemsr"] <- lapply(uciap[paste0("apexam", i, "sr")], function(n) as.numeric(n))
  uciap[to_replace, "apchemyr"] <- lapply(uciap[paste0("apexam", i, "tsyr")], function(n) as.numeric(n))
}
#####################
# Generate apphysicsc mechanics var: took AP Physics mechanics or not, AP Physics mechanics score, and year of taking
for (i in 1:20) {
  to_replace <- uciap[[paste0("apexam", i, "cd")]]%in%c(80)
  uciap[to_replace, "apphysicscmec"] <- 1
}

# Replace the "NA" with 0
uciap$apphysicscmec[is.na(uciap$apphysicscmec)] <- 0

# Create score and year studnets took AP Physics C Mechanics
for (i in 1:20) {
  to_replace <- uciap[[paste0("apexam", i, "cd")]]%in%c(80)
  uciap[to_replace, "apphysicscmecsr"] <- lapply(uciap[paste0("apexam", i, "sr")], function(n) as.numeric(n))
  uciap[to_replace, "apphysicscmecyr"] <- lapply(uciap[paste0("apexam", i, "tsyr")], function(n) as.numeric(n))
}
#####################
# Generate apphysicsc electricity var
for (i in 1:20) {
  to_replace <- uciap[[paste0("apexam", i, "cd")]]%in%c(82)
  uciap[to_replace, "apphysicscelec"] <- 1
}
                                                 
# Replace the "NA" with 0
uciap$apphysicscelec[is.na(uciap$apphysicscelec)] <- 0
                                                 
# Create score and year students took AP Physics C Mechanics
for (i in 1:20) {
  to_replace <- uciap[[paste0("apexam", i, "cd")]]%in%c(82)
  uciap[to_replace, "apphysicscelecsr"] <- lapply(uciap[paste0("apexam", i, "sr")], function(n) as.numeric(n))
  uciap[to_replace, "apphysicscelecyr"] <- lapply(uciap[paste0("apexam", i, "tsyr")], function(n) as.numeric(n))
}

In [None]:
# Create flags for apskip
# UCI only had it for CHEM and PHYSICS
uciap$apskipchem <- ifelse(uciap$apchemsr%in%c(4,5), 1, 0)
uciap$apskipphy7c <- ifelse(uciap$apphysicscelecsr%in%c(4,5)|
                               uciap$apphysicscmecsr%in%c(4,5), 1, 0)

In [None]:
# Change ACT score to SAT
uciap <- apply(uciap, 2, function(x) {x <- plyr::revalue(x, c("36"='800', "35"='780', "34"='760', "33"='740', "32"='720', "31"='710',
"30"='700', "29"='680', "28"='660', "27"='640', "26"='610',
"25"='590', "24"='580', "23"='560', "22"='540', "21"='530',
"20"='520', "19"='510', "18"='500', "17"='470', "16"='430',
"15"='400', "14"='360', "13"='330', "12"='310', "11"='280', "10"='260')); x})

uciap <- as.data.frame(uciap)
uciap$actmathsr <- as.numeric(as.character(uciap$actmathsr))
summary(uciap$actmathsr)
uciap$bstsat1mathsr <- as.numeric(as.character(uciap$bstsat1mathsr))

# Replace with ACT scores if Math scores missing
uciap$mathprof <- ifelse(is.na(uciap$bstsat1mathsr), uciap$actmathsr, uciap$bstsat1mathsr)
summary(uciap$mathprof)

# Do the same thing for English score
uciap$actenglsr <- as.numeric(as.character(uciap$actenglsr))
uciap$bstsat1readsr <- as.numeric(as.character(uciap$bstsat1readsr))

uciap$engprof <- ifelse(is.na(uciap$bstsat1readsr), uciap$actenglsr, uciap$bstsat1readsr)

summary(uciap$engprof)

# Caveat: the revalue function change variable class to character
# will need to recode those that are supposedly numeric (e.g., gpa, coursegrade) to numeric in later analyses

In [None]:
# Retain necessary vars and write .csv to local folder
uciap_0926 <- uciap[c("new_studentid", "studentid", "apskipchem", "apskipphy7c", "apchem", 
                      "apchemyr","apchemsr", "apphysicscmec", "apphysicscelec",
                       "apphysicscmecyr", "apphysicscelecyr", "apphysicscmecsr", "apphysicscelecsr",
                      "coursecode", "coursetitle", "courseid", "coursenum", "dept", "level", "newfresh",
                      "numgrade", "female", "ethniccode", "ethniccode_cat", "ethniccode_cat2",
                      "firstgen", "lowincomeflg", "ellflg", "international", 
                      "hsgpa", "hsgpa_z", "bstsat1mathsr", "bstsat1readsr", "actenglsr", "actmathsr", "mathprof", "engprof",
                      "year", "admitdate", "termsenrolled", "yearsenrolled", "cohort", "stemmajor")] #stemmajor 1 = no, 2 = yes
# Things in the variable sheet not included: fam_income, us_hs, gpao, begin_term_cum_gpa, instructor_name, current_major
write.csv(uciap_0926, "/Users/thicn/Documents/AP Skip/uciap_0926.dta") # change to your local directory

In [11]:
## Load data
# This step may vary by institutions
apfull <- read.csv("/Users/thicn/Documents/AP Skip/uciap_0926.dta") # change to your local directory/dataset

In [6]:
head(apfull, 3) # data still in long format

X,new_studentid,studentid,apskipchem,apskipphy7c,apchem,apchemyr,apchemsr,apphysicscmec,apphysicscelec,...,actenglsr,actmathsr,mathprof,engprof,year,admitdate,termsenrolled,yearsenrolled,cohort,stemmajor
1,25,10006293,0,0,0,0,0,0,0,...,580,660,580,480,2014,F14,1,0.25,2014,2
2,25,10006293,0,0,0,0,0,0,0,...,580,660,580,480,2014,F14,1,0.25,2014,2
3,25,10006293,0,0,0,0,0,0,0,...,580,660,580,480,2014,F14,1,0.25,2014,2


In [7]:
# Renaming
names(apfull)

In [12]:
colnames(apfull) <- c("X", "st_id", "studentid",
                     "apskipper_GChem1", "apskipper_Phys1",
                     "aptaker_Chem", "apyear_Chem", "apscore_Chem", "aptaker_Mec", "aptaker_Elec",
                     "apyear_Mec", "apyear_Elec", "apscore_Mec", "apscore_Elec",
                     "coursecode", "crs_name", "crs_catalog", "class_number", "crs_sbj", "level", "newfresh", # level = firstyear, sophomore, etc.
                     "numgrade", "gender", "ethniccode", "ethniccode_cat", "ethniccode_cat2",
                     "firstgen", "lowincomeflag", "ellflag", "international", 
                     "hsgpa", "hsgpa_z", "bstsat1mathsr", "bstsat1readsr", "actenglsr", "actmathsr", "mathsr", "englsr",
                     "crs_term", "admitdate", "enrl_from_cohort", "yearsenrolled", "cohort", "stemmajor")

In [13]:
head(apfull)

X,st_id,studentid,apskipper_GChem1,apskipper_Phys1,aptaker_Chem,apyear_Chem,apscore_Chem,aptaker_Mec,aptaker_Elec,...,actenglsr,actmathsr,mathsr,englsr,crs_term,admitdate,enrl_from_cohort,yearsenrolled,cohort,stemmajor
1,25,10006293,0,0,0,0,0,0,0,...,580,660,580,480,2014,F14,1,0.25,2014,2
2,25,10006293,0,0,0,0,0,0,0,...,580,660,580,480,2014,F14,1,0.25,2014,2
3,25,10006293,0,0,0,0,0,0,0,...,580,660,580,480,2014,F14,1,0.25,2014,2
4,25,10006293,0,0,0,0,0,0,0,...,580,660,580,480,2015,F14,4,1.0,2014,2
5,25,10006293,0,0,0,0,0,0,0,...,580,660,580,480,2015,F14,5,1.25,2014,2
6,25,10006293,0,0,0,0,0,0,0,...,580,660,580,480,2015,F14,5,1.25,2014,2


In [15]:
table(apfull$apyear_Chem)


     0   2007   2008   2009   2010   2011   2012   2013   2014   2015   2016 
324238     11     21    206   2246   7992  13651  13992  15547  13597   5457 
  2017 
   946 

In [18]:
## Data processing for skip eligibility
# Subset for CHEM; starting with apyear from 2014
chemap <- subset(apfull, class_number%in%c("0001A", "0001B") & crs_name=="GENERAL CHEMISTRY" & 
                 apyear_Chem%in%c("2014", "2015", "2016", "2017"))
dim(chemap)
head(chemap)

Unnamed: 0,X,st_id,studentid,apskipper_GChem1,apskipper_Phys1,aptaker_Chem,apyear_Chem,apscore_Chem,aptaker_Mec,aptaker_Elec,...,actenglsr,actmathsr,mathsr,englsr,crs_term,admitdate,enrl_from_cohort,yearsenrolled,cohort,stemmajor
527,527,175,10127632,0,0,1,2015,3,0,0,...,,,580,600,2017,F16,2,0.5,2016,2
528,528,175,10127632,0,0,1,2015,3,0,0,...,,,580,600,2017,F16,3,0.75,2016,2
591,591,189,10141872,0,0,1,2015,3,0,0,...,,,730,640,2015,F15,1,0.25,2015,2
602,602,189,10141872,0,0,1,2015,3,0,0,...,,,730,640,2016,F15,2,0.5,2015,2
908,908,240,10174909,0,0,1,2014,2,0,0,...,530.0,640.0,620,570,2017,F16,2,0.5,2016,2
1190,1190,318,10236925,0,0,1,2015,3,0,0,...,580.0,710.0,710,610,2016,F16,1,0.25,2016,2


In [20]:
# Get students who were eligible to skip 1A and did not 
chemap$notSkipthoughEl <- ifelse(chemap$apskipper_GChem1==1 & chemap$class_number=="0001A", 1, 0) 
# double check
table(chemap$notSkipthoughEl, chemap$apscore_Chem)
# make a flag for those
chemNotSkipthoughEl <- subset(chemap, notSkipthoughEl==1)

   
      1   2   3   4   5
  0 456 770 588 132  30
  1   0   0   0  25  11

In [23]:
# Add the flag to the full sample
apfull$chemNotSkipthoughEl <- ifelse(apfull$studentid%in%chemNotSkipthoughEl$studentid, 1, 0)

apfull$aprealskipper_GChem1 <- ifelse(apfull$chemNotSkipthoughEl==0 & apfull$apskipper_GChem1==1, 1, 0)

table(apfull$aprealskipper_GChem1, apfull$apscore_Chem)

   
         0      1      2      3      4      5
  0 324238  18555  18415  16264    322     99
  1      0      0      0      0  10273   4320

In [27]:
# Physics
# Subset for Physics first course; starting with apyear from 2015 for either test
physicsap <- subset(apfull, class_number%in%c("0007C", "0007D", "0007E") & crs_name=="CLASSICAL PHYSICS" & 
                 apyear_Mec%in%c("2015", "2016", "2017")|apyear_Elec%in%c("2015", "2016", "2017"))
dim(physicsap)
head(physicsap)

Unnamed: 0,X,st_id,studentid,apskipper_GChem1,apskipper_Phys1,aptaker_Chem,apyear_Chem,apscore_Chem,aptaker_Mec,aptaker_Elec,...,mathsr,englsr,crs_term,admitdate,enrl_from_cohort,yearsenrolled,cohort,stemmajor,chemNotSkipthoughEl,aprealskipper_GChem1
1227,1227,329,10248503,0,1,0,0,0,1,0,...,750,660,2016,F16,1,0.25,2016,2,0,0
1228,1228,329,10248503,0,1,0,0,0,1,0,...,750,660,2017,F16,4,1.0,2016,2,0,0
1232,1232,329,10248503,0,1,0,0,0,1,0,...,750,660,2017,F16,4,1.0,2016,2,0,0
2544,2544,647,10482351,0,1,0,0,0,1,0,...,750,780,2015,F14,5,1.25,2014,2,0,0
2553,2553,647,10482351,0,1,0,0,0,1,0,...,750,780,2015,F14,3,0.75,2014,1,0,0
2652,2652,673,10498725,0,1,1,2010,1,1,0,...,680,450,2013,F12,5,1.25,2013,1,0,0


In [30]:
physicsap$notSkipthoughEl <- ifelse(physicsap$apskipper_Phys1==1 & physicsap$class_number=="0007C" & 
                                physicsap$crs_name=="CLASSICAL PHYSICS", 1, 0)
table(physicsap$notSkipthoughEl)
physNotSkipthoughEl <- subset(physicsap, notSkipthoughEl==1)


   0    1 
1582   58 

In [34]:
# Add the flag to the full sample
apfull$physNotSkipthoughEl <- ifelse(apfull$studentid%in%physNotSkipthoughEl$studentid, 1, 0)

apfull$aprealskipper_Phys1 <- ifelse(apfull$physNotSkipthoughEl==0 & apfull$apskipper_Phys1==1, 1, 0)

table(apfull$aprealskipper_Phys1, apfull$apscore_Mec)
table(apfull$aprealskipper_Phys1, apfull$apscore_Elec)

   
       1    2    3    4    5
  0 1775 2652 2880  321  210
  1    0    0    0 1728 1439

   
      1   2   3   4   5
  0 365 643 827 455 454
  1 114 132 102 107 166

In [36]:
# Write df for later analyses
write.csv(apfull, "/Users/thicn/Documents/AP Skip/apfull_SkipEl.csv") # your directory