## STAT 344 Group Project

In [1]:
# load the packages
library(dplyr)


Attaching package: 'dplyr'


The following objects are masked from 'package:stats':

    filter, lag


The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union




In [2]:
# read the data
data <- read.csv("Engineering_graduate_salary.csv", header=TRUE)

# set seed
set.seed(1)

In [3]:
# some important number
N <- nrow(data)  # Total population size
N
sample_size <- 300

In [5]:
# SRS
srs_sample <- data %>% sample_n(sample_size)

# Calculate mean salary & se
mean_salary_srs <- mean(srs_sample$Salary, na.rm = TRUE)
se_salary_srs <- sd(srs_sample$Salary, na.rm = TRUE) / sqrt(sample_size)
se_salary_srs_fpc <- se_salary_srs * sqrt((N - sample_size) / (N - 1))



# Calculate proportion of students with A GPA & se
proportion_gpa_srs <- mean(srs_sample$collegeGPA >= 80, na.rm = TRUE)
se_proportion_gpa_srs <- sqrt(proportion_gpa_srs * (1 - proportion_gpa_srs) / sample_size)
se_proportion_gpa_srs_fpc <- se_proportion_gpa_srs * sqrt((N - sample_size) / (N - 1))



# results
cat("Mean Salary:", mean_salary_srs, "\n")
cat("Standard Error of Mean Salary with FPC:", se_salary_srs_fpc, "\n")

cat("Proportion of A GPA", proportion_gpa_srs, "\n")
cat("Standard Error of Proportion of A GPA with FPC:", se_proportion_gpa_srs_fpc, "\n")

Mean Salary: 306816.7 


Standard Error of Mean Salary with FPC: 10682.97 
Proportion of A GPA 0.1133333 
Standard Error of Proportion of A GPA with FPC: 0.01736505 


In [6]:
# Stratified Sampling by Gender
stratified_sample <- data %>%
  group_by(Gender) %>%
  sample_frac(0.1)  # Adjust fraction for 10%

stratified_stats <- stratified_sample %>%
  group_by(Gender) %>%
  summarise(
    N_h = n(),
    n_h = n(),
    mean_salary_h = mean(Salary, na.rm = TRUE),
    prop_A_gpa_h = mean(collegeGPA >= 80, na.rm = TRUE),
    var_salary_h = var(Salary, na.rm = TRUE),
    var_prop_A_h = prop_A_gpa_h * (1 - prop_A_gpa_h)
  ) %>%
  ungroup() %>%
  mutate(weight_h = N_h / N)  # Weight of each stratum

# Combined estimates using weights
stratified_mean_salary <- sum(stratified_stats$weight_h * stratified_stats$mean_salary_h)
stratified_prop_A_gpa <- sum(stratified_stats$weight_h * stratified_stats$prop_A_gpa_h)

# Calculate SE with FPC for the combined stratified estimates
stratified_se_salary <- sqrt(sum((stratified_stats$weight_h^2) * (stratified_stats$var_salary_h / stratified_stats$n_h))) * sqrt((N - nrow(stratified_sample)) / (N - 1))
stratified_se_prop_A <- sqrt(sum((stratified_stats$weight_h^2) * (stratified_stats$var_prop_A_h / stratified_stats$n_h))) * sqrt((N - nrow(stratified_sample)) / (N - 1))

cat("Stratified Mean Salary:", stratified_mean_salary, "\n")
cat("Stratified Standard Error of Mean Salary with FPC:", stratified_se_salary, "\n")

cat("Stratified Proportion of A GPA:", stratified_prop_A_gpa, "\n")
cat("Stratified Standard Error of Proportion of A GPA with FPC:", stratified_se_prop_A, "\n")

Stratified Mean Salary: 33183.79 
Stratified Standard Error of Mean Salary with FPC: 1762.832 
Stratified Proportion of A GPA: 0.01167445 
Stratified Standard Error of Proportion of A GPA with FPC: 0.001748675 
