## Setup

In [1]:
#Import Libraries
import pandas as pd
from pathlib import Path

In [2]:
#Filepaths used by the notebook (all relative to the notebook's working directory)

# Destination path for the finished analysis
outFilePath = Path("Output/finishedAnalysis")

# Input CSVs: one for schools, one for students
studentDataFile = Path("Resources/students_complete.csv")
schoolDataFile = Path("Resources/schools_complete.csv")

In [3]:
#Create the main dataframe for analysis: load both .csv files, then attach each
#school's attributes to every student row with a left join on school_name.
schoolData = pd.read_csv(schoolDataFile)
studentData = pd.read_csv(studentDataFile)

# The join key is listed once (the original passed "school_name" twice).
# validate="many_to_one" makes pandas raise a MergeError if a school name appears
# more than once in schoolData, instead of silently duplicating student rows.
fullData = pd.merge(
    studentData,
    schoolData,
    how="left",
    on="school_name",
    validate="many_to_one",
)

# Preview the first rows of the merged frame
fullData.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


## District Summary

In [4]:
#Number of distinct school names in the merged data
schoolCount = fullData.loc[:, "school_name"].nunique()
schoolCount

15

In [5]:
#Number of distinct 'Student ID' values (missing IDs are not counted)
studentCount = fullData['Student ID'].nunique(dropna=True)
studentCount

39170

In [6]:
#Sum of the 'budget' column, taken from schoolData (one row per school) rather than
#from the merged frame, where each budget repeats on every student row
totalBudget = schoolData.loc[:, 'budget'].sum()
totalBudget

24649428

In [7]:
#Mean of 'math_score' across all student rows
mathAverage = fullData.loc[:, 'math_score'].mean()
mathAverage

78.98537145774827

In [8]:
#Mean of 'reading_score' across all student rows
readAverage = fullData.loc[:, 'reading_score'].mean()
readAverage

81.87784018381414

In [9]:
#Percentage of students whose math score meets the passing threshold
mminScore = 70  # minimum math score counted as passing

# Count the non-null student names among rows at or above the threshold
mpassingCount = fullData.loc[fullData['math_score'].ge(mminScore), 'student_name'].count()

# Share of all students, expressed as a percentage
mpassingPercent = mpassingCount / studentCount * 100
mpassingPercent


74.9808526933878

In [10]:
#Percentage of students whose reading score meets the passing threshold
rminScore = 70  # minimum reading score counted as passing

# Count the non-null student names among rows at or above the threshold
rpassingCount = fullData.loc[fullData['reading_score'].ge(rminScore), 'student_name'].count()

# Share of all students, expressed as a percentage
rpassingPercent = rpassingCount / studentCount * 100
rpassingPercent

85.80546336482001

In [11]:
#Percentage of students meeting the passing threshold in BOTH reading and math
mminScore = 70  # minimum math score counted as passing
rminScore = 70  # minimum reading score counted as passing

# Count the non-null student names among rows that satisfy both conditions
mrpassingCount = fullData.loc[
    fullData['reading_score'].ge(rminScore) & fullData['math_score'].ge(mminScore),
    'student_name',
].count()

# Share of all students, expressed as a percentage
mrpassingPercent = mrpassingCount / studentCount * 100
mrpassingPercent

65.17232575950983

In [13]:
#Assemble the one-row district summary, format the count and budget columns, and display it

# Single record holding every district-level metric computed in the cells above
districtRow = {
    'Total Schools': schoolCount,
    'Total Students': studentCount,
    'Total Budget': totalBudget,
    'Average Math Score': mathAverage,
    'Average Reading Score': readAverage,
    'Percent Passing Math': mpassingPercent,
    'Percent Passing Reading': rpassingPercent,
    'Percent Passing Overall': mrpassingPercent,
}

# Build the frame, then replace two columns with display strings:
# thousands separators for the student count, currency formatting for the budget
summaryDistrict = pd.DataFrame([districtRow]).assign(**{
    'Total Students': lambda frame: frame['Total Students'].map('{:,}'.format),
    'Total Budget': lambda frame: frame['Total Budget'].map("${:,.2f}".format),
})

summaryDistrict

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,Percent Passing Math,Percent Passing Reading,Percent Passing Overall
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


## School Summary

In [19]:
#Keep only each school's name and its type
schoolTypes = schoolData.loc[:, ['school_name', 'type']]

In [25]:
#Student count per school
# value_counts() gives one entry per school name. The index and value names are set
# explicitly (pandas >= 2.0 already uses these) so the merge key and the rename below
# do not depend on the installed pandas version.
perSchoolCount = (
    fullData['school_name']
    .value_counts()
    .rename_axis('school_name')
    .rename('count')
)

# Attach the counts to the school/type table and give the column its display name.
# rename() returns a new frame, so the result is assigned back; the original discarded
# it, leaving fullSummary with the raw 'count' column.
fullSummary = pd.merge(
    schoolTypes, perSchoolCount, how="left", on=['school_name']
).rename(columns={'count': 'Student Count'})
fullSummary

Unnamed: 0,school_name,type,Student Count
0,Huang High School,District,2917
1,Figueroa High School,District,2949
2,Shelton High School,Charter,1761
3,Hernandez High School,District,4635
4,Griffin High School,Charter,1468
5,Wilson High School,Charter,2283
6,Cabrera High School,Charter,1858
7,Bailey High School,District,4976
8,Holden High School,Charter,427
9,Pena High School,Charter,962


In [None]:
#Total school budget and per student spending
schoolSpending = schoolData[['school_name', 'budget']]
schoolSpending['Per Student Spending'] 

In [None]:
#Average test scores per school

In [None]:
#Students per school passing math

In [None]:
#Students per school passing reading

In [None]:
#Students per school passing both

In [None]:
#Convert passing student count to passing rate

In [None]:
#Create per school summary dataframe

## Highest-Performing Schools (by % Overall Passing)

In [None]:
#Sort and display school summary dataframe by % overall passing column (desc)

## Bottom Performing Schools (By % Overall Passing)

In [None]:
#Sort and display school summary dataframe by % overall passing column (asc)

## Math score by grade

In [None]:
#Separate data by grade
#Group by 'school_name' and take the mean of the 'math_score' column for each.
#Combine each of the scores above into single DataFrame called 'gradeMathScores'
#Remove index title

## Reading score by grade

In [None]:
#Separate data by grade
#Group by 'school_name' and take the mean of the 'reading_score' column for each.
#Combine each of the scores above into single DataFrame called 'gradeReadingScores'
#Remove index title

## Scores by school spending

In [None]:
#Create bins

In [None]:
#Copy per school summary dataframe to work with

In [None]:
#Use cut to put the schools in bins

In [None]:
#Calculate Averages for the test score columns

In [None]:
#Assemble dataframe

## Scores by school size

In [None]:
#Create bins

In [None]:
#Use cut to put the schools in bins

In [None]:
#Calculate Averages for the test score columns

In [None]:
#Assemble dataframe

## Scores by school type

In [None]:
#Group per school summary by "School Type" and get averages for test score columns

In [None]:
#Assemble dataframe