In [17]:
# Dependencies and Setup
from pathlib import Path
import pandas as pd

# File to Load (Remember to Change These)
school_data_to_load = Path("Resources/schools_complete.csv")
student_data_to_load = Path("Resources/students_complete.csv")

# Read School and Student Data File and store into Pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset.
school_data_complete = pd.merge(student_data, school_data, how="left", on="school_name")

In [18]:
school_data_complete

Unnamed: 0,Student ID,student_name,gender,year,school_name,reading_score,maths_score,School ID,type,size,budget
0,0,Paul Bradley,M,9,Huang High School,96,94,0,Government,2917,1910635
1,1,Victor Smith,M,12,Huang High School,90,43,0,Government,2917,1910635
2,2,Kevin Rodriguez,M,12,Huang High School,41,76,0,Government,2917,1910635
3,3,Richard Scott,M,12,Huang High School,89,86,0,Government,2917,1910635
4,4,Bonnie Ray,F,9,Huang High School,87,69,0,Government,2917,1910635
...,...,...,...,...,...,...,...,...,...,...,...
39165,39165,Donna Howard,F,12,Thomas High School,51,48,14,Independent,1635,1043130
39166,39166,Dawn Bell,F,10,Thomas High School,81,89,14,Independent,1635,1043130
39167,39167,Rebecca Tanner,F,9,Thomas High School,99,99,14,Independent,1635,1043130
39168,39168,Desiree Kidd,F,10,Thomas High School,72,77,14,Independent,1635,1043130


In [19]:
# Local Government Area (LGA) Summary
# unique schools count
school_count = school_data_complete['school_name'].nunique()
print(school_count)

15


In [98]:
# students count
student_count = school_data_complete['student_name'].count()
print(student_count)

39170


In [81]:
# calc total budget
schools_budget = school_data_complete.drop_duplicates(subset='school_name').reset_index(drop=True)['budget'].sum()

In [82]:
school_budgets

24649428

In [87]:
average_maths_score = school_data_complete['maths_score'].mean()

In [88]:
average_maths_score

70.33819249425581

In [89]:
average_reading_score = school_data_complete['reading_score'].mean()

In [90]:
average_reading_score

69.98013786060761

In [96]:
passing_maths

33717

In [104]:
passing_maths_count = school_data_complete[(school_data_complete["maths_score"] >= 50)].count()["student_name"]
passing_maths_percentage = passing_maths_count / float(student_count) * 100

In [108]:
print(passing_maths_count)
print(passing_maths_percentage)

33717
86.07863160582077


In [106]:
passing_reading_count = school_data_complete[(school_data_complete["reading_score"] >= 50)].count()["student_name"]
passing_reading_percentage = passing_reading_count/ float(student_count) * 100

In [109]:
print(passing_reading_count)
print(passing_reading_percentage)

33070
84.42685728874139


In [121]:
students_passing_math = school_data_complete[(school_data_complete["maths_score"] >= 50)]
students_passing_reading = school_data_complete[(school_data_complete["reading_score"] >= 50)]
intersection_count = pd.merge(students_passing_math, students_passing_reading, how='inner').count()["student_name"]

In [122]:
intersection_count

28519

In [123]:
# percentage pf students passing both reading & math
overall_passing_percentage = intersection_count/ float(student_count) * 100

In [125]:
overall_passing_percentage

72.80827163645647

In [None]:
# School Summary

In [211]:
# Use the code provided to select the type per school from school_data
school_types = school_data.set_index(["school_name"])["type"].reset_index(name='type')

# Calculate the total student count per school
per_school_counts = student_data.groupby(["school_name"]).size().reset_index(name='student_count')

# Calculate the total school budget and per capita spending per school from school_data
per_school_budget = school_data.set_index(["school_name"])["budget"].reset_index(name='school_budget')

df_merged = pd.merge(per_school_counts, per_school_budget, on='school_name')
df_merged['per_capita_budget'] = df_merged['school_budget'] / df_merged['student_count']
per_capita_budget = df_merged[['school_name', 'per_capita_budget']]

# Calculate the average test scores per school from school_data_complete
per_school_maths = school_data_complete.groupby('school_name')['maths_score'].mean().reset_index()
per_school_maths.columns = ['school_name', 'average_maths_score']

per_school_reading = school_data_complete.groupby('school_name')['reading_score'].mean().reset_index()
per_school_reading.columns = ['school_name', 'average_reading_score']

In [212]:
school_types

Unnamed: 0,school_name,type
0,Huang High School,Government
1,Figueroa High School,Government
2,Shelton High School,Independent
3,Hernandez High School,Government
4,Griffin High School,Independent
5,Wilson High School,Independent
6,Cabrera High School,Independent
7,Bailey High School,Government
8,Holden High School,Independent
9,Pena High School,Independent


In [213]:
per_school_counts

Unnamed: 0,school_name,student_count
0,Bailey High School,4976
1,Cabrera High School,1858
2,Figueroa High School,2949
3,Ford High School,2739
4,Griffin High School,1468
5,Hernandez High School,4635
6,Holden High School,427
7,Huang High School,2917
8,Johnson High School,4761
9,Pena High School,962


In [214]:
per_school_budget

Unnamed: 0,school_name,school_budget
0,Huang High School,1910635
1,Figueroa High School,1884411
2,Shelton High School,1056600
3,Hernandez High School,3022020
4,Griffin High School,917500
5,Wilson High School,1319574
6,Cabrera High School,1081356
7,Bailey High School,3124928
8,Holden High School,248087
9,Pena High School,585858


In [215]:
per_capita_budget

Unnamed: 0,school_name,per_capita_budget
0,Bailey High School,628.0
1,Cabrera High School,582.0
2,Figueroa High School,639.0
3,Ford High School,644.0
4,Griffin High School,625.0
5,Hernandez High School,652.0
6,Holden High School,581.0
7,Huang High School,655.0
8,Johnson High School,650.0
9,Pena High School,609.0


In [216]:
per_school_maths

Unnamed: 0,school_name,average_maths_score
0,Bailey High School,72.352894
1,Cabrera High School,71.657158
2,Figueroa High School,68.698542
3,Ford High School,69.091274
4,Griffin High School,71.788147
5,Hernandez High School,68.874865
6,Holden High School,72.583138
7,Huang High School,68.935207
8,Johnson High School,68.8431
9,Pena High School,72.088358


In [217]:
per_school_reading

Unnamed: 0,school_name,average_reading_score
0,Bailey High School,71.008842
1,Cabrera High School,71.359526
2,Figueroa High School,69.077993
3,Ford High School,69.572472
4,Griffin High School,71.245232
5,Hernandez High School,69.186408
6,Holden High School,71.660422
7,Huang High School,68.910525
8,Johnson High School,69.039277
9,Pena High School,71.613306


In [226]:
# Get the students who passed maths and passed reading by creating separate filtered DataFrames from school_data_complete.
total_students_per_school = school_data_complete.groupby('school_name').size()
students_passing_math = school_data_complete[school_data_complete['maths_score'] > 50].groupby('school_name').size()
school_passing_maths = (students_passing_math / total_students_per_school * 100).reset_index(name='math_passing_percentage')

students_passing_reading = school_data_complete[school_data_complete['reading_score'] > 50].groupby('school_name').size()
school_passing_reading = (students_passing_reading / total_students_per_school * 100).reset_index(name='reading_passing_percentage')

# Get the students who passed both reading and maths in a separate DataFrame from school_data_complete.
#passing_maths_and_reading =

In [224]:
school_passing_maths

Unnamed: 0,school_name,math_passing_percentage
0,Bailey High School,89.951768
1,Cabrera High School,88.805167
2,Figueroa High School,80.264496
3,Ford High School,81.087988
4,Griffin High School,89.373297
5,Hernandez High School,79.374326
6,Holden High School,88.290398
7,Huang High School,80.253685
8,Johnson High School,80.298257
9,Pena High School,90.22869


In [227]:
school_passing_reading

Unnamed: 0,school_name,reading_passing_percentage
0,Bailey High School,85.409968
1,Cabrera High School,86.706136
2,Figueroa High School,80.739234
3,Ford High School,80.722892
4,Griffin High School,86.989101
5,Hernandez High School,79.978425
6,Holden High School,86.651054
7,Huang High School,79.808022
8,Johnson High School,80.529301
9,Pena High School,85.239085
