In [4]:
import pandas as pd
import os
import csv

In [5]:
 # Get the  school & student files
school_info_path = os.path.join("Resources", "schools_complete.csv")
students_info_path = os.path.join("Resources", "students_complete.csv")

In [6]:
# Read the school & students files to a df

schools_df = pd.read_csv(school_info_path)
school_info_df = schools_df.rename(columns = {"name": "School Name", 
                                              "type": "School Type", 
                                              "size" : "Total Students", 
                                              "budget" : "Total Budget"})
school_info_df.head()

Unnamed: 0,School ID,School Name,School Type,Total Students,Total Budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [7]:
# Schools Summary
# Schools Overview
schools_summary = [{"Total Schools":school_info_df['School Name'].count(),
                 "Total Students":school_info_df['Total Students'].sum(),
                 "Total Budget":school_info_df['Total Budget'].sum()}]
schools_summary = pd.DataFrame(schools_summary)
schools_summary = schools_summary[['Total Schools', 'Total Students', 'Total Budget']]
schools_summary

Unnamed: 0,Total Schools,Total Students,Total Budget
0,15,39170,24649428


In [8]:
#read the students df

students_df = pd.read_csv(students_info_path)
students_info_df = students_df.rename(columns = {'name': "Student Name", 
                                              "gender": "Student Gender", 
                                              "grade": "Student Grade",
                                             'school': "School Name",
                                             'reading_score': "Reading Score",
                                             'math_score': "Math Score"})

students_info_df.head()

Unnamed: 0,Student ID,Student Name,Student Gender,Student Grade,School Name,Reading Score,Math Score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [9]:
# Combine the data
district_data = pd.merge(school_info_df, students_info_df, on="School Name", how='right')
district_data.head()

Unnamed: 0,School ID,School Name,School Type,Total Students,Total Budget,Student ID,Student Name,Student Gender,Student Grade,Reading Score,Math Score
0,0,Huang High School,District,2917,1910635,0,Paul Bradley,M,9th,66,79
1,0,Huang High School,District,2917,1910635,1,Victor Smith,M,12th,94,61
2,0,Huang High School,District,2917,1910635,2,Kevin Rodriguez,M,12th,90,60
3,0,Huang High School,District,2917,1910635,3,Dr. Richard Scott,M,12th,67,58
4,0,Huang High School,District,2917,1910635,4,Bonnie Ray,F,9th,97,84


In [10]:
district_data = district_data[['School Name','School Type', "Total Students", 'Total Budget', "Student Grade", 'Reading Score', 'Math Score']]
district_data.head()

Unnamed: 0,School Name,School Type,Total Students,Total Budget,Student Grade,Reading Score,Math Score
0,Huang High School,District,2917,1910635,9th,66,79
1,Huang High School,District,2917,1910635,12th,94,61
2,Huang High School,District,2917,1910635,12th,90,60
3,Huang High School,District,2917,1910635,12th,67,58
4,Huang High School,District,2917,1910635,9th,97,84


In [11]:
# Get Student Stats to add to schools summary table

total_students = district_data["Total Students"].count()

reading_avg = district_data["Reading Score"].mean()

math_avg = district_data["Math Score"].mean()

# Passing Reading & Math Scores - scores over 70%

passing_reading_scores = district_data.loc[district_data["Reading Score"] > 70]
passing_reading_scores = passing_reading_scores['Reading Score'].count()

passing_math_scores = district_data.loc[district_data["Math Score"] > 70]
passing_math_scores = passing_math_scores['Math Score'].count()

# Passing Averages

percent_passing_reading = passing_reading_scores / total_students * 100

percent_passing_math = passing_math_scores / total_students * 100

#Overall Averages

overall_avg = (reading_avg + math_avg)/2


In [12]:
# Get Student Stats to add to summary table

students_summary = [{"Average Math Score":math_avg,
                     "Average Reading Score": reading_avg,
                     "% Passing Reading":percent_passing_reading,
                    "% Passing Math":percent_passing_math,
                    "% Overall Passing Rate":overall_avg}]
students_summary = pd.DataFrame(students_summary, columns=['Average Math Score', 'Average Reading Score',
                                         '% Passing Reading', "% Passing Math", "% Overall Passing Rate"])
students_summary

Unnamed: 0,Average Math Score,Average Reading Score,% Passing Reading,% Passing Math,% Overall Passing Rate
0,78.985371,81.87784,82.971662,72.392137,80.431606


In [13]:
print("DISTRICT OVERVIEW")
district_overview = pd.concat([schools_summary, students_summary], axis='columns')
district_overview

DISTRICT OVERVIEW


Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Reading,% Passing Math,% Overall Passing Rate
0,15,39170,24649428,78.985371,81.87784,82.971662,72.392137,80.431606


In [14]:
district_data.head()

Unnamed: 0,School Name,School Type,Total Students,Total Budget,Student Grade,Reading Score,Math Score
0,Huang High School,District,2917,1910635,9th,66,79
1,Huang High School,District,2917,1910635,12th,94,61
2,Huang High School,District,2917,1910635,12th,90,60
3,Huang High School,District,2917,1910635,12th,67,58
4,Huang High School,District,2917,1910635,9th,97,84


In [19]:
grouped_schools = district_data.groupby("School Name")

In [48]:
# School Numbers

school_nums = grouped_schools.mean()

grouped_df = pd.DataFrame(school_nums)
grouped_df

Unnamed: 0_level_0,Total Students,Total Budget,Reading Score,Math Score
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,4976.0,3124928.0,81.033963,77.048432
Cabrera High School,1858.0,1081356.0,83.97578,83.061895
Figueroa High School,2949.0,1884411.0,81.15802,76.711767
Ford High School,2739.0,1763916.0,80.746258,77.102592
Griffin High School,1468.0,917500.0,83.816757,83.351499
Hernandez High School,4635.0,3022020.0,80.934412,77.289752
Holden High School,427.0,248087.0,83.814988,83.803279
Huang High School,2917.0,1910635.0,81.182722,76.629414
Johnson High School,4761.0,3094650.0,80.966394,77.072464
Pena High School,962.0,585858.0,84.044699,83.839917


In [52]:
school_type = grouped_schools["School Type"].unique()
total_school_budget = grouped_schools["Total Budget"].unique
total_students = grouped_schools["Total Students"].count
over_math = grouped_schools["Math Score"].mean()
over_reading = grouped_schools["Reading Score"].mean()
overall_avg = (over_math + over_reading) / 2

print(school_type)
#print(budget_per_student)
#print(over_math)
#print(over_reading)
#print(overall_avg)

School Name
Bailey High School       [District]
Cabrera High School       [Charter]
Figueroa High School     [District]
Ford High School         [District]
Griffin High School       [Charter]
Hernandez High School    [District]
Holden High School        [Charter]
Huang High School        [District]
Johnson High School      [District]
Pena High School          [Charter]
Rodriguez High School    [District]
Shelton High School       [Charter]
Thomas High School        [Charter]
Wilson High School        [Charter]
Wright High School        [Charter]
Name: School Type, dtype: object


In [None]:
# combine the things here

In [54]:
# Try the other stuff

by_grade = district_data.groupby("Student Grade")
by_school = by_grade.groupby("School Name")
by_school

AttributeError: Cannot access callable attribute 'groupby' of 'DataFrameGroupBy' objects, try using the 'apply' method

In [None]:
#school_piv = district_data.pivot_table(index=["School Name"])
#print(school_piv)