In [222]:
import pandas as pd
import os
import csv

In [223]:
 # Get the  school & student files
school_info_path = os.path.join("Resources", "schools_complete.csv")
students_info_path = os.path.join("Resources", "students_complete.csv")

In [224]:
# Read the school & students files to a df

schools_df = pd.read_csv(school_info_path)
school_info_df = schools_df.rename(columns = {"name": "School Name", 
                                              "type": "School Type", 
                                              "size" : "Total Students", 
                                              "budget" : "Total Budget"})
school_info_df.head()

Unnamed: 0,School ID,School Name,School Type,Total Students,Total Budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [225]:
# Schools Summary
# Schools Overview
schools_summary = [{"Total Schools":school_info_df['School Name'].count(),
                 "Total Students":school_info_df['Total Students'].sum(),
                 "Total Budget":school_info_df['Total Budget'].sum()}]
schools_summary = pd.DataFrame(schools_summary)
schools_summary = schools_summary[['Total Schools', 'Total Students', 'Total Budget']]
schools_summary

Unnamed: 0,Total Schools,Total Students,Total Budget
0,15,39170,24649428


In [226]:
#read the students df

students_df = pd.read_csv(students_info_path)
students_info_df = students_df.rename(columns = {'name': "Student Name", 
                                              "gender": "Student Gender", 
                                              "grade": "Student Grade",
                                             'school': "School Name",
                                             'reading_score': "Reading Score",
                                             'math_score': "Math Score"})

students_info_df.head()

Unnamed: 0,Student ID,Student Name,Student Gender,Student Grade,School Name,Reading Score,Math Score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [227]:
# Combine the data
district_data = pd.merge(school_info_df, students_info_df, on="School Name", how='right')
district_data.head()

Unnamed: 0,School ID,School Name,School Type,Total Students,Total Budget,Student ID,Student Name,Student Gender,Student Grade,Reading Score,Math Score
0,0,Huang High School,District,2917,1910635,0,Paul Bradley,M,9th,66,79
1,0,Huang High School,District,2917,1910635,1,Victor Smith,M,12th,94,61
2,0,Huang High School,District,2917,1910635,2,Kevin Rodriguez,M,12th,90,60
3,0,Huang High School,District,2917,1910635,3,Dr. Richard Scott,M,12th,67,58
4,0,Huang High School,District,2917,1910635,4,Bonnie Ray,F,9th,97,84


In [228]:
district_data = district_data[['School Name','School Type', "Total Students", 'Total Budget', "Student Grade", 'Reading Score', 'Math Score']]
district_data.head()

Unnamed: 0,School Name,School Type,Total Students,Total Budget,Student Grade,Reading Score,Math Score
0,Huang High School,District,2917,1910635,9th,66,79
1,Huang High School,District,2917,1910635,12th,94,61
2,Huang High School,District,2917,1910635,12th,90,60
3,Huang High School,District,2917,1910635,12th,67,58
4,Huang High School,District,2917,1910635,9th,97,84


In [229]:
# Get Student Stats to add to schools summary table

total_students = district_data["Total Students"].count()

reading_avg = district_data["Reading Score"].mean()

math_avg = district_data["Math Score"].mean()

# Passing Reading & Math Scores - scores over 70%

passing_reading_scores = district_data.loc[district_data["Reading Score"] > 70]
passing_reading_scores = passing_reading_scores['Reading Score'].count()

passing_math_scores = district_data.loc[district_data["Math Score"] > 70]
passing_math_scores = passing_math_scores['Math Score'].count()

# Passing Averages

percent_passing_reading = passing_reading_scores / total_students * 100

percent_passing_math = passing_math_scores / total_students * 100

#Overall Averages

overall_avg = (reading_avg + math_avg)/2


In [230]:
# Get Student Stats to add to summary table

students_summary = [{"Average Math Score":math_avg,
                     "Average Reading Score": reading_avg,
                     "% Passing Reading":percent_passing_reading,
                    "% Passing Math":percent_passing_math,
                    "% Overall Passing Rate":overall_avg}]
students_summary = pd.DataFrame(students_summary, columns=['Average Math Score', 'Average Reading Score',
                                         '% Passing Reading', "% Passing Math", "% Overall Passing Rate"])
students_summary

Unnamed: 0,Average Math Score,Average Reading Score,% Passing Reading,% Passing Math,% Overall Passing Rate
0,78.985371,81.87784,82.971662,72.392137,80.431606


In [231]:
print("DISTRICT OVERVIEW")
district_overview = pd.concat([schools_summary, students_summary], axis='columns')
district_overview

DISTRICT OVERVIEW


Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Reading,% Passing Math,% Overall Passing Rate
0,15,39170,24649428,78.985371,81.87784,82.971662,72.392137,80.431606


In [237]:
district_data.head()

Unnamed: 0,School Name,School Type,Total Students,Total Budget,Student Grade,Reading Score,Math Score
0,Huang High School,District,2917,1910635,9th,66,79
1,Huang High School,District,2917,1910635,12th,94,61
2,Huang High School,District,2917,1910635,12th,90,60
3,Huang High School,District,2917,1910635,12th,67,58
4,Huang High School,District,2917,1910635,9th,97,84


In [None]:
df2 = district_data.groupby(['School Name','School Type','Total Students','Total Budget','Student Grade','Reading Score','Math Score'])

In [251]:
grouped_schools = district_data.groupby("School Name")

school_type = grouped_schools["School Type"].unique()
school_students = grouped_schools["Total Students"].count()
school_budgets = grouped_schools["Total Budget"].unique()
budget_per_student = school_budgets / school_students
avg_reading = grouped_schools["Reading Score"].mean()
avg_math = grouped_schools.mean()

passing_reading_scores = grouped_schools.loc[grouped_schools["Reading Score"] > 70]
passing_reading_scores = passing_reading_scores['Reading Score'].count()
print(passing_reading_scores)

#passing_math_scores = grouped_schools.loc[grouped_schools["Math Score"] > 70]
#passing_math_scores = passing_math_scores['Math Score'].count()

print(school_students)
print(school_budgets)
print(budget_per_student)
print(school_type)

AttributeError: Cannot access callable attribute 'loc' of 'DataFrameGroupBy' objects, try using the 'apply' method

In [236]:
# Finding Calcs on Grouped

school_students = district_data_grouped["Total Students"].count()
school_budgets = district_data_grouped["Total Budget"].unique()
budget_per_student = school_budgets / school_students
passing_math_scores = passing_math_scores['Math Score'].count()


reading_scores = district_data_groupe(students_info_df["Reading Score"].value_counts())
reading_scores.columns = ['Reading Score', 'Count']
reading_scores = reading_scores.loc[(reading_scores["Reading Score"] > 70)]
passing_reading_scores = [{"Passing Reading":reading_scores["Count"].sum()}]
print(passing_reading_scores)




IndexError: invalid index to scalar variable.

In [140]:
Trying Fuctions# Trpassing_math_scores = district_data.loc[district_data["Math Score"] > 70]
passing_math_scores = passing_math_scores['Math Score'].count()

#grouped_schools_results = [{"Passing Reading":reading_scores["Count"].sum()}]

#reading_scores = pd.DataFrame(students_info_df["Reading Score"].value_counts())
#reading_scores.reset_index(inplace=True)
#reading_scores.columns = ['Reading Score', 'Count']
#reading_scores = reading_scores.loc[(reading_scores["Reading Score"] > 70)]
#passing_reading_scores = [{"Passing Reading":reading_scores["Count"].sum()}]
#print(passing_reading_scores)

In [141]:
passing_math_scores

28356