# Workbook Objective: 
### Examine School data

#### Import Libraries

In [72]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as mn
from scipy.stats import shapiro
from scipy import stats


#### Import data

In [73]:
ca_fips = pd.read_csv('./data-clean/ca_fips.csv')
df_housing = pd.read_csv('./data-clean/housing_values_2021.csv')
df_best_counties = pd.read_csv('./data-clean/best_counties.csv')

In [43]:
ca_fips.head()

Unnamed: 0,fips,county_name
0,6001,Alameda
1,6003,Alpine
2,6005,Amador
3,6007,Butte
4,6009,Calaveras


In [44]:
df_housing.head()

Unnamed: 0,county_name,med_home_value
0,Alameda,854696.0
1,Alpine,419080.0
2,Amador,359884.0
3,Butte,377765.0
4,Calaveras,366538.0


In [45]:
df_best_counties.head()

Unnamed: 0,county_name,is_charter,num_schools,enrollment,test_takers,pct_takers,avg_eng,avg_math,abv_avg_both
0,Shasta,1,1.0,127.0,72.0,0.566929,0.944444,0.875,0.847222
1,Yolo,1,1.0,88.0,38.0,0.431818,0.947368,0.789474,0.789474
2,Santa Barbara,1,1.0,145.0,56.0,0.386207,0.928571,0.785714,0.767857
3,El Dorado,0,5.0,1745.0,572.0,0.327794,0.907343,0.777972,0.751748
4,Nevada,0,3.0,526.0,161.0,0.306084,0.881988,0.763975,0.745342


In [74]:
df_best_counties = pd.merge(df_best_counties, df_housing, on='county_name')

In [75]:
df_best_counties.head()

Unnamed: 0,county_name,is_charter,num_schools,enrollment,test_takers,pct_takers,avg_eng,avg_math,abv_avg_both,med_home_value
0,Shasta,1,1.0,127.0,72.0,0.566929,0.944444,0.875,0.847222,327189.0
1,Shasta,0,6.0,1422.0,277.0,0.194796,0.830325,0.613718,0.592058,327189.0
2,Yolo,1,1.0,88.0,38.0,0.431818,0.947368,0.789474,0.789474,552495.0
3,Yolo,0,6.0,1884.0,719.0,0.381635,0.76217,0.603616,0.574409,552495.0
4,Santa Barbara,1,1.0,145.0,56.0,0.386207,0.928571,0.785714,0.767857,670504.0


In [76]:
ca_fips.head()

Unnamed: 0,fips,county_name
0,6001,Alameda
1,6003,Alpine
2,6005,Amador
3,6007,Butte
4,6009,Calaveras


In [77]:
df_best_counties = pd.merge(df_best_counties, ca_fips, how='left', on='county_name'.lower())

In [78]:
df_best_counties.head()

Unnamed: 0,county_name,is_charter,num_schools,enrollment,test_takers,pct_takers,avg_eng,avg_math,abv_avg_both,med_home_value,fips
0,Shasta,1,1.0,127.0,72.0,0.566929,0.944444,0.875,0.847222,327189.0,6089
1,Shasta,0,6.0,1422.0,277.0,0.194796,0.830325,0.613718,0.592058,327189.0,6089
2,Yolo,1,1.0,88.0,38.0,0.431818,0.947368,0.789474,0.789474,552495.0,6113
3,Yolo,0,6.0,1884.0,719.0,0.381635,0.76217,0.603616,0.574409,552495.0,6113
4,Santa Barbara,1,1.0,145.0,56.0,0.386207,0.928571,0.785714,0.767857,670504.0,6083


In [79]:
df_best_counties.isna().sum()

county_name       0
is_charter        0
num_schools       0
enrollment        0
test_takers       0
pct_takers        0
avg_eng           0
avg_math          0
abv_avg_both      0
med_home_value    0
fips              0
dtype: int64

In [86]:
df_best_counties['abv_avg_both'] = df_best_counties['abv_avg_both'].apply(lambda x: (round(x, 1) * 100))

In [88]:
df_best_counties.head()

Unnamed: 0,county_name,is_charter,num_schools,enrollment,test_takers,pct_takers,avg_eng,avg_math,abv_avg_both,med_home_value,fips
0,Shasta,1,1.0,127.0,72.0,0.566929,0.944444,0.875,80.0,327189.0,6089
1,Shasta,0,6.0,1422.0,277.0,0.194796,0.830325,0.613718,60.0,327189.0,6089
2,Yolo,1,1.0,88.0,38.0,0.431818,0.947368,0.789474,80.0,552495.0,6113
3,Yolo,0,6.0,1884.0,719.0,0.381635,0.76217,0.603616,60.0,552495.0,6113
4,Santa Barbara,1,1.0,145.0,56.0,0.386207,0.928571,0.785714,80.0,670504.0,6083


In [115]:
import plotly.figure_factory as ff
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

fips = df_best_counties['fips'].to_list()

values = df_best_counties['abv_avg_both'].to_list()


fig = ff.create_choropleth(
    title='California Schools',
    fips=fips, values=values,
     scope=['CA'],
     county_outline={'color': 'rgb(15, 15, 55)', 'width': 1},
     state_outline={'color': 'rgb(15, 15, 55)', 'width': 1},
     round_legend_values=False,
     legend_title='Average SAT Scores',
     )

fig.update_layout(
    autosize=True,
    legend_x = 1,
    annotations = {'x': -0.12, 'xanchor': 'right'}
)

fig.layout.template = None
fig.show()

# Source: https://plotly.com/python/county-choropleth/


Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry.


Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry.


Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry.


Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry.


Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry.


Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent p