In [1]:
import geopandas as gpd
import pandas as pd
import os
import getpass
import matplotlib.pyplot as plt
#from earthpy.clip import clip_shp

### NOTES:


###### Bring in necessary geojson files and set your projection for all files

In [2]:
def project_root(cwd, marker='notebooks'):
    """Return the prefix of ``cwd`` that precedes the first ``marker``.

    Parameters
    ----------
    cwd : str
        A directory path, normally ``os.getcwd()``.
    marker : str
        Folder name that marks where the project root ends.

    Returns
    -------
    str
        Everything before ``marker`` (so it keeps the trailing separator).
        When ``marker`` is absent, ``cwd`` itself with a trailing separator,
        so that ``wd + "data/..."`` still forms a usable path.
    """
    cut = cwd.find(marker)
    if cut == -1:
        # str.find returns -1 when the marker is missing; slicing with -1
        # would silently chop the last character off the path.
        return os.path.join(cwd, '')
    return cwd[:cut]


wd = project_root(os.getcwd())

# Projection shared by every layer: Central Texas, https://epsg.io/6578
# NOTE(review): the {'init': ...} spelling is reported as deprecated by recent
# pyproj releases -- confirm against the installed version before changing it.
crs = {'init': 'epsg:6578'}


def read_projected(relative_path):
    """Read a layer stored under ``wd``, reproject it to ``crs`` and reset its index."""
    layer = gpd.read_file(wd + relative_path)
    return layer.to_crs(crs).reset_index()


#parks = gpd.read_file(wd+"data/coaparks/parkboundaries.geojson")
#parks = parks.to_crs(crs).explode().reset_index()

# The buffer layer only has its CRS attribute assigned (no to_crs call),
# so its coordinates are used exactly as stored in the file.
quartbuff = gpd.read_file(wd + "data/coaparks_buffer/quarterbuff.shp")
quartbuff.crs = {'init': 'epsg:6578'}

# Census block-group layers, all reprojected to the shared CRS.
pop = read_projected("data/blockgroups_censusdata/popmerge.shp")
race = read_projected("data/blockgroups_censusdata/racemerge.shp")
income = read_projected("data/blockgroups_censusdata/incomemerge.shp")
age = read_projected("data/blockgroups_censusdata/agemerge.shp")


##### Check your projections
https://geopandas.org/projections.html

In [None]:
#quartbuff.crs

In [None]:
#pop.crs

In [None]:
#race.crs

In [None]:
#age.crs

###### Preview the files and clean the data

In [None]:
# Full (unclipped) area of every population block group, in CRS units.
pop['fullarea_pop'] = pop.geometry.area
#pop.head()

In [None]:
# Full (unclipped) area of every race block group; used later as the
# denominator of the area weight.
race['fullarea_race'] = race.geometry.area
#race.head()

In [None]:
# Full (unclipped) area of every income block group; used later as the
# denominator of the area weight.
income['fullarea_income'] = income.geometry.area
#income.head()

In [None]:
# Full (unclipped) area of every age/sex block group; used later as the
# denominator of the area weight.
age['fullarea_age'] = age.geometry.area
#age.head()

In [None]:
# Area of every quarter-mile park buffer polygon, in CRS units.
quartbuff['fullarea_buff'] = quartbuff.geometry.area
#quartbuff.head()

##### The three cells below give the totals for each demographic group. These totals are used at the end to compare the total population with the number of people served.

In [None]:
#race.sum( axis = 0,skipna = True,numeric_only=True)

In [None]:
#income.sum( axis = 0,skipna = True,numeric_only=True)

In [4]:
#age.sum( axis = 0,skipna = True,numeric_only=True)

### <font color=green>Run spatial analysis on the amount of people distributed within a census block group against the quarter mile park buffer area and the race census data</font>

In [None]:
# Intersect the race block groups with the quarter-mile park buffers
# (https://geopandas.org/set_operations.html). Every output row is the piece
# of one block group that lies inside one buffer.
rp_intersection = gpd.overlay(race, quartbuff, how='intersection')

# Area of each intersected piece; numerator of the area weight used below.
rp_intersection['area_intersec'] = rp_intersection['geometry'].area
rp_intersection.to_file(wd+"data/access/rp_intersec_quarterbuff_ACS17.shp")

# Preview the first rows only instead of rendering the whole overlay result.
rp_intersection.head()

In [None]:
#rp_intersection.crs

In [None]:
#rp_intersection.columns

In [None]:
# Columns needed to sanity-check the race overlay: ids, populations and areas.
rp_preview_columns = ['GEOID10', 'Total_POP', 'fullarea_race', 'LOCATION_N',
                      'fullarea_buff', 'area_intersec', 'geometry']
rp_intersection[rp_preview_columns]

In [None]:
race_clip = rp_intersection.copy().reset_index()

# Share of each block group's full area that falls inside the buffer piece.
# These are whole-column operations, so no loop over the frame is needed
# (iterating a DataFrame yields its column labels, not its rows).
race_clip['weight'] = race_clip['area_intersec'] / race_clip['fullarea_race']

# Area-weighted head counts: each census count is scaled by the share of the
# block group covered by the buffer.
race_served_columns = {
    'access_pop': 'Total_POP',
    'access_nonhis': 'Not Hispan',
    'access_white': 'White; Not',
    'access_his_lat': 'Hispanic o',
}
for served_name, census_name in race_served_columns.items():
    race_clip[served_name] = race_clip['weight'] * race_clip[census_name]

race_clip.head()

In [None]:
#race_clip.columns

In [None]:
# Keep only the fields that should be summed per park, plus the geometry.
race_calc_columns = ['LOCATION_N', 'Total_POP', 'access_pop', 'access_nonhis',
                     'access_white', 'access_his_lat', 'area_intersec', 'geometry']
race_calc = race_clip[race_calc_columns]

# One row per park: geometries are merged and every numeric field is summed.
access_data = race_calc.dissolve(by='LOCATION_N', aggfunc='sum', as_index=False)

In [None]:
# Preview the dissolved per-park totals (first rows only).
access_data.head()

##### Combine the park area data to the access data

In [None]:
# Lookup table of park name -> park size, taken from the buffer layer.
park_size_columns = ['LOCATION_N', 'ASSET_SIZE']
parks_ = quartbuff[park_size_columns].copy()

In [None]:
# Preview the park-size lookup table (first rows only).
parks_.head()

In [None]:
# Attach ASSET_SIZE to every park row; parks without a match are dropped.
# NOTE: this rebinds access_data, so re-running the cell merges a second time.
access_data = access_data.merge(right=parks_, how='inner', on='LOCATION_N')

In [None]:
# Preview the per-park totals with ASSET_SIZE attached (first rows only).
access_data.head()

##### The population served by each park is normalized by dividing it by the size of that park (ASSET_SIZE)

In [None]:
# Normalized column -> served-population column it is derived from.
race_norm_sources = {
    'Normalized_byArea': 'access_pop',
    'Normalized_nonhis': 'access_nonhis',
    'Normalized_white': 'access_white',
    'Normalized_hislat': 'access_his_lat',
}

# People served per unit of park size (ASSET_SIZE).
for norm_name, served_name in race_norm_sources.items():
    access_data[norm_name] = access_data[served_name] / access_data['ASSET_SIZE']

In [None]:
# Save the per-park race totals.
# NOTE(review): shapefile field names are limited to 10 characters, so the
# longer column names are presumably shortened on write -- check the output.
access_data.to_file(wd+"data/access/access_data_race_ACS17.shp")

# Preview the first rows only instead of rendering the whole frame.
access_data.head()

#### A dataframe is created from the 'access_data' geodataframe. It is cleaned up and its values are converted to integers. Finally, we export the dataframe to a CSV file and a shapefile.

In [None]:
# Work on an explicit copy: pd.DataFrame(frame) without a copy can share
# storage with `frame`, so the assignments below could otherwise leak back
# into access_data.
df = pd.DataFrame(access_data.copy())

df['Park_Name'] = df['LOCATION_N']

# (output column, source column) pairs, in the order the columns are created.
# astype(int) truncates toward zero and raises on NaN/inf values.
race_int_columns = [
    ('Total_Pop_Served', 'access_pop'),
    ('Normalized_byArea', 'Normalized_byArea'),
    ('Non_Hispan', 'access_nonhis'),
    ('Normalized_nonhis', 'Normalized_nonhis'),
    ('White', 'access_white'),
    ('Normalized_white', 'Normalized_white'),
    ('Hispan_Latin', 'access_his_lat'),
    ('Normalized_hislat', 'Normalized_hislat'),
]
for target_name, source_name in race_int_columns:
    df[target_name] = df[source_name].astype(int)

df.head()


Export to Shapefile

In [None]:
# Drop the raw float columns; their integer versions were added under new names.
access_race_final = df.copy().drop(columns=['access_pop','access_nonhis','access_white','access_his_lat'])

# `df` is a plain DataFrame, so pass the CRS of the source GeoDataFrame
# explicitly; otherwise the rebuilt frame (and the shapefile written from it)
# may carry no projection, depending on the geopandas version.
access_race_final = gpd.GeoDataFrame(access_race_final, geometry='geometry', crs=access_data.crs)
access_race_final.to_file(wd+"data/access/access_race_final_ACS17.shp")
access_race_final.head()

Export to CSV

In [None]:
# Tabular export: no raw float columns and no geometry.
race_table_drops = ['access_pop','access_nonhis','access_white','access_his_lat', 'geometry']
access_racetable = df.drop(columns=race_table_drops)

race_table_path = wd+"data/access/access_table_race_ACS17.csv"
access_racetable.to_csv(race_table_path)
access_racetable.head()

Descriptive Statistics

In [None]:
# Summary statistics of the race access table, saved next to the table itself.
race_stats_path = wd+"data/access/access_stats_race_ACS17.csv"
Race_access_stats = access_racetable.describe()
Race_access_stats.to_csv(race_stats_path)
Race_access_stats

In [None]:
# The table also holds text columns (LOCATION_N, Park_Name); restrict the
# median to numeric columns, which pandas >= 2.0 no longer does implicitly.
access_racetable.median(numeric_only=True)

In [None]:
#access_racetable.hist('');

##  <font color=goldenrod>Run spatial analysis on the amount of people distributed within a census block group against the quarter mile park buffer area and the income census data</font>

In [None]:
# List the income layer's column labels; the bracket columns used further
# down (e.g. 'Less than', '$10,000 to') are looked up by these names.
income.columns

In [None]:
# Intersect the income block groups with the quarter-mile park buffers
# (https://geopandas.org/set_operations.html). Every output row is the piece
# of one block group that lies inside one buffer.
ip_intersection = gpd.overlay(income, quartbuff, how='intersection')

# Area of each intersected piece; numerator of the area weight used below.
ip_intersection['iarea_intersec'] = ip_intersection['geometry'].area
ip_intersection.to_file(wd+"data/access/ip_intersec_quarterbuff_ACS17.shp")

# Preview the first rows only instead of rendering the whole overlay result.
ip_intersection.head()

In [None]:
# List the columns produced by the income/buffer overlay.
ip_intersection.columns

In [None]:
# Columns needed to sanity-check the income overlay: ids, populations and areas.
ip_preview_columns = ['GEOID10', 'Total_Pop', 'fullarea_income', 'LOCATION_N',
                      'fullarea_buff', 'iarea_intersec', 'geometry']
ip_intersection[ip_preview_columns]

In [None]:
#ip_intersection.columns

In [None]:
income_clip = ip_intersection.copy().reset_index()

# Share of each block group's full area that falls inside the buffer piece.
# These are whole-column operations, so no loop over the frame is needed
# (iterating a DataFrame yields its column labels, not its rows).
income_clip['weight'] = income_clip['iarea_intersec'] / income_clip['fullarea_income']

income_clip['Total Pop Served_allincomes'] = income_clip['weight'] * income_clip['Total_Pop']

# Output bracket -> census bracket columns that are combined into it.
income_brackets = {
    'Less than $25,000': ['Less than', '$10,000 to', '$15,000 to', '$20,000 to'],
    '$25,000 to $49,999': ['$25,000 to', '$30,000 to', '$35,000 to', '$40,000 to', '$45,000 to'],
    '$50,000 to $74,999': ['$50,000 to', '$60,000 to'],
    '$75,000 to $99,999': ['$75,000 to'],
    '$100,000 to $149,999': ['$100,000 t', '$125,000 t'],
    '$150,000 or more': ['$150,000 t', '$200,000 o'],
}

# Each census bracket is weighted first and the weighted columns are then
# added left to right, so a missing value in any bracket stays missing.
for bracket_name, census_fields in income_brackets.items():
    income_clip[bracket_name] = sum(
        income_clip['weight'] * income_clip[field] for field in census_fields
    )

income_clip.head()

In [None]:
#income_clip.columns

In [None]:
# Keep only the fields that should be summed per park, plus the geometry.
income_calc_columns = [
    'LOCATION_N', 'Total_Pop', 'Total Pop Served_allincomes',
    'Less than $25,000', '$25,000 to $49,999', '$50,000 to $74,999',
    '$75,000 to $99,999', '$100,000 to $149,999', '$150,000 or more',
    'geometry',
]
income_calc = income_clip[income_calc_columns]

# One row per park: geometries are merged and every numeric field is summed.
income_access_data = income_calc.dissolve(by='LOCATION_N', aggfunc='sum', as_index=False)

##### Combine the park area data to the access data

In [None]:
# Attach ASSET_SIZE to every park row; parks without a match are dropped.
# NOTE: this rebinds income_access_data, so re-running the cell merges again.
income_access_data = income_access_data.merge(right=parks_, how='inner', on='LOCATION_N')

In [None]:
# Preview the per-park income totals with ASSET_SIZE attached (first rows only).
income_access_data.head()

##### The population served by each park is normalized by dividing it by the size of that park (ASSET_SIZE)

In [None]:
# Normalized column -> served-population column it is derived from.
income_norm_sources = {
    'TotalPop_Served_norm': 'Total Pop Served_allincomes',
    'Less than $25,000_norm': 'Less than $25,000',
    '$25,000 to $49,999_norm': '$25,000 to $49,999',
    '$50,000 to $74,999_norm': '$50,000 to $74,999',
    '$75,000 to $99,999_norm': '$75,000 to $99,999',
    '$100,000 to $149,999_norm': '$100,000 to $149,999',
    '$150,000 or more_norm': '$150,000 or more',
}

# People served per unit of park size (ASSET_SIZE).
for norm_name, served_name in income_norm_sources.items():
    income_access_data[norm_name] = income_access_data[served_name] / income_access_data['ASSET_SIZE']


In [None]:
# Save the per-park income totals.
# NOTE(review): shapefile field names are limited to 10 characters, so the
# long bracket column names are presumably shortened on write -- check the output.
income_access_data.to_file(wd+"data/access/access_data_income_ACS17.shp")

# Preview the first rows only instead of rendering the whole frame.
income_access_data.head()

#### A dataframe is created from the 'income_access_data' geodataframe. It is cleaned up and its values are converted to integers. Finally, we export the dataframe to a CSV file and a shapefile.

In [None]:
# Work on an explicit copy: pd.DataFrame(frame) without a copy can share
# storage with `frame`, so the assignments below could otherwise leak back
# into income_access_data.
df1 = pd.DataFrame(income_access_data.copy())
df1['Park_Name'] = df1['LOCATION_N']

# Served-population columns and their normalized counterparts, converted in
# place. astype(int) truncates toward zero and raises on NaN/inf values.
income_int_columns = [
    'Total Pop Served_allincomes', 'TotalPop_Served_norm',
    'Less than $25,000', 'Less than $25,000_norm',
    '$25,000 to $49,999', '$25,000 to $49,999_norm',
    '$50,000 to $74,999', '$50,000 to $74,999_norm',
    '$75,000 to $99,999', '$75,000 to $99,999_norm',
    '$100,000 to $149,999', '$100,000 to $149,999_norm',
    '$150,000 or more', '$150,000 or more_norm',
]
for column_name in income_int_columns:
    df1[column_name] = df1[column_name].astype(int)


In [None]:
access_income_final = df1.copy()

# `df1` is a plain DataFrame, so pass the CRS of the source GeoDataFrame
# explicitly; otherwise the rebuilt frame (and the shapefile written from it)
# may carry no projection, depending on the geopandas version.
access_income_final = gpd.GeoDataFrame(access_income_final, geometry='geometry', crs=income_access_data.crs)
access_income_final.to_file(wd+"data/access/access_income_final_ACS17.shp")
access_income_final.head()

In [None]:
# Build the income table from df1 (the income frame), not df (the race frame).
# The income columns were converted to integers in place, so geometry is the
# only column that has to be dropped for the CSV export.
access_incometable = df1.drop(columns=['geometry'])
access_incometable.to_csv(wd+"data/access/access_table_income_ACS17.csv")
access_incometable.head()

In [None]:
# Summary statistics of the income access table, saved next to the table itself.
income_stats_path = wd+"data/access/access_stats_income_ACS17.csv"
access_income_stats = access_incometable.describe()
access_income_stats.to_csv(income_stats_path)
access_income_stats

## <font color=peru>Run spatial analysis on the amount of people distributed within a census block group against the quarter mile park buffer area and the age/sex census data</font>

In [None]:
# Intersect the age/sex block groups with the quarter-mile park buffers
# (https://geopandas.org/set_operations.html). Every output row is the piece
# of one block group that lies inside one buffer.
age_intersection = gpd.overlay(age, quartbuff, how='intersection')

# Area of each intersected piece; numerator of the area weight used below.
age_intersection['agearea_intersec'] = age_intersection['geometry'].area
age_intersection.to_file(wd+"data/access/age_intersec_quarterbuff_ACS17.shp")

# Preview the first rows only instead of rendering the whole overlay result.
age_intersection.head()

In [None]:
# Buffer-layer fields that are not needed for the age/sex calculation.
age_unused_fields = [
    'GLOBALID', 'CREATED_BY', 'CREATED_DA', 'MODIFIED_B', 'MODIFIED_D',
    'Shape__Are', 'Shape__Len', 'fullarea_buff', 'MXASSETNUM', 'MXLOCATION',
    'MXSITEID', 'MXCREATION', 'MXSTATUS', 'MXCONDITIO', 'MXPRIORITY', 'MXLOADID',
]
age_intersec = age_intersection.drop(columns=age_unused_fields)

In [None]:
# List the columns that remain after dropping the unused buffer fields.
age_intersec.columns

In [None]:
# Columns needed to sanity-check the age/sex overlay: ids, populations and areas.
# Taken from age_intersection because fullarea_buff was dropped from age_intersec.
age_preview_columns = ['GEOID10', 'Total_pop_', 'fullarea_age', 'LOCATION_N',
                       'fullarea_buff', 'agearea_intersec', 'geometry']
age_intersection[age_preview_columns]

In [None]:
# Column labels of age_intersec; the census fields used in the weighting
# cell below (e.g. 'Male_Under', 'Female_U_1') are looked up by these names.
age_intersec.columns

In [None]:
age_clip = age_intersec.copy().reset_index()

# Share of each block group's full area that falls inside the buffer piece.
# These are whole-column operations, so no loop over the frame is needed
# (iterating a DataFrame yields its column labels, not its rows).
age_clip['weight'] = age_clip['agearea_intersec'] / age_clip['fullarea_age']

# Served column -> census column it is derived from.
age_served_columns = {
    'Totalpop_served': 'Total_pop_',
    'Males_served': 'Male',
    'Males_served_Under5': 'Male_Under',
    'Males_served_Under18': 'Male_Und_1',
    'Males_served_Over65': 'Male_Over6',
    'Females_served': 'Female_',
    'Females_served_Under5': 'Female_Und',
    'Females_served_Under18': 'Female_U_1',
    'Females_served_Over65': 'Female_Ove',
}

# Area-weighted head counts: each census count is scaled by the share of the
# block group covered by the buffer.
for served_name, census_name in age_served_columns.items():
    age_clip[served_name] = age_clip['weight'] * age_clip[census_name]

age_clip.head()

In [None]:
#age_clip.columns

In [None]:
# Keep only the fields that should be summed per park, plus the geometry.
age_calc_columns = [
    'LOCATION_N', 'Total_pop_', 'Totalpop_served',
    'Males_served', 'Males_served_Under5', 'Males_served_Under18', 'Males_served_Over65',
    'Females_served', 'Females_served_Under5', 'Females_served_Under18', 'Females_served_Over65',
    'geometry',
]
age_calc = age_clip[age_calc_columns]

# One row per park: geometries are merged and every numeric field is summed.
age_access_data = age_calc.dissolve(by='LOCATION_N', aggfunc='sum', as_index=False)

##### Combine the park area data to the access data

In [None]:
# Attach ASSET_SIZE to every park row; parks without a match are dropped.
# NOTE: this rebinds age_access_data, so re-running the cell merges again.
age_access_data = age_access_data.merge(right=parks_, how='inner', on='LOCATION_N')

In [None]:
# Preview the per-park age/sex totals with ASSET_SIZE attached (first rows only).
age_access_data.head()

In [None]:
# List the columns available before the normalized fields are added.
age_access_data.columns

##### The population served by each park is normalized by dividing it by the size of that park (ASSET_SIZE)

In [None]:
# Served-population columns that each get a "<name>_norm" counterpart:
# people served per unit of park size (ASSET_SIZE).
age_served_fields = [
    'Totalpop_served',
    'Males_served', 'Males_served_Under5', 'Males_served_Under18', 'Males_served_Over65',
    'Females_served', 'Females_served_Under5', 'Females_served_Under18', 'Females_served_Over65',
]
for served_name in age_served_fields:
    age_access_data[served_name + '_norm'] = age_access_data[served_name] / age_access_data['ASSET_SIZE']

# Combined male + female totals per age group, each followed by its normalized value.
for age_group in ('Under5', 'Under18', 'Over65'):
    combined_name = age_group + '_served'
    age_access_data[combined_name] = (
        age_access_data['Males_served_' + age_group]
        + age_access_data['Females_served_' + age_group]
    )
    age_access_data[combined_name + '_norm'] = age_access_data[combined_name] / age_access_data['ASSET_SIZE']

In [None]:
# Save the per-park age/sex totals.
# NOTE(review): shapefile field names are limited to 10 characters, so the
# long "..._served_..." column names are presumably shortened on write -- check the output.
age_access_data.to_file(wd+"data/access/access_data_age_sex_ACS17.shp")

# Preview the first rows only instead of rendering the whole frame.
age_access_data.head()

#### A dataframe is created from the 'age_access_data' geodataframe. It is cleaned up and its values are converted to integers. Finally, we export the dataframe to a CSV file and a shapefile.

In [None]:
# List every column, including the normalized and combined age-group fields.
age_access_data.columns

In [None]:
# Work on an explicit copy: pd.DataFrame(frame) without a copy can share
# storage with `frame`, so the assignments below could otherwise leak back
# into age_access_data.
# NOTE: `df` is reused here and replaces the race table built earlier.
df = pd.DataFrame(age_access_data.copy())
df['Park_Name'] = df['LOCATION_N']

# Served-population columns; each one and its "<name>_norm" counterpart is
# converted in place. astype(int) truncates toward zero and raises on NaN/inf.
age_count_columns = [
    'Totalpop_served',
    'Males_served', 'Males_served_Under5', 'Males_served_Under18', 'Males_served_Over65',
    'Females_served', 'Females_served_Under5', 'Females_served_Under18', 'Females_served_Over65',
    'Under5_served', 'Under18_served', 'Over65_served',
]
for count_name in age_count_columns:
    for column_name in (count_name, count_name + '_norm'):
        df[column_name] = df[column_name].astype(int)


In [None]:
# List the columns of the cleaned age/sex table (`df` holds the age data here).
df.columns

In [None]:
access_age_final = df.copy()

# `df` is a plain DataFrame, so pass the CRS of the source GeoDataFrame
# explicitly; otherwise the rebuilt frame (and the shapefile written from it)
# may carry no projection, depending on the geopandas version.
access_age_final = gpd.GeoDataFrame(access_age_final, geometry='geometry', crs=age_access_data.crs)
access_age_final.to_file(wd+"data/access/access_age_final_ACS17.shp")
access_age_final.head()

In [None]:
# Tabular export of the age/sex results: everything except the geometry.
age_table_path = wd+"data/access/access_table_age_ACS17.csv"
access_agetable = df.drop(columns=['geometry'])
access_agetable.to_csv(age_table_path)
access_agetable.head()

In [None]:
# Summary statistics of the age/sex access table (displayed only; the CSV
# export is currently switched off).
access_age_stats = access_agetable.describe()
#access_age_stats.to_csv(wd+"data/access/access_stats_age_ACS17.csv")
access_age_stats

In [None]:
import numpy as np

In [None]:
# The table also holds text columns (LOCATION_N, Park_Name); restrict the
# median to numeric columns, which pandas >= 2.0 no longer does implicitly.
age_median = access_agetable.median(numeric_only=True)
age_median

In [None]:
# Median, standard deviation and mean of every numeric column of the age/sex
# table (one row per statistic).
# The statistics are taken from the data itself, not from the describe()
# summary: aggregating the summary rows would mix counts, means and quantiles.
# NOTE(review): assumes a per-column statistics table is the intended result -- confirm.
descr = access_agetable.select_dtypes(include='number').agg(['median', 'std', 'mean'])
descr

### END OF CODE