# Census Demographics & Restaurant Info

In [1]:
# Dependencies
from census import Census
from config import (api_key, gkey)
import gmaps
import numpy as np
import pandas as pd
import requests
import time
from uszipcode import SearchEngine, SimpleZipcode, Zipcode


# Census API client, authenticated with api_key from config and created with year=2017.
# Every Census query further down in this notebook goes through this object.
c = Census(api_key, year=2017)

# Define target Market Areas and create DataFrame 

In [20]:
# Load the zip-code reference table and discard every row that contains a null
zip_pd = pd.read_csv("zip.csv").dropna()

# Keep only rows whose Type is "Standard" (leaves out unique and P.O. box zip codes)
is_standard = zip_pd["Type"] == "Standard"
std_pd = zip_pd[is_standard]

In [21]:
# Restrict the standard zip codes to the ten target-market counties.
# NOTE(review): counties are matched by name only, with no state filter --
# confirm zip.csv does not contain same-named counties from other states.
target_counties = ["Dallas", "Harris", "Tarrant",
                   "Bexar", "Travis", "El Paso",
                   "Collin", "Denton", "Fort Bend",
                   "Montgomery"]
county_zip_pd = std_pd.loc[std_pd["County"].isin(target_counties)]

# Extract all zip codes in the target counties to a list
county_zips = county_zip_pd["Zip Code"].tolist()

# Look up latitude and longitude for each zip code.
# The SearchEngine is created once and reused for every lookup instead of
# being rebuilt on each loop iteration.
search = SearchEngine()
lat = []
long = []
for z in county_zips:
    zipcode = search.by_zipcode(z)
    lat.append(zipcode.lat)
    long.append(zipcode.lng)

# Assemble the parallel lists into a column-oriented dictionary
county_zips_dict = {"Zip_Codes": county_zips, "Lat": lat, "Lon": long}

# Convert to a DataFrame so it can be merged with the Census data later
county_zip_df = pd.DataFrame(county_zips_dict)
county_zip_df.head()

Unnamed: 0,Zip_Codes,Lat,Lon
0,75001,32.96,-96.84
1,75002,33.08,-96.61
2,75006,32.95,-96.89
3,75007,33.0,-96.9
4,75009,33.3,-96.8


In [22]:
# Persist the zip code / coordinate table to disk without the index column.
# UTF-8 is requested explicitly to avoid encoding issues when the file is read back later.
county_zip_df.to_csv(
    "county_zip_data.csv",
    index=False,
    encoding="utf-8",
)

# Create Census DataFrame for target Market Areas

In [23]:
# Query the ACS 5-year dataset for every zip code tabulation area (ZCTA)
census_fields = ("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                 "B19301_001E")
census_data = c.acs5.get(census_fields, {'for': 'zip code tabulation area:*'})

# Build the DataFrame and give the raw Census variable codes readable column names
readable_names = {
    "B01003_001E": "Population",
    "B01002_001E": "Median Age",
    "B19013_001E": "Household Income",
    "B19301_001E": "Per Capita Income",
    "NAME": "Name",
    "zip code tabulation area": "Zip_Codes",
}
census_pd = pd.DataFrame(census_data).rename(columns=readable_names)

# Zip codes must be numeric so they can be used as the merge key later
census_pd["Zip_Codes"] = pd.to_numeric(census_pd["Zip_Codes"])
census_pd.head()

Unnamed: 0,Median Age,Population,Household Income,Per Capita Income,Name,Zip_Codes
0,38.9,17599.0,11757.0,7041.0,ZCTA5 00601,601
1,40.9,39209.0,16190.0,8978.0,ZCTA5 00602,602
2,40.4,50135.0,16645.0,10897.0,ZCTA5 00603,603
3,42.8,6304.0,13387.0,5960.0,ZCTA5 00606,606
4,41.4,27590.0,18741.0,9266.0,ZCTA5 00610,610


In [24]:
# Persist the renamed Census table to disk without the index column.
# UTF-8 is requested explicitly to avoid encoding issues when the file is read back later.
census_pd.to_csv(
    "census_data_states.csv",
    index=False,
    encoding="utf-8",
)

In [6]:
# Join the target-county zip/coordinate table to the Census figures on the shared
# Zip_Codes key (default inner join: only zip codes present in both tables remain)
zip_census_merge = county_zip_df.merge(census_pd, on="Zip_Codes")
zip_census_merge.head()

Unnamed: 0,Zip_Codes,Lat,Lon,Median Age,Population,Household Income,Per Capita Income,Name
0,75001,32.96,-96.84,32.9,14617.0,73578.0,50313.0,ZCTA5 75001
1,75002,33.08,-96.61,35.7,69688.0,98652.0,39818.0,ZCTA5 75002
2,75006,32.95,-96.89,34.8,50788.0,57415.0,27810.0,ZCTA5 75006
3,75007,33.0,-96.9,39.5,53744.0,82079.0,36289.0,ZCTA5 75007
4,75009,33.3,-96.8,37.5,10557.0,90717.0,36058.0,ZCTA5 75009


# Population DataFrame 

In [8]:
# B01001 variables requested for males (15-44yr olds)
male_fields = (
    "NAME",
    "B01001_006E", "B01001_007E", "B01001_008E",
    "B01001_009E", "B01001_010E", "B01001_011E",
    "B01001_012E", "B01001_013E", "B01001_014E",
)

# B01001 variables requested for females (15-44yr olds)
female_fields = (
    "NAME",
    "B01001_030E", "B01001_031E", "B01001_032E",
    "B01001_033E", "B01001_034E", "B01001_035E",
    "B01001_036E", "B01001_037E", "B01001_038E",
)

# One ACS 5-year request per gender, covering every zip code tabulation area
male_age_data = c.acs5.get(male_fields, {'for': 'zip code tabulation area:*'})
female_age_data = c.acs5.get(female_fields, {'for': 'zip code tabulation area:*'})

# Convert each response to a DataFrame and name the geography column Zip_Codes
# so both frames share the merge key used elsewhere in the notebook
zip_column = {"zip code tabulation area": "Zip_Codes"}
male_pd = pd.DataFrame(male_age_data).rename(columns=zip_column)
female_pd = pd.DataFrame(female_age_data).rename(columns=zip_column)

In [10]:
# Total 15-44 population per gender: sum of the nine age-bracket columns of each frame
male_pd["Total_Male"] = male_pd.B01001_006E + male_pd.B01001_007E + male_pd.B01001_008E + male_pd.B01001_009E \
    + male_pd.B01001_010E + male_pd.B01001_011E + male_pd.B01001_012E + male_pd.B01001_013E + male_pd.B01001_014E
female_pd["Total_Female"] = female_pd.B01001_030E + female_pd.B01001_031E + female_pd.B01001_032E + female_pd.B01001_033E \
    + female_pd.B01001_034E + female_pd.B01001_035E + female_pd.B01001_036E + female_pd.B01001_037E + female_pd.B01001_038E

# Merge the male and female frames on the zip code key
gender_merge = pd.merge(male_pd, female_pd, on="Zip_Codes")

# New column holding the combined total of 15-44 yr olds
gender_merge["Total"] = gender_merge.Total_Male + gender_merge.Total_Female

# Keep only Total and Zip_Codes, converting Zip_Codes to numeric for the merge below.
# .assign returns a new, independent frame, so no value is written into a slice of
# gender_merge (which is what raised SettingWithCopyWarning before).
gender_df = gender_merge[["Total", "Zip_Codes"]].assign(
    Zip_Codes=lambda frame: pd.to_numeric(frame["Zip_Codes"])
)

# Attach the 15-44 totals to the zip/Census table
demographic_df = pd.merge(zip_census_merge, gender_df, on="Zip_Codes")
demographic_df.head(10)

Unnamed: 0,Zip_Codes,Lat,Lon,Median Age,Population,Household Income,Per Capita Income,Name,Total
0,75001,32.96,-96.84,32.9,14617.0,73578.0,50313.0,ZCTA5 75001,8826.0
1,75002,33.08,-96.61,35.7,69688.0,98652.0,39818.0,ZCTA5 75002,28557.0
2,75006,32.95,-96.89,34.8,50788.0,57415.0,27810.0,ZCTA5 75006,21488.0
3,75007,33.0,-96.9,39.5,53744.0,82079.0,36289.0,ZCTA5 75007,21743.0
4,75009,33.3,-96.8,37.5,10557.0,90717.0,36058.0,ZCTA5 75009,4179.0
5,75010,33.03,-96.9,35.2,27822.0,81856.0,41328.0,ZCTA5 75010,13173.0
6,75013,33.12,-96.69,37.4,38975.0,124915.0,51388.0,ZCTA5 75013,15089.0
7,75019,32.96,-96.98,40.3,41947.0,118471.0,54352.0,ZCTA5 75019,15538.0
8,75022,33.03,-97.12,40.6,24625.0,156902.0,59110.0,ZCTA5 75022,8358.0
9,75023,33.05,-96.73,39.5,49563.0,83764.0,37519.0,ZCTA5 75023,19871.0


In [13]:
# Keep only the location and population columns and drop rows whose Total is 0.
# .copy() makes newdf an independent frame, so adding columns to it later does not
# write into a slice of demographic_df (and does not raise SettingWithCopyWarning).
newdf = demographic_df.loc[demographic_df["Total"] != 0,
                           ['Lat', 'Lon', 'Total', 'Population']].copy()
newdf.head()

Unnamed: 0,Lat,Lon,Total,Population
0,32.96,-96.84,8826.0,14617.0
1,33.08,-96.61,28557.0,69688.0
2,32.95,-96.89,21488.0,50788.0
3,33.0,-96.9,21743.0,53744.0
4,33.3,-96.8,4179.0,10557.0


# Create a Heatmap of Population for Market Areas 

In [15]:
# Configure gmaps with the Google API key (gkey, imported from config).
# This runs before any gmaps figure or layer is created below.
gmaps.configure(api_key=gkey)

In [18]:
# Heatmap weights: the Total column scaled down by a factor of 10,000
pop_rate = newdf["Total"] / 10000

# Pair each latitude with its longitude as a (Lat, Lon) tuple and store it on the frame
coord_pair = list(zip(newdf["Lat"], newdf["Lon"]))
newdf["Coordinates"] = coord_pair

# The coordinate column is what gets passed to the heatmap layer
locations = newdf["Coordinates"]
newdf.head()

Unnamed: 0,Lat,Lon,Total,Population,Coordinates
0,32.96,-96.84,8826.0,14617.0,"(32.96, -96.84)"
1,33.08,-96.61,28557.0,69688.0,"(33.08, -96.61)"
2,32.95,-96.89,21488.0,50788.0,"(32.95, -96.89)"
3,33.0,-96.9,21743.0,53744.0,"(33.0, -96.9)"
4,33.3,-96.8,4179.0,10557.0,"(33.3, -96.8)"


In [19]:
# Create a population heatmap layer: one point per zip code, weighted by pop_rate
fig = gmaps.figure()

# dissipating=False keeps the heat radius fixed while zooming; together with
# max_intensity and point_radius these settings are passed once, at construction,
# rather than being re-assigned to the same values afterwards.
heat_layer = gmaps.heatmap_layer(locations, weights=pop_rate,
                                 dissipating=False, max_intensity=100,
                                 point_radius=1)

fig.add_layer(heat_layer)

fig

Figure(layout=FigureLayout(height='420px'))

# Create Heatmap of Population Demographics for our Market Areas 