In [1]:
import json
import os

import gmaps
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from census import Census
from uszipcode import SearchEngine, SimpleZipcode, Zipcode

# Configure gmaps.
# The Google Maps API key is read from the GOOGLE_API_KEY environment variable
# so the credential is never stored in the notebook. A KeyError here means the
# variable has not been set for the kernel's environment.
# NOTE(review): a key was previously committed in this cell; treat it as
# exposed and rotate it.
gmaps.configure(api_key=os.environ["GOOGLE_API_KEY"])

In [2]:
# Load the zip code reference table and discard every row that has a missing value
zip_pd = pd.read_csv("zip.csv").dropna()

# Keep only rows whose Type is "Standard" (drops unique and P.O. box zip codes)
is_standard = zip_pd["Type"] == "Standard"
std_pd = zip_pd[is_standard]

In [3]:
# Counties whose standard zip codes are kept for the analysis
TARGET_COUNTIES = [
    "Dallas", "Harris", "Tarrant", "Bexar", "Travis",
    "El Paso", "Collin", "Denton", "Fort Bend", "Montgomery",
]

# Filter to only include zip codes in the target counties
county_zip_pd = std_pd.loc[std_pd["County"].isin(TARGET_COUNTIES)]

# Extract the zip codes and their city names as parallel lists
county_zips = county_zip_pd["Zip Code"].tolist()
city_zips = county_zip_pd["City"].tolist()

# Look up latitude/longitude per zip code.
# The SearchEngine is created once and reused; building a new one for every
# zip code repeats the same setup work on each iteration.
search = SearchEngine()
lat = []
long = []
for z in county_zips:
    zipcode = search.by_zipcode(z)
    lat.append(zipcode.lat)
    long.append(zipcode.lng)

# Assemble the parallel lists into a dictionary of columns
county_zips_dict = {"City": city_zips, "Zip_Codes": county_zips, "Lat": lat, "Lon": long}
# Convert the dictionary to a DataFrame in order to merge later with Census data
county_zip_df = pd.DataFrame(county_zips_dict)
county_zip_df.head()

Unnamed: 0,City,Zip_Codes,Lat,Lon
0,Addison,75001,32.96,-96.84
1,Allen,75002,33.08,-96.61
2,Carrollton,75006,32.95,-96.89
3,Carrollton,75007,33.0,-96.9
4,Celina,75009,33.3,-96.8


In [4]:
# The Census API key is read from the CENSUS_API_KEY environment variable so
# the credential is never stored in the notebook. A KeyError here means the
# variable has not been set for the kernel's environment.
# NOTE(review): a key was previously committed in this cell; treat it as
# exposed and rotate it.
apikey = os.environ["CENSUS_API_KEY"]
c = Census(apikey, year=2017)

# Run the ACS 5-year API call for the general variables, for every
# zip code tabulation area (ZCTA)
census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                          "B19301_001E",
                          "B17001_002E"), {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Rename the Census variable codes to readable column names
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "NAME": "Name", "zip code tabulation area": "Zip_Codes"})

# Convert Zip_Codes to numeric so it can be merged with the zip code DataFrame
census_pd["Zip_Codes"] = pd.to_numeric(census_pd["Zip_Codes"])
census_pd.head()

Unnamed: 0,Median Age,Population,Poverty Count,Household Income,Per Capita Income,Name,Zip_Codes
0,38.9,17599.0,11282.0,11757.0,7041.0,ZCTA5 00601,601
1,40.9,39209.0,20428.0,16190.0,8978.0,ZCTA5 00602,602
2,40.4,50135.0,25176.0,16645.0,10897.0,ZCTA5 00603,603
3,42.8,6304.0,4092.0,13387.0,5960.0,ZCTA5 00606,606
4,41.4,27590.0,12553.0,18741.0,9266.0,ZCTA5 00610,610


In [5]:
# Join the zip/coordinate table to the Census table on the shared Zip_Codes column
# (default inner join: only zip codes present in both tables are kept)
zip_census_merge = county_zip_df.merge(census_pd, on="Zip_Codes")
zip_census_merge.head()

Unnamed: 0,City,Zip_Codes,Lat,Lon,Median Age,Population,Poverty Count,Household Income,Per Capita Income,Name
0,Addison,75001,32.96,-96.84,32.9,14617.0,1257.0,73578.0,50313.0,ZCTA5 75001
1,Allen,75002,33.08,-96.61,35.7,69688.0,4746.0,98652.0,39818.0,ZCTA5 75002
2,Carrollton,75006,32.95,-96.89,34.8,50788.0,7466.0,57415.0,27810.0,ZCTA5 75006
3,Carrollton,75007,33.0,-96.9,39.5,53744.0,4207.0,82079.0,36289.0,ZCTA5 75007
4,Celina,75009,33.3,-96.8,37.5,10557.0,1042.0,90717.0,36058.0,ZCTA5 75009


In [None]:
# Census table IDs explained: https://www.census.gov/programs-surveys/acs/guidance/which-data-tool/table-ids-explained.html
# ACS Census table search (American FactFinder, since retired; see https://data.census.gov): https://factfinder.census.gov/faces/nav/jsf/pages/searchresults.xhtml?refresh=t
# ACS 5-year 2017 variable list: https://api.census.gov/data/2017/acs/acs5/variables.html
# Census data documentation (censusdata package): https://jtleider.github.io/censusdata/
# Census table ID variables explained: https://www.census.gov/programs-surveys/acs/guidance/which-data-tool/table-ids-explained.html

In [None]:
#### pull population data for 15-44 year olds, the demographic most likely to order delivery####
# Relies on `apikey` defined in the earlier Census cell.
c = Census(apikey, year=2017)

# Same geography for both requests: every zip code tabulation area
zcta_geo = {'for': 'zip code tabulation area:*'}

# B01001 variable codes requested for each sex: _006.._014 for males and
# _030.._038 for females (the 15-44 age buckets this cell totals)
male_age_vars = ["B01001_{:03d}E".format(i) for i in range(6, 15)]
female_age_vars = ["B01001_{:03d}E".format(i) for i in range(30, 39)]

# Get male and female population data for 15-44 yr olds
male_age_data = c.acs5.get(("NAME",) + tuple(male_age_vars), zcta_geo)
female_age_data = c.acs5.get(("NAME",) + tuple(female_age_vars), zcta_geo)

# Convert to DataFrames, renaming the geography column to align with the
# other DataFrames in the notebook
male_pd = pd.DataFrame(male_age_data).rename(
    columns={"zip code tabulation area": "Zip_Codes"})
female_pd = pd.DataFrame(female_age_data).rename(
    columns={"zip code tabulation area": "Zip_Codes"})

# Total population by gender. skipna=False keeps the behavior of adding the
# columns with `+`: a missing bucket makes the total missing instead of
# silently undercounting.
male_pd["Total_Male"] = male_pd[male_age_vars].sum(axis=1, skipna=False)
female_pd["Total_Female"] = female_pd[female_age_vars].sum(axis=1, skipna=False)

# Merge male and female DataFrames
gender_merge = pd.merge(male_pd, female_pd, on="Zip_Codes")
# Calculate new column for total 15-44 yr olds
gender_merge["Total"] = gender_merge["Total_Male"] + gender_merge["Total_Female"]

# Keep only Total and Zip_Codes. The explicit copy makes gender_df an
# independent frame, so the assignment below does not trigger
# SettingWithCopyWarning.
gender_df = gender_merge[["Total", "Zip_Codes"]].copy()
# Convert Zip_Codes to numeric so it can be merged with zip_census_merge
gender_df["Zip_Codes"] = pd.to_numeric(gender_df["Zip_Codes"])

In [None]:
# Attach the 15-44 year old totals to the zip/Census table via the shared
# Zip_Codes column (default inner join)
demographic_df = zip_census_merge.merge(gender_df, on="Zip_Codes")
demographic_df.head(10)

In [None]:
# Build the mapping DataFrame: coordinates plus the two population measures,
# dropping zip codes whose 15-44 total is exactly 0.
# The explicit copy makes newdf independent of demographic_df, so columns can
# be added to it later without SettingWithCopyWarning.
newdf = demographic_df.loc[demographic_df["Total"] != 0,
                           ['Lat', 'Lon', 'Total', 'Population']].copy()
newdf.head()

In [None]:

# 15-44 year old totals scaled down by 1000.
# NOTE(review): the heat map cell passes newdf.Population as its weights, not
# this `weights` series - confirm which one was intended.
weights = newdf['Total'] / 1000

# One (lat, lon) tuple per row, stored alongside the other columns
coord_pair = list(zip(newdf.Lat, newdf.Lon))
newdf["Coordinates"] = coord_pair

# Series of coordinate tuples handed to the heat map layer
locations = newdf["Coordinates"]
newdf.head()

In [None]:
# Heat map of the selected zip codes on a terrain base map.
# NOTE(review): the layer is weighted by total Population, while an earlier
# cell computes `weights` from the 15-44 totals - confirm which was intended.
figure_layout = dict(
    width='800px',
    height='800px',
    border='1px solid black',
    padding='1px',
)
fig = gmaps.figure(layout=figure_layout, map_type="TERRAIN")

heat_layer = gmaps.heatmap_layer(
    locations,
    weights=newdf["Population"],
    max_intensity=.001,
    point_radius=.050,
    dissipating=False,
)

fig.add_layer(heat_layer)
fig

# Render a bare gmaps.Map widget with no layers.
import os

import gmaps
import gmaps.datasets

# The Google Maps API key is read from the GOOGLE_API_KEY environment variable
# so the credential is never stored in the notebook.
# NOTE(review): a key was previously committed here; treat it as exposed and
# rotate it.
gmaps.configure(api_key=os.environ["GOOGLE_API_KEY"])

m = gmaps.Map()
m