# Census Demographics & Restaraunt Info

In [1]:
# Dependencies
from census import Census
from jconfig import (api_key, gkey)
import gmaps
import numpy as np
import pandas as pd
import requests
import time
from uszipcode import SearchEngine, SimpleZipcode, Zipcode


# Census API Key
c = Census(api_key, year=2017)

# Define target Market Areas and create DataFrame 

In [2]:
#read in zip.csv
zip_pd = pd.read_csv("zip.csv")

#clear any null values
zip_pd.dropna(inplace= True)

#filter df to only include standard zip codes(exclude unique and p.o. boxes)
std_pd= zip_pd.loc[zip_pd.Type=="Standard"]

In [3]:
#filter to only include zip codes in Dallas County
county_zip_pd= std_pd.loc[(std_pd["County"]== "Dallas") | (std_pd["County"]== "Tarrant")]

#extract all zip codes in Dallas County to a list
county_zips = county_zip_pd["Zip Code"].tolist()
city_zips = county_zip_pd["City"].tolist()

#create lists for lat and long per zip code
lat=[]
long=[]
for z in county_zips:
    search= SearchEngine()
    zipcode= search.by_zipcode(z)
    latitude= zipcode.lat
    lat.append(latitude)
    longitude= zipcode.lng
    long.append(longitude)
    
#create dictionary
county_zips_dict={"City": city_zips,"Zip_Codes": county_zips,"Lat":lat, "Lon":long}

#convert list to DF in order to merge later with Census data
county_zip_df= pd.DataFrame(county_zips_dict)
county_zip_df.head()

Unnamed: 0,City,Zip_Codes,Lat,Lon
0,Addison,75001,32.96,-96.84
1,Carrollton,75006,32.95,-96.89
2,Coppell,75019,32.96,-96.98
3,Irving,75038,32.88,-96.98
4,Irving,75039,32.88,-96.95


In [4]:
# Save as a csv
# Note to avoid any issues later, use encoding="utf-8"
county_zip_df.to_csv("CityCounty_zip_data.csv", encoding="utf-8", index=False)

# Create Census DataFrame for target Market Areas

In [5]:
# Run Census Search to retrieve data on all cities
census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                          "B19301_001E"), {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column Reordering
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "NAME": "Name", "zip code tabulation area": "Zip_Codes"})
#convert Zip Codes to numeric, in order to merge later
census_pd.Zip_Codes= pd.to_numeric(census_pd.Zip_Codes)
census_pd.head()

Unnamed: 0,Median Age,Population,Household Income,Per Capita Income,Name,Zip_Codes
0,38.9,17599.0,11757.0,7041.0,ZCTA5 00601,601
1,40.9,39209.0,16190.0,8978.0,ZCTA5 00602,602
2,40.4,50135.0,16645.0,10897.0,ZCTA5 00603,603
3,42.8,6304.0,13387.0,5960.0,ZCTA5 00606,606
4,41.4,27590.0,18741.0,9266.0,ZCTA5 00610,610


In [6]:
# Save as a csv
# Note to avoid any issues later, use encoding="utf-8"
census_pd.to_csv("census_data.csv", encoding="utf-8", index=False)

In [7]:
# merge Zip DF with Census DF
zip_census_merge= pd.merge(county_zip_df,census_pd, on="Zip_Codes")
zip_census_merge.head()

Unnamed: 0,City,Zip_Codes,Lat,Lon,Median Age,Population,Household Income,Per Capita Income,Name
0,Addison,75001,32.96,-96.84,32.9,14617.0,73578.0,50313.0,ZCTA5 75001
1,Carrollton,75006,32.95,-96.89,34.8,50788.0,57415.0,27810.0,ZCTA5 75006
2,Coppell,75019,32.96,-96.98,40.3,41947.0,118471.0,54352.0,ZCTA5 75019
3,Irving,75038,32.88,-96.98,30.1,29762.0,51444.0,29512.0,ZCTA5 75038
4,Irving,75039,32.88,-96.95,32.4,18010.0,89845.0,57205.0,ZCTA5 75039


# Population DataFrame 

In [8]:
# Male population data for 15-44yr olds
male_age_data = c.acs5.get(("NAME", "B01001_006E","B01001_007E","B01001_008E","B01001_009E","B01001_010E","B01001_011E",
                      "B01001_012E","B01001_013E","B01001_014E"), {'for': 'zip code tabulation area:*'})

# Female population data for 15-44yr olds
female_age_data = c.acs5.get(("NAME", "B01001_030E","B01001_031E","B01001_032E","B01001_033E","B01001_034E","B01001_035E",
                      "B01001_036E","B01001_037E","B01001_038E"), {'for': 'zip code tabulation area:*'})
# Convert to DataFrame
male_pd = pd.DataFrame(male_age_data)
male_pd= male_pd.rename(columns={"zip code tabulation area": "Zip_Codes"})
female_pd= pd.DataFrame(female_age_data)
female_pd= female_pd.rename(columns={"zip code tabulation area": "Zip_Codes"})

In [9]:
# Total population by gender
male_pd["Total_Male"]= male_pd.B01001_006E + male_pd.B01001_007E + male_pd.B01001_008E + male_pd.B01001_009E \
    + male_pd.B01001_010E + male_pd.B01001_011E + male_pd.B01001_012E + male_pd.B01001_013E + male_pd.B01001_014E
female_pd["Total_Female"]= female_pd.B01001_030E + female_pd.B01001_031E + female_pd.B01001_032E + female_pd.B01001_033E \
    + female_pd.B01001_034E + female_pd.B01001_035E + female_pd.B01001_036E + female_pd.B01001_037E + female_pd.B01001_038E

# merge Male and Female df's
gender_merge= pd.merge(male_pd,female_pd, on="Zip_Codes")

# calculate new column for total 15-44 yr olds
gender_merge.loc[:,"Pop_15to44"]=gender_merge.Total_Male + gender_merge.Total_Female

# filter DF to include only Total and Zip Codes
gender_df = gender_merge[["Pop_15to44", "Zip_Codes"]]

# convert Zip_codes to numeric
gender_df.Zip_Codes= pd.to_numeric(gender_df.Zip_Codes)

# merge gender_df with zip_census_merge
demographic_df = pd.merge(zip_census_merge,gender_df, on="Zip_Codes")
demographic_df.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value


Unnamed: 0,City,Zip_Codes,Lat,Lon,Median Age,Population,Household Income,Per Capita Income,Name,Pop_15to44
0,Addison,75001,32.96,-96.84,32.9,14617.0,73578.0,50313.0,ZCTA5 75001,8826.0
1,Carrollton,75006,32.95,-96.89,34.8,50788.0,57415.0,27810.0,ZCTA5 75006,21488.0
2,Coppell,75019,32.96,-96.98,40.3,41947.0,118471.0,54352.0,ZCTA5 75019,15538.0
3,Irving,75038,32.88,-96.98,30.1,29762.0,51444.0,29512.0,ZCTA5 75038,16331.0
4,Irving,75039,32.88,-96.95,32.4,18010.0,89845.0,57205.0,ZCTA5 75039,11060.0
5,Garland,75040,32.93,-96.62,34.1,62277.0,59305.0,22020.0,ZCTA5 75040,25643.0
6,Garland,75041,32.88,-96.65,29.7,33304.0,42556.0,17520.0,ZCTA5 75041,14295.0
7,Garland,75042,32.92,-96.68,31.6,39559.0,42153.0,16676.0,ZCTA5 75042,16966.0
8,Garland,75043,32.86,-96.59,35.1,59951.0,58571.0,25448.0,ZCTA5 75043,23135.0
9,Garland,75044,32.96,-96.65,41.0,40942.0,64085.0,32381.0,ZCTA5 75044,16154.0


In [11]:
# Create a new DF and grabbing the Location Data to check for 0
newdf=demographic_df[['City','Lat','Lon','Pop_15to44','Population']]
newdf = newdf[newdf.Pop_15to44 != 0]
newdf.head()

Unnamed: 0,City,Lat,Lon,Pop_15to44,Population
0,Addison,32.96,-96.84,8826.0,14617.0
1,Carrollton,32.95,-96.89,21488.0,50788.0
2,Coppell,32.96,-96.98,15538.0,41947.0
3,Irving,32.88,-96.98,16331.0,29762.0
4,Irving,32.88,-96.95,11060.0,18010.0


# Create a Heatmap of Popluation for Market Areas 

In [12]:
# Configure gmaps with API key
gmaps.configure(api_key=gkey)

In [13]:
pop_rate = newdf['Population'] / 10000
coord_pair = [(xy) for xy in zip(newdf.Lat, newdf.Lon)]
newdf["Coordinates"]= coord_pair

locations = newdf["Coordinates"]
newdf.head()

Unnamed: 0,City,Lat,Lon,Pop_15to44,Population,Coordinates
0,Addison,32.96,-96.84,8826.0,14617.0,"(32.96, -96.84)"
1,Carrollton,32.95,-96.89,21488.0,50788.0,"(32.95, -96.89)"
2,Coppell,32.96,-96.98,15538.0,41947.0,"(32.96, -96.98)"
3,Irving,32.88,-96.98,16331.0,29762.0,"(32.88, -96.98)"
4,Irving,32.88,-96.95,11060.0,18010.0,"(32.88, -96.95)"


In [14]:
# Create a poverty Heatmap layer
fig = gmaps.figure()

heat_layer = gmaps.heatmap_layer(locations, weights=pop_rate, 
                                 dissipating=False, max_intensity=100,
                                 point_radius = 1)

# Adjust heat_layer setting to help with heatmap dissipating on zoom
heat_layer.dissipating = False
heat_layer.max_intensity = 100
heat_layer.point_radius = 1

fig.add_layer(heat_layer)

fig

Figure(layout=FigureLayout(height='420px'))

In [None]:
# NEW SECTION 

In [None]:
# Save as a csv
# Note to avoid any issues later, use encoding="utf-8"
#newdf.to_csv("new_dem_data.csv", encoding="utf-8", index=False)

In [None]:
# Adding Coordinate Pair
#pop_rate = newdf['Population'] / 10000
#coord_pair = [(xy) for xy in zip(newdf.Lat, newdf.Lon)]
#newdf["Coordinates"]= coord_pair

#locations = newdf["Coordinates"]
#newdf.head()

In [None]:
#newdf["Lat_str"] = newdf.Lat.astype(str)
#newdf["Lon_str"] = newdf.Lon.astype(str)
#newdf["tgt_coord"] = newdf.Lat_str + "," + newdf.Lon_str

In [None]:
# geocoordinates and pullling restaurants within a radius of 
#target_coordinates = newdf.tgt_coord
#target_radius = 50
#target_type = "restaurant"
#target_rated = "rating"

# set up a parameters dictionary
#params = {
#    "location": target_coordinates,
#    "radius": target_radius,
#    "type": target_type,
#    "key": gkey
#}

# base url
#base_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"

# run a request using our params dictionary
#response = requests.get(base_url, params=params)

In [None]:
# convert response to json
#places_data = response.json()

# Print the json (pretty printed)
#len(json.dumps(places_data, sort_keys=True))

In [None]:
# looking at Restaraunts 

In [None]:
# geocoordinates
target_coordinates = "32.7766642,-96.79698789999999" # Coordinates for Dallas, TX 
target_radius = 50
target_type = "restaurant"

# set up a parameters dictionary
params = {
    "location": target_coordinates,
    "radius": target_radius,
    "type": target_type,
    "key": gkey
}

# base url
base_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"

# run a request using our params dictionary
response = requests.get(base_url, params=params)

# All coords need to 
#weights = newdf['Total'] / 1000
#coord_pair = [(xy) for xy in zip(newdf.Lat,newdf.Lon)]
#newdf["Coordinates"]= coord_pair

#locations = newdf["Coordinates"]
#newdf.head()