In [11]:

import pandas as pd
import requests
import os, sys, inspect
import numpy as np
import matplotlib.pyplot as plt
# Add the parent of the notebook's directory (the project root) to sys.path so
# that the `src` package can be imported below.
#
# NOTE: inspect.getfile(inspect.currentframe()) is not a reliable way to find
# the notebook's folder. Inside a kernel the "file" of a cell is a synthetic
# name or, on newer ipykernel versions, a temporary file -- not the notebook.
# The kernel's working directory is used instead. This assumes the kernel was
# started in the notebook's own directory (the Jupyter default) -- confirm if
# the notebook is launched some other way.
currdir = os.path.abspath(os.getcwd())
rootdir = os.path.dirname(currdir)

# Guard the insert so re-running the cell does not pile up duplicate entries
if rootdir not in sys.path:
    sys.path.insert(0, rootdir)

from src.config import googleapikey

In [12]:
# Read the intermediate zip-code list. The CSV has no header row, so its
# single column is given the name "Zip Code" explicitly.
zip_csv_parts = ("data", "int", "zip_codes.csv")
ffname = os.path.join(rootdir, *zip_csv_parts)
biz_by_zip_df = pd.read_csv(
    ffname,
    names=["Zip Code"],
    header=None,
)

# Preview the first few rows
biz_by_zip_df.head()

Unnamed: 0,Zip Code
0,78701
1,78702
2,78703
3,78704
4,78705


In [13]:
# Geocode every zip code with the Google Geocoding API and store the result in
# new "Lat"/"Lng" columns. Rows that cannot be geocoded are removed.

# Add float lat/lng columns (NaN until geocoded) for the Google searches below
biz_by_zip_df["Lat"] = np.nan
biz_by_zip_df["Lng"] = np.nan

# The endpoint never changes, so define it once outside the loop
base_url = "https://maps.googleapis.com/maps/api/geocode/json"

# set up params
params = {
    "key": googleapikey
}

# Index labels of rows that could not be geocoded. They are dropped after the
# loop so the frame is never mutated while it is being iterated.
failed_rows = []

# Loop through and grab the lat/lng using Google maps.
# The zip-code column is iterated directly: iterrows() would upcast the integer
# zip codes to float now that Lat/Lng are float columns.
for index, zip_code in biz_by_zip_df["Zip Code"].items():

    # update params with zipcode each loop
    params['address'] = zip_code

    # Log the request with the API key masked, so the secret never ends up in
    # the notebook's saved output
    print(requests.Request("GET", base_url, params={**params, "key": "REDACTED"}).prepare().url)

    # Run the request and parse the JSON body. A network/HTTP/parse failure
    # skips this zip code instead of aborting the whole run.
    try:
        response = requests.get(base_url, params=params, timeout=30)
        response.raise_for_status()
        zip_location = response.json()
    except (requests.RequestException, ValueError) as err:
        # Only the exception type is printed: the exception text can contain
        # the full request URL, including the API key.
        print(f"Request failed for zip code {zip_code} ({type(err).__name__}). Skipping")
        failed_rows.append(index)
        continue

    # Pull the coordinates of the first match; an empty or malformed result
    # set (e.g. ZERO_RESULTS, REQUEST_DENIED) skips this zip code.
    try:
        location = zip_location["results"][0]["geometry"]["location"]
        zip_lat = location["lat"]
        zip_lng = location["lng"]
    except (KeyError, IndexError):
        print(f"No geocoding result for zip code {zip_code} (status: {zip_location.get('status')}). Skipping")
        failed_rows.append(index)
        continue

    biz_by_zip_df.loc[index, "Lat"] = zip_lat
    biz_by_zip_df.loc[index, "Lng"] = zip_lng

# drops rows with errors
if failed_rows:
    biz_by_zip_df.drop(labels=failed_rows, inplace=True)

# Visualize
biz_by_zip_df.head()

https://maps.googleapis.com/maps/api/geocode/json?key=REDACTED&address=78701
https://maps.googleapis.com/maps/api/geocode/json?key=REDACTED&address=78702
https://maps.googleapis.com/maps/api/geocode/json?key=REDACTED&address=78703
https://maps.googleapis.com/maps/api/geocode/json?key=REDACTED&address=78704
https://maps.googleapis.com/maps/api/geocode/json?key=REDACTED&address=78705
https://maps.googleapis.com/maps/api/geocode/json?key=REDACTED&address=78717
https://maps.googleapis.com/maps/api/geocode/json?key=REDACTED&address=78721
https://maps.googleapis.com/maps/api/geocode/json?key=REDACTED&address=78722
https://maps.googleapis.com/maps/api/geocode/json?key=REDACTED&address=78723
https://maps.googleapis.com/

Unnamed: 0,Zip Code,Lat,Lng
0,78701,30.2729,-97.7444
1,78702,30.2604,-97.7145
2,78703,30.2915,-97.7688
3,78704,30.2457,-97.7688
4,78705,30.2962,-97.739


Bank Count

In [14]:
# Count the banks around each zip code's coordinates and store the result in a
# "Bank Count" column. A lookup that fails is recorded as NaN rather than as a
# misleading count of 0.
#
# NOTE(review): Google has since retired the Places "Radar Search" endpoint
# used here. Re-running this cell likely requires migrating to Nearby Search,
# which pages its results, so counts may not be directly comparable -- confirm
# before re-running.

# Set up params
params = {
    "radius": 5000,
    "type": "bank",
    "key": googleapikey
}

# Endpoint url for Google Places Radar (loop-invariant, so defined once)
base_url = "https://maps.googleapis.com/maps/api/place/radarsearch/json"

# One entry per row, in row order; assigned to the frame once after the loop
bank_counts = []

# Loop through and run Google search to get all banks in 3.1 mile radius (5000 meters)
for lat, lng in zip(biz_by_zip_df["Lat"], biz_by_zip_df["Lng"]):

    # Update params with the lat/lng we identified earlier
    params['location'] = f"{lat},{lng}"

    # Print the url with the API key masked, so the secret never ends up in
    # the notebook's saved output
    print(requests.Request("GET", base_url, params={**params, "key": "REDACTED"}).prepare().url)

    # Run a request and convert to json; a network/HTTP/parse failure leaves
    # this row empty instead of aborting the whole run
    try:
        response = requests.get(base_url, params=params, timeout=30)
        response.raise_for_status()
        bank_data = response.json()
    except (requests.RequestException, ValueError) as err:
        # Only the exception type is printed: the exception text can contain
        # the full request URL, including the API key.
        print(f"Request failed ({type(err).__name__}). Bank count left empty")
        print("")
        bank_counts.append(np.nan)
        continue

    # An error status (e.g. OVER_QUERY_LIMIT, REQUEST_DENIED) carries no
    # results; it must not be mistaken for "no banks here"
    status = bank_data.get("status")
    if status not in ("OK", "ZERO_RESULTS"):
        print(f"Places API returned status {status}. Bank count left empty")
        print("")
        bank_counts.append(np.nan)
        continue

    # Measure bank count on the number of results in the retrieved area
    bank_count = len(bank_data.get("results", []))

    print(f"Final Bank Count: {bank_count}")
    print("")

    bank_counts.append(bank_count)

# Store the bank counts into the Data Frame
biz_by_zip_df["Bank Count"] = bank_counts

# Visualize
biz_by_zip_df.head()

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&type=bank&key=REDACTED&location=30.2729209%2C-97.74438630000002
Final Bank Count: 109

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&type=bank&key=REDACTED&location=30.2603535%2C-97.7145152
Final Bank Count: 79

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&type=bank&key=REDACTED&location=30.2915328%2C-97.76883579999999
Final Bank Count: 110

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&type=bank&key=REDACTED&location=30.24567279999999%2C-97.76883579999999
Final Bank Count: 95

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&type=bank&key=REDACTED&location=30.2961708%2C-97.73895429999999
Final Bank Count: 108

https://maps.googleapis.com/maps/api/place/radarsear

Unnamed: 0,Zip Code,Lat,Lng,Bank Count
0,78701,30.2729,-97.7444,109
1,78702,30.2604,-97.7145,79
2,78703,30.2915,-97.7688,110
3,78704,30.2457,-97.7688,95
4,78705,30.2962,-97.739,108


Liquor Store Count

In [15]:
# Count the liquor stores around each zip code's coordinates and store the
# result in a "Liquor Store Count" column. A lookup that fails is recorded as
# NaN rather than as a misleading count of 0.
#
# NOTE(review): Google has since retired the Places "Radar Search" endpoint
# used here. Re-running this cell likely requires migrating to Nearby Search,
# which pages its results, so counts may not be directly comparable -- confirm
# before re-running.

# Set up params
params = {
    "radius": 5000,
    "type": "liquor_store",
    "key": googleapikey
}

# Endpoint url for Google Places Radar (loop-invariant, so defined once)
base_url = "https://maps.googleapis.com/maps/api/place/radarsearch/json"

# One entry per row, in row order; assigned to the frame once after the loop
liquor_counts = []

# Loop through and run Google search to get all liquor stores in 3.1 mile radius (5000 meters)
for lat, lng in zip(biz_by_zip_df["Lat"], biz_by_zip_df["Lng"]):

    # Update params with the lat/lng we identified earlier
    params['location'] = f"{lat},{lng}"

    # Print the url with the API key masked, so the secret never ends up in
    # the notebook's saved output
    print(requests.Request("GET", base_url, params={**params, "key": "REDACTED"}).prepare().url)

    # Run a request and convert to json; a network/HTTP/parse failure leaves
    # this row empty instead of aborting the whole run
    try:
        response = requests.get(base_url, params=params, timeout=30)
        response.raise_for_status()
        liquor_data = response.json()
    except (requests.RequestException, ValueError) as err:
        # Only the exception type is printed: the exception text can contain
        # the full request URL, including the API key.
        print(f"Request failed ({type(err).__name__}). Liquor store count left empty")
        print("")
        liquor_counts.append(np.nan)
        continue

    # An error status (e.g. OVER_QUERY_LIMIT, REQUEST_DENIED) carries no
    # results; it must not be mistaken for "no liquor stores here"
    status = liquor_data.get("status")
    if status not in ("OK", "ZERO_RESULTS"):
        print(f"Places API returned status {status}. Liquor store count left empty")
        print("")
        liquor_counts.append(np.nan)
        continue

    # Measure liquor store count on the number of results in the retrieved area
    liquor_count = len(liquor_data.get("results", []))

    print(f"Final Liquor Store Count: {liquor_count}")
    print("")

    liquor_counts.append(liquor_count)

# Store the liquor store counts into the Data Frame
biz_by_zip_df["Liquor Store Count"] = liquor_counts

# Visualize
biz_by_zip_df.head()

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&type=liquor_store&key=REDACTED&location=30.2729209%2C-97.74438630000002
Final Liquor Store Count: 61

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&type=liquor_store&key=REDACTED&location=30.2603535%2C-97.7145152
Final Liquor Store Count: 54

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&type=liquor_store&key=REDACTED&location=30.2915328%2C-97.76883579999999
Final Liquor Store Count: 46

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&type=liquor_store&key=REDACTED&location=30.24567279999999%2C-97.76883579999999
Final Liquor Store Count: 52

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&type=liquor_store&key=REDACTED&location=30.2961708%2C-97.73895429999999
Final

Unnamed: 0,Zip Code,Lat,Lng,Bank Count,Liquor Store Count
0,78701,30.2729,-97.7444,109,61
1,78702,30.2604,-97.7145,79,54
2,78703,30.2915,-97.7688,110,46
3,78704,30.2457,-97.7688,95,52
4,78705,30.2962,-97.739,108,54


Wal-Mart Count

In [16]:
# Count the Wal-Mart matches around each zip code's coordinates and store the
# result in a "Wal-Mart Count" column. A lookup that fails is recorded as NaN
# rather than as a misleading count of 0.
#
# NOTE(review): Google has since retired the Places "Radar Search" endpoint
# used here. Re-running this cell likely requires migrating to Nearby Search,
# which pages its results, so counts may not be directly comparable -- confirm
# before re-running.

# Set up params
params = {
    "radius": 5000,
    "name": "Wal-Mart",
    "key": googleapikey
}

# Endpoint url for Google Places Radar (loop-invariant, so defined once)
base_url = "https://maps.googleapis.com/maps/api/place/radarsearch/json"

# One entry per row, in row order; assigned to the frame once after the loop
walmart_counts = []

# Loop through and run Google search to get all walmarts in 3.1 mile radius (5000 meters)
for lat, lng in zip(biz_by_zip_df["Lat"], biz_by_zip_df["Lng"]):

    # Update params with the lat/lng we identified earlier
    params['location'] = f"{lat},{lng}"

    # Print the url with the API key masked, so the secret never ends up in
    # the notebook's saved output
    print(requests.Request("GET", base_url, params={**params, "key": "REDACTED"}).prepare().url)

    # Run a request and convert to json; a network/HTTP/parse failure leaves
    # this row empty instead of aborting the whole run
    try:
        response = requests.get(base_url, params=params, timeout=30)
        response.raise_for_status()
        walmart_data = response.json()
    except (requests.RequestException, ValueError) as err:
        # Only the exception type is printed: the exception text can contain
        # the full request URL, including the API key.
        print(f"Request failed ({type(err).__name__}). Wal-Mart count left empty")
        print("")
        walmart_counts.append(np.nan)
        continue

    # An error status (e.g. OVER_QUERY_LIMIT, REQUEST_DENIED) carries no
    # results; it must not be mistaken for "no Wal-Marts here"
    status = walmart_data.get("status")
    if status not in ("OK", "ZERO_RESULTS"):
        print(f"Places API returned status {status}. Wal-Mart count left empty")
        print("")
        walmart_counts.append(np.nan)
        continue

    # Measure walmart count on the number of results in the retrieved area
    walmart_count = len(walmart_data.get("results", []))

    print(f"Final Wal-Mart Count: {walmart_count}")
    print("")

    walmart_counts.append(walmart_count)

# Store the walmart counts into the Data Frame
biz_by_zip_df["Wal-Mart Count"] = walmart_counts

# Visualize
biz_by_zip_df.head()

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&name=Wal-Mart&key=REDACTED&location=30.2729209%2C-97.74438630000002
Final Wal-Mart Count: 1

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&name=Wal-Mart&key=REDACTED&location=30.2603535%2C-97.7145152
Final Wal-Mart Count: 15

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&name=Wal-Mart&key=REDACTED&location=30.2915328%2C-97.76883579999999
Final Wal-Mart Count: 1

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&name=Wal-Mart&key=REDACTED&location=30.24567279999999%2C-97.76883579999999
Final Wal-Mart Count: 17

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&name=Wal-Mart&key=REDACTED&location=30.2961708%2C-97.73895429999999
Final Wal-Mart Count: 14

https://maps.goog

Unnamed: 0,Zip Code,Lat,Lng,Bank Count,Liquor Store Count,Wal-Mart Count
0,78701,30.2729,-97.7444,109,61,1
1,78702,30.2604,-97.7145,79,54,15
2,78703,30.2915,-97.7688,110,46,1
3,78704,30.2457,-97.7688,95,52,17
4,78705,30.2962,-97.739,108,54,14


Target Count

In [19]:
# Count the Target matches around each zip code's coordinates and store the
# result in a "Target Count" column. A lookup that fails is recorded as NaN
# rather than as a misleading count of 0.
#
# NOTE(review): Google has since retired the Places "Radar Search" endpoint
# used here. Re-running this cell likely requires migrating to Nearby Search,
# which pages its results, so counts may not be directly comparable -- confirm
# before re-running.

# Set up params
params = {
    "radius": 5000,
    "name": "Target",
    "key": googleapikey
}

# Endpoint url for Google Places Radar (loop-invariant, so defined once)
base_url = "https://maps.googleapis.com/maps/api/place/radarsearch/json"

# One entry per row, in row order; assigned to the frame once after the loop
target_counts = []

# Loop through and run Google search to get all Targets in 3.1 mile radius (5000 meters)
for lat, lng in zip(biz_by_zip_df["Lat"], biz_by_zip_df["Lng"]):

    # Update params with the lat/lng we identified earlier
    params['location'] = f"{lat},{lng}"

    # Print the url with the API key masked, so the secret never ends up in
    # the notebook's saved output
    print(requests.Request("GET", base_url, params={**params, "key": "REDACTED"}).prepare().url)

    # Run a request and convert to json; a network/HTTP/parse failure leaves
    # this row empty instead of aborting the whole run
    try:
        response = requests.get(base_url, params=params, timeout=30)
        response.raise_for_status()
        target_data = response.json()
    except (requests.RequestException, ValueError) as err:
        # Only the exception type is printed: the exception text can contain
        # the full request URL, including the API key.
        print(f"Request failed ({type(err).__name__}). Target count left empty")
        print("")
        target_counts.append(np.nan)
        continue

    # An error status (e.g. OVER_QUERY_LIMIT, REQUEST_DENIED) carries no
    # results; it must not be mistaken for "no Targets here"
    status = target_data.get("status")
    if status not in ("OK", "ZERO_RESULTS"):
        print(f"Places API returned status {status}. Target count left empty")
        print("")
        target_counts.append(np.nan)
        continue

    # Measure target count on the number of results in the retrieved area
    target_count = len(target_data.get("results", []))

    print(f"Final Target Count: {target_count}")
    print("")

    target_counts.append(target_count)

# Store the target counts into the Data Frame
biz_by_zip_df["Target Count"] = target_counts

# Visualize
biz_by_zip_df.head()

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&name=Target&key=REDACTED&location=30.2729209%2C-97.74438630000002
Final Target Count: 7

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&name=Target&key=REDACTED&location=30.2603535%2C-97.7145152
Final Target Count: 2

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&name=Target&key=REDACTED&location=30.2915328%2C-97.76883579999999
Final Target Count: 2

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&name=Target&key=REDACTED&location=30.24567279999999%2C-97.76883579999999
Final Target Count: 5

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&name=Target&key=REDACTED&location=30.2961708%2C-97.73895429999999
Final Target Count: 4

https://maps.googleapis.com/maps/api/pla

Unnamed: 0,Zip Code,Lat,Lng,Bank Count,Liquor Store Count,Wal-Mart Count,Starbucks Count,Target Count
0,78701,30.2729,-97.7444,109,61,1,29,7
1,78702,30.2604,-97.7145,79,54,15,24,2
2,78703,30.2915,-97.7688,110,46,1,22,2
3,78704,30.2457,-97.7688,95,52,17,27,5
4,78705,30.2962,-97.739,108,54,14,23,4


Starbucks Count

In [17]:
# Count the Starbucks matches around each zip code's coordinates and store the
# result in a "Starbucks Count" column. A lookup that fails is recorded as NaN
# rather than as a misleading count of 0.
#
# NOTE(review): Google has since retired the Places "Radar Search" endpoint
# used here. Re-running this cell likely requires migrating to Nearby Search,
# which pages its results, so counts may not be directly comparable -- confirm
# before re-running.

# Set up params
params = {
    "radius": 5000,
    "name": "Starbucks",
    "key": googleapikey
}

# Endpoint url for Google Places Radar (loop-invariant, so defined once)
base_url = "https://maps.googleapis.com/maps/api/place/radarsearch/json"

# One entry per row, in row order; assigned to the frame once after the loop
starbucks_counts = []

# Loop through and run Google search to get all Starbucks in 3.1 mile radius (5000 meters)
for lat, lng in zip(biz_by_zip_df["Lat"], biz_by_zip_df["Lng"]):

    # Update params with the lat/lng we identified earlier
    params['location'] = f"{lat},{lng}"

    # Print the url with the API key masked, so the secret never ends up in
    # the notebook's saved output
    print(requests.Request("GET", base_url, params={**params, "key": "REDACTED"}).prepare().url)

    # Run a request and convert to json; a network/HTTP/parse failure leaves
    # this row empty instead of aborting the whole run
    try:
        response = requests.get(base_url, params=params, timeout=30)
        response.raise_for_status()
        starbucks_data = response.json()
    except (requests.RequestException, ValueError) as err:
        # Only the exception type is printed: the exception text can contain
        # the full request URL, including the API key.
        print(f"Request failed ({type(err).__name__}). Starbucks count left empty")
        print("")
        starbucks_counts.append(np.nan)
        continue

    # An error status (e.g. OVER_QUERY_LIMIT, REQUEST_DENIED) carries no
    # results; it must not be mistaken for "no Starbucks here"
    status = starbucks_data.get("status")
    if status not in ("OK", "ZERO_RESULTS"):
        print(f"Places API returned status {status}. Starbucks count left empty")
        print("")
        starbucks_counts.append(np.nan)
        continue

    # Measure Starbucks count on the number of results in the retrieved area
    starbucks_count = len(starbucks_data.get("results", []))

    print(f"Final Starbucks Count: {starbucks_count}")
    print("")

    starbucks_counts.append(starbucks_count)

# Store the Starbucks counts into the Data Frame
biz_by_zip_df["Starbucks Count"] = starbucks_counts

# Visualize
biz_by_zip_df.head()

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&name=Starbucks&key=REDACTED&location=30.2729209%2C-97.74438630000002
Final Starbucks Count: 29

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&name=Starbucks&key=REDACTED&location=30.2603535%2C-97.7145152
Final Starbucks Count: 24

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&name=Starbucks&key=REDACTED&location=30.2915328%2C-97.76883579999999
Final Starbucks Count: 22

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&name=Starbucks&key=REDACTED&location=30.24567279999999%2C-97.76883579999999
Final Starbucks Count: 27

https://maps.googleapis.com/maps/api/place/radarsearch/json?radius=5000&name=Starbucks&key=REDACTED&location=30.2961708%2C-97.73895429999999
Final Starbucks Count: 23

https

Unnamed: 0,Zip Code,Lat,Lng,Bank Count,Liquor Store Count,Wal-Mart Count,Starbucks Count
0,78701,30.2729,-97.7444,109,61,1,29
1,78702,30.2604,-97.7145,79,54,15,24
2,78703,30.2915,-97.7688,110,46,1,22
3,78704,30.2457,-97.7688,95,52,17,27
4,78705,30.2962,-97.739,108,54,14,23


In [21]:
# Write the per-zip business counts to the intermediate data folder (without
# the index column), then display the full table as the cell's result.
output_parts = ("data", "int", "businesses_by_zip.csv")
output = os.path.join(rootdir, *output_parts)
biz_by_zip_df.to_csv(
    output,
    index=False,
)
biz_by_zip_df

Unnamed: 0,Zip Code,Lat,Lng,Bank Count,Liquor Store Count,Wal-Mart Count,Starbucks Count,Target Count
0,78701,30.2729,-97.7444,109,61,1,29,7
1,78702,30.2604,-97.7145,79,54,15,24,2
2,78703,30.2915,-97.7688,110,46,1,22,2
3,78704,30.2457,-97.7688,95,52,17,27,5
4,78705,30.2962,-97.739,108,54,14,23,4
5,78717,30.4917,-97.7743,27,12,20,11,7
6,78721,30.2737,-97.6819,13,18,0,5,2
7,78722,30.292,-97.7118,98,50,1,22,4
8,78723,30.3081,-97.6819,22,24,13,3,3
9,78724,30.2944,-97.6223,0,2,0,0,1
