In [97]:
# Importing packages
import requests
import json
import time #will use this to pause execution for a few seconds

import numpy as np
import pandas as pd

### Setting up API key

In [98]:
# Extracting Google Key
from configparser import ConfigParser
config = ConfigParser()
config.read('config.ini')

# extract key from a separate ini file where the key is stored
API_KEY = config['google']['api_key']

# -------------------------------------------------------------------------------------------------------------------------

# Distribution Points

# -------------------------------------------------------------------------------------------------------------------------

### Pulling data out of Google

In [99]:
# The url format (backslash at the end is to continue to the next line, not part of the string)
# this is just an example of a typically url, not related to late code
url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json?\
location=-33.8670522%2C151.1957362\
&radius=1500\
&type=restaurant\
&key=" + API_KEY

In [100]:
# The initial string for the api
endpoint_url_start = "https://maps.googleapis.com/maps/api/place/nearbysearch/json?"

In [112]:
# Specifying the types of places in a list
# common places will be searched in many small radius circles
places_common = ['bakery', 'cafe', 'restaurant', 'meal_delivery', 'meal_takeaway', 'bar', 'convenience store',\
                 'gas_station', 'supermarket', 'department_store', "tourist_attraction", "lodging"]
# uncommon places will be searched using Galveston as a whole
places_uncommon = ['airport', 'amusement_park', 'aquarium', 'casino', 'hospital',\
                   'stadium', 'university', 'primary_school',\
                   'school', 'secondary_school', 'zoo', 'movie_theater', 'shopping mall', "bowling_alley"]

In [113]:
# Creating dataframes to be filled
# dataframe for common types of places
galv_common = pd.DataFrame(columns=['Name','place_id', 'Address','Coordinates', 'business_status',\
                                     'types', 'delivery',\
                                    'dine_in', 'takeout', 'price_level', 'rating', 'user_ratings_total',\
                                    'serves_breakfast', 'serves_brunch','serves_lunch', 'serves_dinner',\
                                    'serves_vegetarian_food', 'serves_beer', 'serves_wine'])
# dataframe for uncommon types of places
galv_uncommon = pd.DataFrame(columns=['Name','place_id', 'Address','Coordinates', 'business_status',\
                                      'types', 'delivery',\
                                      'dine_in', 'takeout', 'price_level', 'rating', 'user_ratings_total',\
                                      'serves_breakfast', 'serves_brunch','serves_lunch', 'serves_dinner',\
                                      'serves_vegetarian_food', 'serves_beer', 'serves_wine'])

### Search as a large circle

In [114]:
# Basic parameters for Galveston as a whole
central_cord = "29.237030, -94.896354"
central_radius = "23000"

In [115]:
# function to append result to the dataframe
# Takes care of null returned from API request
# see API nearby search documentation for inside "" content
def append_place(json_results, df):
    l = len(df)
    for i, val in enumerate(json_results['results']):
        df.loc[l + i] = [val.get('name', np.nan), 
                         val.get('place_id', np.nan),
                         val.get('vicinity', np.nan), 
                         val.get('geometry', np.nan).get('location', np.nan),
                         val.get('business_status', np.nan),
                         val.get('types', np.nan),
                         val.get('delivery', np.nan),
                         val.get('dine_in', np.nan),
                         val.get('takeout', np.nan),
                         val.get('price_level', np.nan),
                         val.get('rating', np.nan),
                         val.get('user_ratings_total', np.nan),
                         val.get('serves_breakfast', np.nan),
                         val.get('serves_brunch', np.nan),
                         val.get('serves_lunch',np.nan),
                         val.get('serves_dinner', np.nan),
                         val.get('serves_vegetarian_food', np.nan),
                         val.get('serves_beer', np.nan),
                         val.get('serves_wine', np.nan)]

In [116]:
# code doing the actual scraping of API
galv_uncommon_full = pd.DataFrame(columns=['Name','place_id', 'Address','Coordinates', 'business_status',\
                                      'types', 'delivery',\
                                      'dine_in', 'takeout', 'price_level', 'rating', 'user_ratings_total',\
                                      'serves_breakfast', 'serves_brunch','serves_lunch', 'serves_dinner',\
                                      'serves_vegetarian_food', 'serves_beer', 'serves_wine'])
for i in range(5):
    for _type in places_uncommon:
        endpoint_url = endpoint_url_start + "&location=" + central_cord + "&radius=" + \
        central_radius + "&region=us&type=" + _type + "&key=" + API_KEY
        
        params = {}
        
        res = requests.get(endpoint_url, params = params)
        results =  json.loads(res.content)
        append_place(results, galv_uncommon)
        
        time.sleep(2)
        
        # Pulling results from other pages since Googel only display 20 results per page
        while "next_page_token" in results:
            params['pagetoken'] = results['next_page_token']
            res = requests.get(endpoint_url, params = params)
            results =  json.loads(res.content)
            append_place(results, galv_uncommon)
            time.sleep(2)
    galv_uncommon_full = pd.concat([galv_uncommon_full, galv_uncommon])


In [120]:
# weirdly google API does not return the same number of results for each type of place
# so we run the code for 5 times to get a more complete list
# then we drop the duplicates
galv_uncommon_full.drop_duplicates(subset=['place_id'], inplace=True)

In [125]:
galv_uncommon_full['category'] = galv_uncommon_full['types'].apply(lambda x: x[0])

### Search as various small circles

In [128]:
# Basic parameters for Galveston
# centers of small radius circles I drew (see map screenshot for visualization of the small circles)
central_cord_small = [
    '29.33453,-94.80537', "29.32641,-94.75220", "29.31378,-94.77920", "29.30675,-94.77173",
    "29.30836,-94.78982", "29.30470,-94.78305", "29.30018,-94.77810", "29.30478,-94.80023",
    "29.30085,-94.79133", "29.29828,-94.78461", "29.29248,-94.79003", "29.29620,-94.79872",
    "29.28758,-94.79818", "29.30279,-94.81760", "29.29502,-94.80681", "29.28360,-94.80671",
    "29.28823,-94.81638", "29.27799,-94.81312", "29.29547,-94.83998", "29.28702,-94.86711",
    "29.28284,-94.82435", "29.27298,-94.82200", "29.27706,-94.83324", "29.26801,-94.83089",
    "29.27297,-94.84745", "29.26087,-94.84009", "29.25825,-94.86888", "29.24091,-94.90649",
    "29.21049,-94.94509", "29.18998,-94.97939", "29.16899,-95.01119", "29.14771,-95.03933",
    "29.13656,-95.05589", "29.12952,-95.07055", "29.11749,-95.08840", "29.09715,-95.10609"
    
]
# radius of each small circles I drew (see map screenshot for visualization of the small circles)
central_radius_small = [
    "2867", "2390", "776.51", "638.38", "524.34", "480.32", "419.40", "603.24",
    "517.59", "531.26", "531.64", "496.02", "695.27", "1246", "694.75", "628.02",
    "728.18", "656.31", "1705", "1581", "684.73", "617.01", "642.26", "697.72",
    "1210", "663.12", "2464", "3657", "2909", "1902", "2596", "1461", "997.40",
    "1321", "1568", "2049"
]

In [129]:
# function to append result to the dataframe
# Takes care of null returned from API request
# same function as above
def append_place(json_results, df):
    l = len(df)
    for i, val in enumerate(json_results['results']):
        df.loc[l + i] = [val.get('name', np.nan), 
                         val.get('place_id', np.nan),
                         val.get('vicinity', np.nan), 
                         val.get('geometry', np.nan).get('location', np.nan),
                         val.get('business_status', np.nan),
                         val.get('types', np.nan),
                         val.get('delivery', np.nan),
                         val.get('dine_in', np.nan),
                         val.get('takeout', np.nan),
                         val.get('price_level', np.nan),
                         val.get('rating', np.nan),
                         val.get('user_ratings_total', np.nan),
                         val.get('serves_breakfast', np.nan),
                         val.get('serves_brunch', np.nan),
                         val.get('serves_lunch',np.nan),
                         val.get('serves_dinner', np.nan),
                         val.get('serves_vegetarian_food', np.nan),
                         val.get('serves_beer', np.nan),
                         val.get('serves_wine', np.nan)]

In [130]:
# for each small circle
# search for all detailed info we need
# then add the results to the big dataframe
for i in range(0, len(central_cord_small)):
    for _type in places_common:
        endpoint_url = endpoint_url_start + "&location=" + central_cord_small[i] + "&radius=" + \
        central_radius_small[i] + "&region=us&type=" + _type + "&key=" + API_KEY

        params = {}

        res = requests.get(endpoint_url, params = params)
        results =  json.loads(res.content)
        # use the function we wrote before to append place to the galv_common df
        append_place(results, galv_common)

        # set a sleep time so that we won't burn the API tool
        time.sleep(1)

        # Pulling results from other pages
        while "next_page_token" in results:
            params['pagetoken'] = results['next_page_token']
            res = requests.get(endpoint_url, params = params)
            results =  json.loads(res.content)
            append_place(results, galv_common)
            time.sleep(1)

In [134]:
galv_common.drop_duplicates(subset=['place_id'], inplace=True)
galv_common.shape
galv_common['category'] = galv_common['types'].apply(lambda x: x[0])
galv_common.head()

Unnamed: 0,Name,place_id,Address,Coordinates,business_status,types,delivery,dine_in,takeout,price_level,rating,user_ratings_total,serves_breakfast,serves_brunch,serves_lunch,serves_dinner,serves_vegetarian_food,serves_beer,serves_wine,category
0,The Galley,ChIJsUcIaOOdP4YR--DKKZ5UIok,"602 Seawolf Parkway, Galveston","{'lat': 29.3184692, 'lng': -94.8150284}",CLOSED_TEMPORARILY,"[bar, restaurant, food, point_of_interest, est...",,,,1.0,4.0,39.0,,,,,,,,bar
2,Galveston,ChIJWZIFxMhZP4YRwezquckDi-U,Galveston,"{'lat': 29.3013479, 'lng': -94.7976958}",,"[locality, political]",,,,,,,,,,,,,,locality
3,Texas A&M University at Galveston,ChIJpWF4AeKcP4YRr1_9-oDh0Ew,"200 Seawolf Parkway, Galveston","{'lat': 29.3154967, 'lng': -94.8182868}",OPERATIONAL,"[university, point_of_interest, establishment]",,,,,4.6,121.0,,,,,,,,university
4,Tiger Tanks Offshore Rental,ChIJad4FLSKeP4YRGZT8XY5jKMc,"1300 Coastwide Road, Galveston","{'lat': 29.324723, 'lng': -94.787048}",OPERATIONAL,"[general_contractor, point_of_interest, establ...",,,,,,,,,,,,,,general_contractor
5,T & T Offshore,ChIJP1ldXhWeP4YRDhsP8J3BP4Y,"2915 Todd Road, Galveston","{'lat': 29.3127289, 'lng': -94.8026897}",OPERATIONAL,"[car_repair, point_of_interest, store, establi...",,,,,4.5,52.0,,,,,,,,car_repair


### Remove rows from other cities

In [141]:
# since the radius also includes Texas City, we need to filter for Galveston locations only
# for the dataframe of uncommon types of locations
# first reset the index
galv_uncommon_full.reset_index(drop=True, inplace=True)
for i in range(0, len(galv_uncommon_full)):
    string = galv_uncommon_full['Address'][i]
    if "Galveston" not in string:
        galv_uncommon_full.drop(labels=i, axis=0, inplace=True)

In [142]:
# since the radius also includes Texas City, we need to filter for Galveston locations only
# for the dataframe of common types of locations
# first reset the index
galv_common.reset_index(drop=True, inplace=True)
for i in range(0, len(galv_common)):
    string = galv_common['Address'][i]
    if "Galveston" not in string:
        galv_common.drop(labels=i, axis=0, inplace=True)

### Search & Add Business Hours to Dataframe

In [144]:
# Creating a list of unique place ids that we can use in Google Places Details API search to get business hours
g_common_ids = np.unique(galv_common['place_id'])
g_uncommon_ids = np.unique(galv_uncommon_full['place_id'])

In [145]:
# creating dataset to fill with business hours
g_common_hours = pd.DataFrame(columns=['Name','place_id','business_hours', 'business_hours_text'])
g_uncommon_hours = pd.DataFrame(columns=['Name','place_id','business_hours','business_hours_text'])

In [146]:
# new api request string for new type of API requests
endpoint_url_start_2 = "https://maps.googleapis.com/maps/api/place/details/json?place_id="

In [147]:
# new function to append results from Places Details search to business hours dataset
def append_place_2(json_results, df):
    l = len(df)
    val = json_results['result']
    if len(val) == 2:
        df.loc[l + 1] = [val.get('name', np.nan), 
                         val.get('place_id', np.nan),
                         np.nan,
                         np.nan]
    else:
        df.loc[l + 1] = [val.get('name', np.nan), 
                         val.get('place_id', np.nan),
                         val.get('opening_hours', np.nan).get('periods', np.nan),
                         val.get('opening_hours', np.nan).get('weekday_text', np.nan)]

In [148]:
# creating a business hours dataset for the common places
for place_id in g_common_ids:
    endpoint_url = endpoint_url_start_2 + place_id + \
    "&fields=name%2Cplace_id%2Copening_hours&key=" + API_KEY
    
    params = {}
    
    res = requests.get(endpoint_url, params = params)
    results =  json.loads(res.content)
    append_place_2(results, g_common_hours)
    
    time.sleep(1)

In [149]:
# view created dataset to check it looks ok
g_common_hours.head()

Unnamed: 0,Name,place_id,business_hours,business_hours_text
1,Heffernan Insurance Inc,ChIJ-0BVfWeeP4YRQ8ZAfTZTOxg,"[{'close': {'day': 1, 'time': '1200'}, 'open':...","[Monday: 9:00 AM – 12:00 PM, 1:00 – 5:00 PM, T..."
2,Central Middle School,ChIJ-1C15ZUOP4YRPXWU9LUmskw,"[{'close': {'day': 1, 'time': '1510'}, 'open':...","[Monday: 7:30 AM – 3:10 PM, Tuesday: 7:30 AM –..."
3,Knapp Flower Shop,ChIJ-2ieSNGdP4YR7lhSA5Yep3Y,"[{'close': {'day': 1, 'time': '1700'}, 'open':...","[Monday: 8:30 AM – 5:00 PM, Tuesday: 8:30 AM –..."
4,Chico Tile,ChIJ-2vRpNGdP4YRHqjTy_WGhos,"[{'close': {'day': 1, 'time': '1800'}, 'open':...","[Monday: 8:00 AM – 6:00 PM, Tuesday: 8:00 AM –..."
5,Galveston Railroad Msm the Center,ChIJ-3PZpm2eP4YRzoqGfg2Mb_k,,


In [150]:
# creating a business hours dataset for the uncommon places
for place_id in g_uncommon_ids:
    endpoint_url = endpoint_url_start_2 + place_id + \
    "&fields=name%2Cplace_id%2Copening_hours&key=" + API_KEY
    
    params = {}
    
    res = requests.get(endpoint_url, params = params)
    results =  json.loads(res.content)
    append_place_2(results, g_uncommon_hours)
    
    time.sleep(1)

In [151]:
# view created dataset to check it looks ok
g_uncommon_hours.head()

Unnamed: 0,Name,place_id,business_hours,business_hours_text
1,Central Middle School,ChIJ-1C15ZUOP4YRPXWU9LUmskw,"[{'close': {'day': 1, 'time': '1510'}, 'open':...","[Monday: 7:30 AM – 3:10 PM, Tuesday: 7:30 AM –..."
2,Parker Elementary School,ChIJ04buoE-cP4YRYESEn5EPNO8,,
3,Hilton Galveston Island Resort,ChIJ074TKjucP4YRRetI2oO6DCo,,
4,Maravilla Condos,ChIJ0TRchYWcP4YRsSayVep3UR0,,
5,Inn at the Waterpark,ChIJ0YzmJFmcP4YRlEJHWHuYYIg,"[{'open': {'day': 0, 'time': '0000'}}]","[Monday: Open 24 hours, Tuesday: Open 24 hours..."


In [155]:

# merging previous datasets with business hours datasets
galv_common = galv_common.merge(g_common_hours, how='left', left_on=['Name', 'place_id'], right_on=['Name', 'place_id'])
galv_uncommon_full = galv_uncommon_full.merge(g_uncommon_hours, how='left', left_on=['Name', 'place_id'], right_on=['Name', 'place_id'])

# -------------------------------------------------------------------------------------------------------------------------

# Collection Points

# -------------------------------------------------------------------------------------------------------------------------

## Pulling data out of Google

In [156]:
# The initial string for the api
endpoint_url_start = "https://maps.googleapis.com/maps/api/place/nearbysearch/json?"

In [157]:
# Specifying the types of places in a list
# common places will be searched in many small radius circles
clct_common = ['bus_station', 'light_rail_station', 'parking', \
              'taxi_stand', 'train_station', 'transit_station']
# uncommon places will be searched using Galveston as a whole
clct_uncommon = ['art_gallery', 'campground', 'car_rental', 'museum', 'night_club',\
                'spa', 'travel_agency']

In [158]:
# Creating dataframes to be filled
# dataframe for common types of places
galv_clct_common = pd.DataFrame(columns=['Name','place_id', 'Address','Coordinates', 'business_status','types'])
# dataframe for uncommon types of places
galv_clct_uncommon = pd.DataFrame(columns=['Name','place_id', 'Address','Coordinates', 'business_status','types'])

## Search as a large circle

In [160]:
# function to append result to the dataframe
# Takes care of null returned from API request
# see API nearby search documentation for inside "" content
def append_clct_place(json_results, df):
    l = len(df)
    for i, val in enumerate(json_results['results']):
        df.loc[l + i] = [val.get('name', np.nan), 
                         val.get('place_id', np.nan),
                         val.get('vicinity', np.nan), 
                         val.get('geometry', np.nan).get('location', np.nan),
                         val.get('business_status', np.nan),
                         val.get('types', np.nan)]

In [172]:
# code doing the actual scraping of API
galv_clct_uncommon_full = pd.DataFrame(columns=['Name','place_id', 'Address','Coordinates', 'business_status','types'])
for i in range(5):
    for _type in clct_uncommon:
        endpoint_url = endpoint_url_start + "&location=" + central_cord + "&radius=" + \
        central_radius + "&region=us&type=" + _type + "&key=" + API_KEY
        
        params = {}
        
        res = requests.get(endpoint_url, params = params)
        results =  json.loads(res.content)
        append_clct_place(results, galv_clct_uncommon)
        
        
        time.sleep(1)
        
        # Pulling results from other pages since Googel only display 20 results per page
        while "next_page_token" in results:
            params['pagetoken'] = results['next_page_token']
            res = requests.get(endpoint_url, params = params)
            results =  json.loads(res.content)
            append_clct_place(results, galv_clct_uncommon)
            time.sleep(1)
    galv_clct_uncommon_full = pd.concat([galv_clct_uncommon_full, galv_clct_uncommon])

In [176]:
# drop duplicates
galv_clct_uncommon_full.drop_duplicates(subset=['place_id'], inplace=True)
galv_clct_uncommon_full.reset_index(drop=True, inplace=True)
galv_clct_uncommon_full['category'] = galv_clct_uncommon_full['types'].apply(lambda x: x[0])
galv_clct_uncommon_full.head()

Unnamed: 0,Name,place_id,Address,Coordinates,business_status,types,category
0,Mamady's Primitive Art From,ChIJg37VFGueP4YR6fdJF1ue33c,"2211 The Strand, Galveston","{'lat': 29.307213, 'lng': -94.7937577}",OPERATIONAL,"[art_gallery, point_of_interest, establishment]",art_gallery
1,Bogan Gallery,ChIJ_____xueP4YRNB441Lq7RHc,"2217 Postoffice St, Galveston","{'lat': 29.3044721, 'lng': -94.79293539999999}",CLOSED_TEMPORARILY,"[art_gallery, point_of_interest, establishment]",art_gallery
2,Affaire d'Art Fine Art Gallery,ChIJkfdtYmueP4YR1PRZD3AHIXI,"2227 Postoffice St, Galveston","{'lat': 29.304436, 'lng': -94.793165}",OPERATIONAL,"[art_gallery, point_of_interest, establishment]",art_gallery
3,The Proletariat Gallery & Public House,ChIJJ98no2ueP4YR-eQcpeWIIeg,"2221 Market Street #100, Galveston","{'lat': 29.3053353, 'lng': -94.7935713}",OPERATIONAL,"[bar, art_gallery, cafe, food, store, point_of...",bar
4,Gallery Évasion,ChIJq07NcWueP4YRVpwZWyy3aRk,"217 23rd Street, Galveston","{'lat': 29.3065075, 'lng': -94.79442279999999}",OPERATIONAL,"[art_gallery, point_of_interest, establishment]",art_gallery


## Search as small circles

In [179]:
# function to append result to the dataframe
# Takes care of null returned from API request
# see API nearby search documentation for inside "" content
def append_clct_place(json_results, df):
    l = len(df)
    for i, val in enumerate(json_results['results']):
        df.loc[l + i] = [val.get('name', np.nan), 
                         val.get('place_id', np.nan),
                         val.get('vicinity', np.nan), 
                         val.get('geometry', np.nan).get('location', np.nan),
                         val.get('business_status', np.nan),
                         val.get('types', np.nan)]

In [180]:
# for each small circle
# search for all detailed info we need
# then add the results to the big dataframe
for i in range(0, len(central_cord_small)):
    for _type in clct_common:
        endpoint_url = endpoint_url_start + "&location=" + central_cord_small[i] + "&radius=" + \
        central_radius_small[i] + "&region=us&type=" + _type + "&key=" + API_KEY

        params = {}

        res = requests.get(endpoint_url, params = params)
        results =  json.loads(res.content)
        # use the function we wrote before to append place to the galv_common df
        append_clct_place(results, galv_clct_common)


        time.sleep(1)

        # Pulling results from other pages
        while "next_page_token" in results:
            params['pagetoken'] = results['next_page_token']
            res = requests.get(endpoint_url, params = params)
            results =  json.loads(res.content)
            append_clct_place(results, galv_clct_common)
            time.sleep(1)

In [183]:
galv_clct_common.drop_duplicates(subset=['place_id'], inplace=True)
galv_clct_common.reset_index(drop=True, inplace=True)
galv_clct_common['category'] = galv_clct_common['types'].apply(lambda x: x[0])
galv_clct_common.head()

Unnamed: 0,Name,place_id,Address,Coordinates,business_status,types,category
0,602 Seawolf Pkwy Parking,ChIJGyMXOfudP4YRDOB7F3SyWzo,"602 Seawolf Pkwy, Galveston","{'lat': 29.3206562, 'lng': -94.8172833}",OPERATIONAL,"[parking, point_of_interest, establishment]",parking
1,200 Seawolf Pkwy Parking,ChIJtdCk4uSdP4YRhpk5DmlvLFU,"200 Seawolf Pkwy, Galveston","{'lat': 29.3175257, 'lng': -94.81833019999999}",OPERATIONAL,"[parking, point_of_interest, establishment]",parking
2,3018 Texas Clipper Rd Parking,ChIJEd4jQuSdP4YRc9jMxtmw4z0,"3018 Texas Clipper Road, Galveston","{'lat': 29.3157226, 'lng': -94.81716879999999}",OPERATIONAL,"[parking, point_of_interest, establishment]",parking
3,Parking lot,ChIJEejTyYd1P4YRfGsq41f6gms,Galveston,"{'lat': 29.3353876, 'lng': -94.77805839999999}",,"[premise, parking, point_of_interest, establis...",premise
4,Parking utmb,ChIJAdzz9keeP4YRzUtgHz_IYYU,"365-379 11th Street, Galveston","{'lat': 29.3091755, 'lng': -94.7820206}",OPERATIONAL,"[parking, point_of_interest, establishment]",parking


## Remove rows from other cities

In [187]:
# since the radius also includes Texas City, we need to filter for Galveston locations only
# for the dataframe of uncommon types of locations
for i in range(0, len(galv_clct_uncommon_full)):
    string = galv_clct_uncommon_full['Address'][i]
    if "Galveston" not in string:
        galv_clct_uncommon_full.drop(labels=i, axis=0, inplace=True)

In [188]:
# since the radius also includes Texas City, we need to filter for Galveston locations only
# for the dataframe of common types of locations
for i in range(0, len(galv_clct_common)):
    string = galv_clct_common['Address'][i]
    if "Galveston" not in string:
        galv_clct_common.drop(labels=i, axis=0, inplace=True)

## Keyword search for residential areas

In [190]:
# residential places types
clct_residential = ['apartment', 'condominium', 'townhouse']
# dataframe for residential places
galv_resid = pd.DataFrame(columns=['Name','place_id', 'Address','Coordinates', 'business_status','types'])

In [191]:
# code doing the actual scraping of API
galv_resid_full = pd.DataFrame(columns=['Name','place_id', 'Address','Coordinates', 'business_status','types'])
for i in range(5):
    for _keyword in clct_residential:
        endpoint_url = endpoint_url_start + "&location=" + central_cord + "&radius=" + \
        central_radius + '&keyword=' + _keyword + "&key=" + API_KEY
        
        params = {}
        
        res = requests.get(endpoint_url, params = params)
        results =  json.loads(res.content)
        append_clct_place(results, galv_resid)
        
        
        time.sleep(2)
        
        # Pulling results from other pages since Googel only display 20 results per page
        while "next_page_token" in results:
            params['pagetoken'] = results['next_page_token']
            res = requests.get(endpoint_url, params = params)
            results =  json.loads(res.content)
            append_clct_place(results, galv_resid)
            time.sleep(2)
    galv_resid_full = pd.concat([galv_resid_full, galv_resid])

In [194]:
galv_resid_full.drop_duplicates(subset=['place_id'], inplace=True)
galv_resid_full.reset_index(drop=True, inplace=True)
galv_resid_full['category'] = galv_resid_full['types'].apply(lambda x: x[0])
galv_resid_full.head()

Unnamed: 0,Name,place_id,Address,Coordinates,business_status,types,category
0,Baypointe Manor Apartments,ChIJx2rkZ3V5P4YRsWL9KCSDm9w,"2701 13th Ave N, Texas City","{'lat': 29.3965673, 'lng': -94.93472779999999}",OPERATIONAL,"[point_of_interest, establishment]",point_of_interest
1,Veranda,ChIJQ3N6ahR5P4YRzCcg3fFv1BU,"3700 9th Ave N, Texas City","{'lat': 29.39241, 'lng': -94.95376379999999}",OPERATIONAL,"[real_estate_agency, point_of_interest, establ...",real_estate_agency
2,Terraces Apartments,ChIJNSOm9o95P4YRdwuwvYGdYPc,"2602 21st St N, Texas City","{'lat': 29.4095159, 'lng': -94.92460849999999}",OPERATIONAL,"[real_estate_agency, point_of_interest, establ...",real_estate_agency
3,Windsor Estates Apartments,ChIJHSxGTnF-P4YRNvi-UxVwEl0,"2801 FM 2004 road, 2700 Gulf Fwy, Texas City","{'lat': 29.4099828, 'lng': -95.0314811}",OPERATIONAL,"[point_of_interest, establishment]",point_of_interest
4,Coral Manor Apartments,ChIJr3ro38R5P4YR7VSzmEIC-Fw,"729 5th Ave N, Texas City","{'lat': 29.3890037, 'lng': -94.9050365}",OPERATIONAL,"[point_of_interest, establishment]",point_of_interest


In [196]:
# since the radius also includes Texas City, we need to filter for Galveston locations only
# for the dataframe of uncommon types of locations
for i in range(0, len(galv_resid_full)):
    string = galv_resid_full['Address'][i]
    if "Galveston" not in string:
        galv_resid_full.drop(labels=i, axis=0, inplace=True)

In [197]:
galv_resid.head()

Unnamed: 0,Name,place_id,Address,Coordinates,business_status,types
0,Baypointe Manor Apartments,ChIJx2rkZ3V5P4YRsWL9KCSDm9w,"2701 13th Ave N, Texas City","{'lat': 29.3965673, 'lng': -94.93472779999999}",OPERATIONAL,"[point_of_interest, establishment]"
1,Veranda,ChIJQ3N6ahR5P4YRzCcg3fFv1BU,"3700 9th Ave N, Texas City","{'lat': 29.39241, 'lng': -94.95376379999999}",OPERATIONAL,"[real_estate_agency, point_of_interest, establ..."
2,Terraces Apartments,ChIJNSOm9o95P4YRdwuwvYGdYPc,"2602 21st St N, Texas City","{'lat': 29.4095159, 'lng': -94.92460849999999}",OPERATIONAL,"[real_estate_agency, point_of_interest, establ..."
3,Windsor Estates Apartments,ChIJHSxGTnF-P4YRNvi-UxVwEl0,"2801 FM 2004 road, 2700 Gulf Fwy, Texas City","{'lat': 29.4099828, 'lng': -95.0314811}",OPERATIONAL,"[point_of_interest, establishment]"
4,Coral Manor Apartments,ChIJr3ro38R5P4YR7VSzmEIC-Fw,"729 5th Ave N, Texas City","{'lat': 29.3890037, 'lng': -94.9050365}",OPERATIONAL,"[point_of_interest, establishment]"


# Saving all generated dataframes above

In [198]:
galv_common.to_csv('galv_common.csv')
galv_uncommon.to_csv('galv_uncommon.csv')
galv_clct_common.to_csv('galv_clct_common.csv')
galv_clct_uncommon.to_csv('galv_clct_uncommon.csv')
galv_resid.to_csv('galv_resid.csv')

# -------------------------------------------------------------------------------------------------------------------------

# Distribution Points Revisited for Weighting (with additional columns in the dataframe)

# -------------------------------------------------------------------------------------------------------------------------

In [226]:
# Importing packages
import geopandas as gpd
import fiona
from matplotlib import pyplot as plt
from matplotlib import image as mpimg

In [258]:
#reading in cleaned distribution site data
galv_sites = pd.read_csv("galv_FUE_filtered.csv")

# dropping dubplicate observations
galv_sites = galv_sites.reset_index()

# display max number of columns
pd.set_option('display.max_columns', None)

# suppress warnings
import warnings
warnings.filterwarnings('ignore')

In [259]:
# for 'FALSE' in business_status, change it to 'OPERATIONAL'
galv_sites['business_status'] = galv_sites['business_status'].replace('FALSE', 'OPERATIONAL')
# rename columns so that we can distinguish google id and yelp id
galv_sites.rename(columns={'place_id': 'google_id', 'id': 'yelp_id'}, inplace=True)


In [231]:
name_counts = pd.DataFrame(galv_sites['Name'].value_counts())
name_counts[name_counts['Name'] > 2]
# look for chains that might be local that I'd be unfamiliar with (do a quick google search to see what they look like)

Unnamed: 0,Name
Subway,10
Starbucks,9
Valero,7
Jack in the Box,5
ATM,5
McDonald's,5
SP+ Parking,4
Parking,4
Exxon,4
Circle K,4


In [111]:
""" # list of fast food places
fast_food = ['IHOP', 'Taco Bell', 'Jamba', 'Starbucks', 'KFC', 'Valero', 'Jack in the Box',\
             'Pizza Hut', 'Subway', "McDonald's", 'Cinnabon', 'Burger Kind', '7-Eleven',\
             'Dairy Queen/Orange Julius Treat Ctr', 'Panda Express', "Domino's Pizza",\
             'L&L Hawaiian Barbecue', 'Minit Stop', 'Circle K', 'Whatabureger', "Schlotzsky's"]

# create a binary variable for if a place is a fast food place based on business name
galv_sites['fast_food'] = galv_sites['Name'].isin(fast_food)
galv_sites['fast_food'] = np.multiply(galv_sites['fast_food'] , 1) """

In [112]:
""" # load in collection  sites (not cleaned yet)
uncommon = pd.read_csv("dirty_galv_uncommon_v4.csv")
common = pd.read_csv("dirty_galv_common_v4.csv")
galv_dist_dirty = pd.concat([uncommon, common])
galv_dist_dirty.head()

# get location variables
galv_dirty_lat = []
galv_dirty_long = []
for i in galv_dist_dirty["Coordinates"]:
    dic = eval(i)
    try:
        galv_dirty_lat.append(dic["lat"])
    except:
        galv_dirty_lat.append(dic["latitude"])
    try:
        galv_dirty_long.append(dic["lng"])
    except:
        galv_dirty_long.append(dic["longitude"])

galv_dist_dirty = galv_dist_dirty.assign(Lat = galv_dirty_lat, Long = galv_dirty_long)
galv_dist_dirty = gpd.GeoDataFrame(galv_dist_dirty, geometry=gpd.points_from_xy(galv_dist_dirty.Long, galv_dist_dirty.Lat))

#select relevant variables (columns that I want to merge into the cleaned data)
dist_dirty_merge = galv_dist_dirty[['Name','Address', 'geometry',\
                                     'user_ratings_total', 'types',\
                                     'business_hours', 'business_hours_text']]
dist_dirty_merge = dist_dirty_merge.rename(columns = {'geometry':'Coordinates'})

#drop duplicate observations if there are any
dist_dirty_merge = dist_dirty_merge.drop_duplicates() """

In [113]:
""" # merging desired vairables from dirty data into clean data
galv_dist_detail = pd.merge(galv_sites, dist_dirty_merge, on=['Name','Address','Coordinates'], how='left')

drop_indicator = galv_dist_detail[['Name', 'Address']].duplicated(keep='last').tolist()
galv_dist_detail = galv_dist_detail.assign(Keep = drop_indicator)
galv_dist_detail = galv_dist_detail[galv_dist_detail['Keep'] == False]
galv_dist_detail = galv_dist_detail.drop(columns = ['Keep'])
galv_dist_detail = galv_dist_detail.reset_index()
galv_dist_detail.head() """

Unnamed: 0,index,Name,Address,Coordinates,Lat,Long,fast_food,user_ratings_total,types,business_hours,business_hours_text
0,0,"Moody Gardens Hotel, Spa and Convention Center","7 Hope Boulevard, Galveston",POINT (-94.85032 29.27346),29.27346,-94.850322,0,,,,
1,1,Rainforest Cafe,"5310 Seawall Boulevard, Galveston",POINT (-94.82021 29.27075),29.270753,-94.820207,0,7369.0,"['restaurant', 'food', 'point_of_interest', 'e...","[{'close': {'day': 0, 'time': '2100'}, 'open':...",['Monday: 11:00\u202fAM\u2009–\u20099:00\u202f...
2,3,Landry's Prime Seafood & Steaks,"5310 Seawall Boulevard, Galveston",POINT (-94.82023 29.27074),29.270735,-94.820232,0,3908.0,"['meal_takeaway', 'restaurant', 'point_of_inte...","[{'close': {'day': 0, 'time': '2100'}, 'open':...",['Monday: 11:00\u202fAM\u2009–\u20099:00\u202f...
3,4,Number 13 Prime Steak and Seafood,"7809 Broadway Street, Galveston",POINT (-94.85754 29.28509),29.285088,-94.857536,0,1034.0,"['bar', 'restaurant', 'food', 'point_of_intere...","[{'close': {'day': 0, 'time': '2100'}, 'open':...","['Monday: Closed', 'Tuesday: 4:00\u2009–\u2009..."
4,5,Fisherman's Wharf,"2200 Harborside Drive, Galveston",POINT (-94.79364 29.30893),29.308931,-94.793637,0,5616.0,"['restaurant', 'food', 'point_of_interest', 'e...",,


In [114]:
""" # creating variables for businesses that would use more foodware

galv_dist_detail['bakery'] = 0
galv_dist_detail['cafe'] = 0
galv_dist_detail['meal_delivery'] = 0
galv_dist_detail['meal_takeaway'] = 0
galv_dist_detail['restaurant'] = 0
galv_dist_detail['food'] = 0

for i in range(len(galv_dist_detail)):
    types = galv_dist_detail['types'][i]
    
    if type(types) != float:    
        if 'bakery' in types:
            galv_dist_detail['bakery'][i] = 1
        if 'cafe' in types:
            galv_dist_detail['cafe'] = 1
        if 'meal_delivery' in types:
            galv_dist_detail['meal_delivery'] = 1
        if 'meal_takeaway' in types:
            galv_dist_detail['meal_takeaway'] = 1
        if 'restaurant' in types:
            galv_dist_detail['restaurant'][i] = 1
        if 'food' in types:
            galv_dist_detail['food'][i] = 1
        
    if np.isnan(galv_dist_detail['user_ratings_total'][i]):
        galv_dist_detail['user_ratings_total'][i] = 0
            
galv_dist_detail.head() """

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  galv_dist_detail['user_ratings_total'][i] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  galv_dist_detail['restaurant'][i] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  galv_dist_detail['food'][i] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  galv_dist_detail['bakery'][i] = 1


Unnamed: 0,index,Name,Address,Coordinates,Lat,Long,fast_food,user_ratings_total,types,business_hours,business_hours_text,bakery,cafe,meal_delivery,meal_takeaway,restaurant,food
0,0,"Moody Gardens Hotel, Spa and Convention Center","7 Hope Boulevard, Galveston",POINT (-94.85032 29.27346),29.27346,-94.850322,0,0.0,,,,0,1,1,1,0,0
1,1,Rainforest Cafe,"5310 Seawall Boulevard, Galveston",POINT (-94.82021 29.27075),29.270753,-94.820207,0,7369.0,"['restaurant', 'food', 'point_of_interest', 'e...","[{'close': {'day': 0, 'time': '2100'}, 'open':...",['Monday: 11:00\u202fAM\u2009–\u20099:00\u202f...,0,1,1,1,1,1
2,3,Landry's Prime Seafood & Steaks,"5310 Seawall Boulevard, Galveston",POINT (-94.82023 29.27074),29.270735,-94.820232,0,3908.0,"['meal_takeaway', 'restaurant', 'point_of_inte...","[{'close': {'day': 0, 'time': '2100'}, 'open':...",['Monday: 11:00\u202fAM\u2009–\u20099:00\u202f...,0,1,1,1,1,1
3,4,Number 13 Prime Steak and Seafood,"7809 Broadway Street, Galveston",POINT (-94.85754 29.28509),29.285088,-94.857536,0,1034.0,"['bar', 'restaurant', 'food', 'point_of_intere...","[{'close': {'day': 0, 'time': '2100'}, 'open':...","['Monday: Closed', 'Tuesday: 4:00\u2009–\u2009...",0,1,1,1,1,1
4,5,Fisherman's Wharf,"2200 Harborside Drive, Galveston",POINT (-94.79364 29.30893),29.308931,-94.793637,0,5616.0,"['restaurant', 'food', 'point_of_interest', 'e...",,,0,1,1,1,1,1


In [260]:
# creating a function to determine number of hours a business is open a week (if business hours are available)
def find_hours(bus_hours):
    if type(bus_hours) != float: 
        hours = 0
        for i in range(7):

            day = "'day': {}".format(i)
            if bus_hours.count(day) == 2:
                time_open = int(bus_hours.split("'open': {'day': " + str(i) + ", 'time': '")[1][0:4])
                time_close = int(bus_hours.split("'close': {'day': " + str(i) + ", 'time': '")[1][0:4])
                if time_close < time_open:
                    time_close += 2400
                hours += round((time_close - time_open)/100)
                if (time_close - time_open) % 100 != 0:
                    min_close = int(str(time_close)[-2:])
                    min_open = int(str(time_open)[-2:])
                    hours -= (min_open - min_close)/60
            if i == 0 and bus_hours.count(day) == 1:
                return 24 * 7
        return hours

In [261]:
# using function to determine business hours in the dataset
galv_sites['hours_open'] = np.nan

for i in range(len(galv_sites)):
    galv_sites['hours_open'][i] = find_hours(galv_sites['business_hours'][i])

In [117]:
""" # creating convenience score to use as a weight

galv_dist_detail['user_ratings_total'] = np.where(galv_dist_detail['user_ratings_total'] == 0,\
                                                  1, galv_dist_detail['user_ratings_total'])
galv_dist_detail['hours_open'] = np.where(np.isnan(galv_dist_detail['hours_open']),\
                                          np.nanmedian(galv_dist_detail['hours_open']), galv_dist_detail['hours_open'])

# normalizing variables used in score
from sklearn import preprocessing
def normalize_var(variable):
    scaler = preprocessing.MinMaxScaler()
    return scaler.fit_transform(np.array(variable).reshape(-1,1))
    
galv_dist_detail['user_ratings_total'] = normalize_var(galv_dist_detail['user_ratings_total'])
galv_dist_detail['hours_open'] = normalize_var(galv_dist_detail['hours_open'])

#creating weight
galv_dist_detail['weight'] = (galv_dist_detail['user_ratings_total'] + 5 * galv_dist_detail['fast_food'] +
                             3 * galv_dist_detail['bakery'] + 3 * galv_dist_detail['cafe'] +
                             1.5 * galv_dist_detail['meal_delivery'] + 2 * galv_dist_detail['meal_takeaway'] +
                             1.5 * galv_dist_detail['restaurant'] + 3 * galv_dist_detail['food'] +
                             2 * galv_dist_detail['hours_open'])

galv_dist_detail = galv_dist_detail[['Name', 'Address', 'Coordinates', 'Lat', 'Long',
                                     'fast_food', 'user_ratings_total', 'bakery', 'cafe',
                                     'meal_delivery', 'meal_takeaway', 'restaurant', 'food',
                                     'hours_open', 'weight']]
galv_dist_detail.head() """

Unnamed: 0,Name,Address,Coordinates,Lat,Long,fast_food,user_ratings_total,bakery,cafe,meal_delivery,meal_takeaway,restaurant,food,hours_open,weight
0,"Moody Gardens Hotel, Spa and Convention Center","7 Hope Boulevard, Galveston",POINT (-94.85032 29.27346),29.27346,-94.850322,0,0.0,0,1,1,1,0,0,0.360119,7.220238
1,Rainforest Cafe,"5310 Seawall Boulevard, Galveston",POINT (-94.82021 29.27075),29.270753,-94.820207,0,0.449872,0,1,1,1,1,1,0.428571,12.307015
2,Landry's Prime Seafood & Steaks,"5310 Seawall Boulevard, Galveston",POINT (-94.82023 29.27074),29.270735,-94.820232,0,0.238552,0,1,1,1,1,1,0.428571,12.095695
3,Number 13 Prime Steak and Seafood,"7809 Broadway Street, Galveston",POINT (-94.85754 29.28509),29.285088,-94.857536,0,0.063072,0,1,1,1,1,1,0.27381,11.610691
4,Fisherman's Wharf,"2200 Harborside Drive, Galveston",POINT (-94.79364 29.30893),29.308931,-94.793637,0,0.342838,0,1,1,1,1,1,0.360119,12.063076


In [262]:
galv_FUEs = galv_sites[(
    (galv_sites['category'] == 'bar') | (galv_sites['category'] == 'cafe') | 
    (galv_sites['category'] == 'food') | (galv_sites['category'] == 'restaurant') |
    (galv_sites['category'] == 'bakery') | (galv_sites['category'] == 'meal_delivery') |
    (galv_sites['category'] == 'meal_takeaway') | (galv_sites['category'] == 'convenience_store') |
    (galv_sites['category'] == 'school') | (galv_sites['category'] == 'grocery_or_supermarket') |
    (galv_sites['category'] == 'university') | (galv_sites['category'] == 'supermarket') |
    (galv_sites['category'] == 'secondary_school') | (galv_sites['category'] == 'primary_school') |
    (galv_sites['category'] == 'airport') | (galv_sites['category'] == 'amusement_park') |
    (galv_sites['category'] == 'aquarium') | (galv_sites['category'] == 'movie_theater') )]

In [263]:
galv_FUEs.to_csv('galv_FUEs.csv')

In [264]:
galv_sites.to_csv('galv_complete.csv')

# below i processed the data for hilo as well because i cannot find the original file for hilo

In [269]:
#reading in cleaned distribution site data
hilo_sites = pd.read_csv("hilo_FUE_filtered.csv")

# dropping dubplicate observations
hilo_sites = hilo_sites.reset_index()
hilo_sites = hilo_sites.drop_duplicates(subset = ['Name'], inplace=True)

# display max number of columns
pd.set_option('display.max_columns', None)

# suppress warnings
import warnings
warnings.filterwarnings('ignore')

In [270]:
# for 'FALSE' in business_status, change it to 'OPERATIONAL'
hilo_sites['business_status'] = hilo_sites['business_status'].replace('FALSE', 'OPERATIONAL')
# rename columns so that we can distinguish google id and yelp id
hilo_sites.rename(columns={'place_id': 'google_id', 'id': 'yelp_id'}, inplace=True)


In [271]:
name_counts = pd.DataFrame(hilo_sites['Name'].value_counts())
name_counts[name_counts['Name'] > 2]
# look for chains that might be local that I'd be unfamiliar with (do a quick google search to see what they look like)

Unnamed: 0,Name
Starbucks,7
McDonald's,6
76,5
Pizza Hut,5
Subway,4
Taco Bell,4
Kula Shave Ice,3
L&L Hawaiian Barbecue,3
Hele,3
Burger King,3


In [272]:
# using function to determine business hours in the dataset
hilo_sites['hours_open'] = np.nan

for i in range(len(hilo_sites)):
    hilo_sites['hours_open'][i] = find_hours(hilo_sites['business_hours'][i])

In [273]:
hilo_FUEs = hilo_sites[(
    (hilo_sites['category'] == 'bar') | (hilo_sites['category'] == 'cafe') | 
    (hilo_sites['category'] == 'food') | (hilo_sites['category'] == 'restaurant') |
    (hilo_sites['category'] == 'bakery') | (hilo_sites['category'] == 'meal_delivery') |
    (hilo_sites['category'] == 'meal_takeaway') | (hilo_sites['category'] == 'convenience_store') |
    (hilo_sites['category'] == 'school') | (hilo_sites['category'] == 'grocery_or_supermarket') |
    (hilo_sites['category'] == 'university') | (hilo_sites['category'] == 'supermarket') |
    (hilo_sites['category'] == 'secondary_school') | (hilo_sites['category'] == 'primary_school') |
    (hilo_sites['category'] == 'airport') | (hilo_sites['category'] == 'amusement_park') |
    (hilo_sites['category'] == 'aquarium') | (hilo_sites['category'] == 'movie_theater') )]

In [274]:
hilo_FUEs.to_csv('hilo_FUEs.csv')

In [275]:
hilo_sites.to_csv('hilo_complete.csv')