In [1]:
# Importing packages
import requests
import json
import math

import numpy as np
import pandas as pd

import geopandas as gpd
import fiona
from matplotlib import pyplot as plt
from matplotlib import image as mpimg

from sklearn import preprocessing

In [2]:
#reading in cleaned distribution site data
hilo_dist = pd.read_csv("../data/complete_hilo.csv")
# need coordinates for data analysis, so dropping observations without them
hilo_dist = hilo_dist.dropna(subset=['Coordinates'])

hilo_lat = []
hilo_long = []
for i in hilo_dist["Coordinates"]:
    dic = eval(i)
    try:
        hilo_lat.append(dic["lat"])
    except:
        hilo_lat.append(dic["latitude"])
    try:
        hilo_long.append(dic["lng"])
    except:
        hilo_long.append(dic["longitude"])

hilo_dist = hilo_dist.assign(Lat = hilo_lat, Long = hilo_long)
hilo_dist = gpd.GeoDataFrame(hilo_dist, geometry=gpd.points_from_xy(hilo_dist.Long, hilo_dist.Lat))

hilo_dist = hilo_dist[['Name', 'Address', 'geometry', 'Lat', 'Long']]
hilo_dist = hilo_dist.rename(columns = {'geometry':'Coordinates'})
hilo_dist = hilo_dist.drop_duplicates()
hilo_dist

  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)


Unnamed: 0,Name,Address,Coordinates,Lat,Long
0,Hilo Bay Medical Clinic,"24 Pukihae Street, Hilo",POINT (-155.08795 19.73014),19.730137,-155.087946
1,Wild Ginger Hotel,"100 Puueo Street, Hilo",POINT (-155.08918 19.72932),19.729316,-155.089175
2,Dolphin Bay Hotel,"333 Iliahi Street, Hilo",POINT (-155.09043 19.72923),19.729229,-155.090427
3,Hilo Bay Oceanfront Bed and Breakfast,"56 Pukihae Street, Hilo",POINT (-155.08816 19.73098),19.730976,-155.088165
4,11 Arruda Ln.,Hilo,POINT (-155.08903 19.72859),19.728593,-155.089030
...,...,...,...,...,...
640,College Hall B,Hilo,POINT (-155.07989 19.70327),19.703268,-155.079886
641,Kealoha Beach Park,"1701 Kalanianaole Ave Hilo, HI 96720",POINT (-155.03360 19.73515),19.735152,-155.033602
642,Liko Lehua Cafe,"177 Kaumana Dr Hilo, HI 96720",POINT (-155.10633 19.71326),19.713260,-155.106330
643,Shell,"394 Kamehameha Avenue, Hilo",POINT (-155.08405 19.72312),19.723121,-155.084045


In [3]:
hilo_dist.Name.tolist()
# manually look through for recognisable fast food chains

['Hilo Bay Medical Clinic',
 'Wild Ginger Hotel',
 'Dolphin Bay Hotel',
 'Hilo Bay Oceanfront Bed and Breakfast',
 '11 Arruda Ln.',
 'JUMPSTART PRESCHOOL',
 'Puueo Cluster Head start',
 'Piihonua 4 Early Childhood',
 'Moon and Turtle',
 'Cronies Bar & Grill',
 'Hilo Town Tavern',
 'Amigos Cantina',
 'The Booch Bar',
 'Puna Chocolate Company & Cafe',
 'The Temple Bar',
 'Jojo’s Snack Shack',
 'Aloha',
 'Hilotown Hale',
 'Hilo Bay Hostel',
 'Pakalana Inn',
 'Lotus Garden of Hilo',
 'Aloha Wellness Center',
 'Palace Theater',
 'Connections Public Charter School',
 'Hilo Union School',
 'Maunaloa Elementary School',
 "Na Mana Li'i",
 'PACT Head Start',
 'Guitar Lesson With The Creative Guitar',
 'Sean Luscombe',
 'Haili Christian School',
 'Slack Key Guitar Program',
 'Backstage Hi, LLC',
 'Private Yoga Classes Hilo',
 'school of hi',
 'Tanaka Hatsume Piano Studio',
 'Gloria Mendoza-Watson Piano',
 'YWCA of Hawaii Island Developmental Preschool',
 'Lumphrey Piano Studio',
 'US Wing Chun Ha

In [4]:
name_counts = pd.DataFrame(hilo_dist['Name'].value_counts())
name_counts[name_counts['Name'] > 2]

# look for chains that might be local that I'd be unfamiliar with (do a quick google search to see what they look like)
# and also check I didn't miss any familiar ones in my earlier manual search

Unnamed: 0,Name
Pizza Hut,5
McDonald's,5
76,5
Subway,4
7-Eleven,4
L&L Hawaiian Barbecue,4
Starbucks,4
Burger King,3
Shell,3
Texaco,3


In [5]:
# list of fast food places
fast_food = ['IHOP', 'Taco Bell', 'Jamba', 'Starbucks', 'KFC', 'Jack in the Box',\
             'Pizza Hut', 'Subway', "McDonald's", 'Cinnabon', 'Burger Kind', '7-Eleven',\
             'Dairy Queen/Orange Julius Treat Ctr', 'Panda Express', "Domino's Pizza",\
             'L&L Hawaiian Barbecue', 'Minit Stop']

# create a binary variable for if a place is a fast food place based on business name
hilo_dist['fast_food'] = hilo_dist['Name'].isin(fast_food)
hilo_dist['fast_food'] = np.multiply(hilo_dist['fast_food'] , 1)

In [6]:
# load in distribution sites (not cleaned yet)
uncommon = pd.read_csv("hilo_uncommon_dist.csv")
common = pd.read_csv("hilo_common_dist.csv")
hilo_dist_dirty = pd.concat([uncommon, common])
hilo_dist_dirty = hilo_dist_dirty.rename(columns = {'name' : 'Name'})
hilo_dist_dirty

# get location variables
hilo_dirty_lat = []
hilo_dirty_long = []
for i in hilo_dist_dirty["Coordinates"]:
    dic = eval(i)
    try:
        hilo_dirty_lat.append(dic["lat"])
    except:
        hilo_dirty_lat.append(dic["latitude"])
    try:
        hilo_dirty_long.append(dic["lng"])
    except:
        hilo_dirty_long.append(dic["longitude"])

hilo_dist_dirty = hilo_dist_dirty.assign(Lat = hilo_dirty_lat, Long = hilo_dirty_long)
hilo_dist_dirty = gpd.GeoDataFrame(hilo_dist_dirty, geometry=gpd.points_from_xy(hilo_dist_dirty.Long, hilo_dist_dirty.Lat))

#select relevant variables (ones I want to merge into the cleaned data)
dist_dirty_merge = hilo_dist_dirty[['Name','Address', 'geometry',\
                                     'user_ratings_total', 'types',\
                                     'business_hours', 'business_hours_text']]
dist_dirty_merge = dist_dirty_merge.rename(columns = {'geometry':'Coordinates'})

#drop duplicate observations if there are any
dist_dirty_merge = dist_dirty_merge.drop_duplicates()

  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)


In [7]:
# merging desired vairables from dirty data into clean data
hilo_dist_detail = pd.merge(hilo_dist, dist_dirty_merge, on=['Name','Address','Coordinates'], how='left')

drop_indicator = hilo_dist_detail[['Name', 'Address']].duplicated(keep='last').tolist()
hilo_dist_detail = hilo_dist_detail.assign(Keep = drop_indicator)
hilo_dist_detail = hilo_dist_detail[hilo_dist_detail['Keep'] == False]
hilo_dist_detail = hilo_dist_detail.drop(columns = ['Keep'])
hilo_dist_detail = hilo_dist_detail.reset_index()
hilo_dist_detail

Unnamed: 0,index,Name,Address,Coordinates,Lat,Long,fast_food,user_ratings_total,types,business_hours,business_hours_text
0,0,Hilo Bay Medical Clinic,"24 Pukihae Street, Hilo",POINT (-155.08795 19.73014),19.730137,-155.087946,0,,"['hospital', 'health', 'point_of_interest', 'e...",,
1,1,Wild Ginger Hotel,"100 Puueo Street, Hilo",POINT (-155.08918 19.72932),19.729316,-155.089175,0,241.0,"['lodging', 'point_of_interest', 'establishment']","[{'open': {'day': 0, 'time': '0000'}}]","['Monday: Open 24 hours', 'Tuesday: Open 24 ho..."
2,2,Dolphin Bay Hotel,"333 Iliahi Street, Hilo",POINT (-155.09043 19.72923),19.729229,-155.090427,0,181.0,"['lodging', 'point_of_interest', 'establishment']",,
3,3,Hilo Bay Oceanfront Bed and Breakfast,"56 Pukihae Street, Hilo",POINT (-155.08816 19.73098),19.730976,-155.088165,0,67.0,"['lodging', 'point_of_interest', 'establishment']","[{'open': {'day': 0, 'time': '0000'}}]","['Monday: Open 24 hours', 'Tuesday: Open 24 ho..."
4,4,11 Arruda Ln.,Hilo,POINT (-155.08903 19.72859),19.728593,-155.089030,0,,"['lodging', 'point_of_interest', 'establishment']",,
...,...,...,...,...,...,...,...,...,...,...,...
640,666,College Hall B,Hilo,POINT (-155.07989 19.70327),19.703268,-155.079886,0,,"['university', 'point_of_interest', 'establish...",,
641,667,Kealoha Beach Park,"1701 Kalanianaole Ave Hilo, HI 96720",POINT (-155.03360 19.73515),19.735152,-155.033602,0,,,,
642,668,Liko Lehua Cafe,"177 Kaumana Dr Hilo, HI 96720",POINT (-155.10633 19.71326),19.713260,-155.106330,0,,,,
643,669,Shell,"394 Kamehameha Avenue, Hilo",POINT (-155.08405 19.72312),19.723121,-155.084045,0,44.0,"['gas_station', 'convenience_store', 'food', '...","[{'close': {'day': 0, 'time': '2000'}, 'open':...",['Monday: 6:00\u202fAM\u2009–\u20098:00\u202fP...


In [8]:
# creating variables for businesses that would use more foodware

hilo_dist_detail['bakery'] = 0
hilo_dist_detail['cafe'] = 0
hilo_dist_detail['meal_delivery'] = 0
hilo_dist_detail['meal_takeaway'] = 0
hilo_dist_detail['restaurant'] = 0
hilo_dist_detail['food'] = 0

for i in range(len(hilo_dist_detail)):
    types = hilo_dist_detail['types'][i]
    
    if type(types) != float:    
        if 'bakery' in types:
            hilo_dist_detail['bakery'][i] = 1
        if 'cafe' in types:
            hilo_dist_detail['cafe'] = 1
        if 'meal_delivery' in types:
            hilo_dist_detail['meal_delivery'] = 1
        if 'meal_takeaway' in types:
            hilo_dist_detail['meal_takeaway'] = 1
        if 'restaurant' in types:
            hilo_dist_detail['restaurant'][i] = 1
        if 'food' in types:
            hilo_dist_detail['food'][i] = 1
        
    if np.isnan(hilo_dist_detail['user_ratings_total'][i]):
        hilo_dist_detail['user_ratings_total'][i] = 0
            
hilo_dist_detail.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hilo_dist_detail['user_ratings_total'][i] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hilo_dist_detail['restaurant'][i] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hilo_dist_detail['food'][i] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hilo_dist_detail['bakery'][i] = 1


Unnamed: 0,index,Name,Address,Coordinates,Lat,Long,fast_food,user_ratings_total,types,business_hours,business_hours_text,bakery,cafe,meal_delivery,meal_takeaway,restaurant,food
0,0,Hilo Bay Medical Clinic,"24 Pukihae Street, Hilo",POINT (-155.08795 19.73014),19.730137,-155.087946,0,0.0,"['hospital', 'health', 'point_of_interest', 'e...",,,0,1,0,1,0,0
1,1,Wild Ginger Hotel,"100 Puueo Street, Hilo",POINT (-155.08918 19.72932),19.729316,-155.089175,0,241.0,"['lodging', 'point_of_interest', 'establishment']","[{'open': {'day': 0, 'time': '0000'}}]","['Monday: Open 24 hours', 'Tuesday: Open 24 ho...",0,1,0,1,0,0
2,2,Dolphin Bay Hotel,"333 Iliahi Street, Hilo",POINT (-155.09043 19.72923),19.729229,-155.090427,0,181.0,"['lodging', 'point_of_interest', 'establishment']",,,0,1,0,1,0,0
3,3,Hilo Bay Oceanfront Bed and Breakfast,"56 Pukihae Street, Hilo",POINT (-155.08816 19.73098),19.730976,-155.088165,0,67.0,"['lodging', 'point_of_interest', 'establishment']","[{'open': {'day': 0, 'time': '0000'}}]","['Monday: Open 24 hours', 'Tuesday: Open 24 ho...",0,1,0,1,0,0
4,4,11 Arruda Ln.,Hilo,POINT (-155.08903 19.72859),19.728593,-155.08903,0,0.0,"['lodging', 'point_of_interest', 'establishment']",,,0,1,0,1,0,0


In [9]:
# creating a function to determine number of hours a business is open a week (if business hours are available)
def find_hours(bus_hours):
    if type(bus_hours) != float: 
        hours = 0
        for i in range(7):

            day = "'day': {}".format(i)
            if bus_hours.count(day) == 2:
                time_open = int(bus_hours.split("'open': {'day': " + str(i) + ", 'time': '")[1][0:4])
                time_close = int(bus_hours.split("'close': {'day': " + str(i) + ", 'time': '")[1][0:4])
                if time_close < time_open:
                    time_close += 2400
                hours += round((time_close - time_open)/100)
                if (time_close - time_open) % 100 != 0:
                    min_close = int(str(time_close)[-2:])
                    min_open = int(str(time_open)[-2:])
                    hours -= (min_open - min_close)/60
            if i == 0 and bus_hours.count(day) == 1:
                return 24 * 7
        return hours

In [10]:
# using function to determine business hours in the dataset
hilo_dist_detail['hours_open'] = np.nan

for i in range(len(hilo_dist_detail)):
    hilo_dist_detail['hours_open'][i] = find_hours(hilo_dist_detail['business_hours'][i])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hilo_dist_detail['hours_open'][i] = find_hours(hilo_dist_detail['business_hours'][i])


In [15]:
# creating convenience score to use as a weight

hilo_dist_detail['user_ratings_total'] = np.where(hilo_dist_detail['user_ratings_total'] == 0,\
                                                  1, hilo_dist_detail['user_ratings_total'])
hilo_dist_detail['hours_open'] = np.where(np.isnan(hilo_dist_detail['hours_open']),\
                                          np.nanmedian(hilo_dist_detail['hours_open']), hilo_dist_detail['hours_open'])

# normalizing variables used in score
def normalize_var(variable):
    scaler = preprocessing.MinMaxScaler()
    return scaler.fit_transform(np.array(variable).reshape(-1,1))
    
hilo_dist_detail['user_ratings_total'] = normalize_var(hilo_dist_detail['user_ratings_total'])
hilo_dist_detail['hours_open'] = normalize_var(hilo_dist_detail['hours_open'])

#creating weight
hilo_dist_detail['weight'] = (hilo_dist_detail['user_ratings_total'] + 5 * hilo_dist_detail['fast_food'] +
                             3 * hilo_dist_detail['bakery'] + 3 * hilo_dist_detail['cafe'] +
                             1.5 * hilo_dist_detail['meal_delivery'] + 2 * hilo_dist_detail['meal_takeaway'] +
                             1.5 * hilo_dist_detail['restaurant'] + 3 * hilo_dist_detail['food'] +
                             2 * hilo_dist_detail['hours_open'])

hilo_dist_detail = hilo_dist_detail[['Name', 'Address', 'Coordinates', 'Lat', 'Long',
                                     'fast_food', 'user_ratings_total', 'bakery', 'cafe',
                                     'meal_delivery', 'meal_takeaway', 'restaurant', 'food',
                                     'hours_open', 'weight']]
hilo_dist_detail

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hilo_dist_detail['user_ratings_total'] = np.where(hilo_dist_detail['user_ratings_total'] == 0,\
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hilo_dist_detail['hours_open'] = np.where(np.isnan(hilo_dist_detail['hours_open']),\
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hilo_dist_detail['user_r

Unnamed: 0,Name,Address,Coordinates,Lat,Long,fast_food,user_ratings_total,bakery,cafe,meal_delivery,meal_takeaway,restaurant,food,hours_open,weight
0,Hilo Bay Medical Clinic,"24 Pukihae Street, Hilo",POINT (-155.08795 19.73014),19.730137,-155.087946,0,0.000000,0,1,0,1,0,0,0.476190,5.952381
1,Wild Ginger Hotel,"100 Puueo Street, Hilo",POINT (-155.08918 19.72932),19.729316,-155.089175,0,0.084418,0,1,0,1,0,0,1.000000,7.084418
2,Dolphin Bay Hotel,"333 Iliahi Street, Hilo",POINT (-155.09043 19.72923),19.729229,-155.090427,0,0.063313,0,1,0,1,0,0,0.476190,6.015694
3,Hilo Bay Oceanfront Bed and Breakfast,"56 Pukihae Street, Hilo",POINT (-155.08816 19.73098),19.730976,-155.088165,0,0.023215,0,1,0,1,0,0,1.000000,7.023215
4,11 Arruda Ln.,Hilo,POINT (-155.08903 19.72859),19.728593,-155.089030,0,0.000000,0,1,0,1,0,0,0.476190,5.952381
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
640,College Hall B,Hilo,POINT (-155.07989 19.70327),19.703268,-155.079886,0,0.000000,0,1,0,1,0,0,0.476190,5.952381
641,Kealoha Beach Park,"1701 Kalanianaole Ave Hilo, HI 96720",POINT (-155.03360 19.73515),19.735152,-155.033602,0,0.000000,0,1,0,1,0,0,0.476190,5.952381
642,Liko Lehua Cafe,"177 Kaumana Dr Hilo, HI 96720",POINT (-155.10633 19.71326),19.713260,-155.106330,0,0.000000,0,1,0,1,0,0,0.476190,5.952381
643,Shell,"394 Kamehameha Avenue, Hilo",POINT (-155.08405 19.72312),19.723121,-155.084045,0,0.015125,0,1,0,1,0,1,0.583333,9.181792


In [23]:
hilo_dist_detail.to_csv('../data/hilo_dist_detail.csv')