# Coursera Capstone Project 
## The Battle of Neighborhoods - Final Report (Week 1 and 2) 
### Sang Yoon Lee 
### Upload Libraries Required

In [1]:
import numpy as np # library to handle data in a vectorized manner
import time
import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe


!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library
import folium # map rendering library
from folium import plugins

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

import seaborn as sns

# import k-means from clustering stage
from sklearn.cluster import KMeans



print('Libraries imported.')

Solving environment: done


  current version: 4.5.11
  latest version: 4.8.1

Please update conda by running

    $ conda update -n base -c defaults conda



## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2019.11.28         |           py36_0         149 KB  conda-forge
    scikit-learn-0.20.1        |   py36h22eb022_0         5.7 MB
    liblapack-3.8.0            |      11_openblas          10 KB  conda-forge
    liblapacke-3.8.0           |      11_openblas          10 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    libopenblas-0.3.6          |       h5a2b251_2         7.7 MB
    scipy-1.4.1                |   py36h921218d_0        18.9 MB  conda-forge
    libcblas-3.8.0             |      11_openblas        

In [2]:
address='440 S Vermont Ave. Los Angeles, CA 90020 USA'

In [3]:
google_api_key= 'AIzaSyD4whHVTDGFlkm1s9ittOaF2Uyr4JyC8BU'

### Neighborhood Candidates

Let's create latitude & longitude coordinates for centroids of our candidate neighborhoods. We will create a grid of cells covering our area of interest which is aprox. 12x12 killometers centered around Berlin city center.

Let's first find the latitude & longitude of Galleria Market in LA K-town, using specific, well known address and Google Maps geocoding API.

In [4]:
def get_coordinates(api_key, address, verbose=False):
    try:
        url = 'https://maps.googleapis.com/maps/api/geocode/json?key={}&address={}'.format(api_key, address)
        response = requests.get(url).json()
        if verbose:
            print('Google Maps API JSON result =>', response)
        results = response['results']
        geographical_data = results[0]['geometry']['location'] # get geographical coordinates
        lat = geographical_data['lat']
        lon = geographical_data['lng']
        return [lat, lon]
    except:
        return [None, None]
    
address='440 S Vermont Ave. Los Angeles, CA 90020 USA'
la_center = get_coordinates(google_api_key, address)
print('Coordinate of {}: {}'.format(address, la_center))

Coordinate of 440 S Vermont Ave. Los Angeles, CA 90020 USA: [34.0656433, -118.2913283]


Now let's create a grid of area candidates, equaly spaced, centered around city center and within ~6km from Galleria Market. Our neighborhoods will be defined as circular areas with a radius of 300 meters, so our neighborhood centers will be 600 meters apart.

To accurately calculate distances we need to create our grid of locations in Cartesian 2D coordinate system which allows us to calculate distances in meters (not in latitude/longitude degrees). Then we'll project those coordinates back to latitude/longitude degrees to be shown on Folium map. So let's create functions to convert between WGS84 spherical coordinate system (latitude/longitude degrees) and UTM Cartesian coordinate system (X/Y coordinates in  meters).

In [5]:
!pip install shapely
import shapely.geometry

!pip install pyproj
import pyproj

import math

def lonlat_to_xy(lon, lat):
    proj_latlon = pyproj.Proj(proj='latlong',datum='WGS84')
    proj_xy = pyproj.Proj(proj="utm", zone=33, datum='WGS84')
    xy = pyproj.transform(proj_latlon, proj_xy, lon, lat)
    return xy[0], xy[1]

def xy_to_lonlat(x, y):
    proj_latlon = pyproj.Proj(proj='latlong',datum='WGS84')
    proj_xy = pyproj.Proj(proj="utm", zone=33, datum='WGS84')
    lonlat = pyproj.transform(proj_xy, proj_latlon, x, y)
    return lonlat[0], lonlat[1]

def calc_xy_distance(x1, y1, x2, y2):
    dx = x2 - x1
    dy = y2 - y1
    return math.sqrt(dx*dx + dy*dy)

print('Coordinate transformation check')
print('-------------------------------')
print('LA K-town center longitude={}, latitude={}'.format(la_center[1], la_center[0]))
x, y = lonlat_to_xy(la_center[1], la_center[0])
print('LA K-town center UTM X={}, Y={}'.format(x, y))
lo, la = xy_to_lonlat(x, y)
print('LA K-town center longitude={}, latitude={}'.format(lo, la))

Collecting shapely
[?25l  Downloading https://files.pythonhosted.org/packages/38/b6/b53f19062afd49bb5abd049aeed36f13bf8d57ef8f3fa07a5203531a0252/Shapely-1.6.4.post2-cp36-cp36m-manylinux1_x86_64.whl (1.5MB)
[K     |████████████████████████████████| 1.5MB 31.0MB/s eta 0:00:01
[?25hInstalling collected packages: shapely
Successfully installed shapely-1.6.4.post2
Coordinate transformation check
-------------------------------
LA K-town center longitude=-118.2913283, latitude=34.0656433
LA K-town center UTM X=-3954281.623452942, Y=15051394.804949965
LA K-town center longitude=-118.2913283, latitude=34.065643300000005


In [6]:
la_center_x, la_center_y = lonlat_to_xy(la_center[1], la_center[0]) # Korean town center in Cartesian coordinates

k = math.sqrt(3) / 2 # Vertical offset for hexagonal grid cells
x_min = la_center_x - 6000
x_step = 600
y_min = la_center_y - 6000 - (int(21/k)*k*600 - 12000)/2
y_step = 600 * k 

latitudes = []
longitudes = []
distances_from_center = []
xs = []
ys = []
for i in range(0, int(21/k)):
    y = y_min + i * y_step
    x_offset = 300 if i%2==0 else 0
    for j in range(0, 21):
        x = x_min + j * x_step + x_offset
        distance_from_center = calc_xy_distance(la_center_x, la_center_y, x, y)
        if (distance_from_center <= 6001):
            lon, lat = xy_to_lonlat(x, y)
            latitudes.append(lat)
            longitudes.append(lon)
            distances_from_center.append(distance_from_center)
            xs.append(x)
            ys.append(y)

print(len(latitudes), 'candidate neighborhood centers generated.')

364 candidate neighborhood centers generated.


Let's visualize the data we have so far: city center location and candidate neighborhood centers:

In [7]:
map_la= folium.Map(location=la_center, zoom_start=13)
folium.Marker(la_center, popup='Galeria Market').add_to(map_la)
for lat, lon in zip(latitudes, longitudes):
    #folium.CircleMarker([lat, lon], radius=2, color='blue', fill=True, fill_color='blue', fill_opacity=1).add_to(map_berlin) 
    folium.Circle([lat, lon], radius=300, color='blue', fill=False).add_to(map_la)
    #folium.Marker([lat, lon]).add_to(map_berlin)
map_la

OK, we now have the coordinates of centers of neighborhoods/areas to be evaluated, equally spaced (distance from every point to it's neighbors is exactly the same) and within ~6km from Alexanderplatz. 

Let's now use Google Maps API to get approximate addresses of those locations.

In [8]:
def get_address(api_key, latitude, longitude, verbose=False):
    try:
        url = 'https://maps.googleapis.com/maps/api/geocode/json?key={}&latlng={},{}'.format(api_key, latitude, longitude)
        response = requests.get(url).json()
        if verbose:
            print('Google Maps API JSON result =>', response)
        results = response['results']
        address = results[0]['formatted_address']
        return address
    except:
        return None

addr = get_address(google_api_key, la_center[0], la_center[1])
print('Reverse geocoding check')
print('-----------------------')
print('Address of [{}, {}] is: {}'.format(la_center[0], la_center[1], addr))

Reverse geocoding check
-----------------------
Address of [34.0656433, -118.2913283] is: 440s Vermont Ave, Los Angeles, CA 90020, USA


In [9]:
neighborhood_latitude=34.0656433
neighborhood_longitude=-118.2913283

In [10]:
print('Obtaining location addresses: ', end='')
addresses = []
for lat, lon in zip(latitudes, longitudes):
    address = get_address(google_api_key, lat, lon)
    if address is None:
        address = 'NO ADDRESS'
    address = address.replace(', Korea town', '') # We don't need country part of address
    addresses.append(address)
    print(' .', end='')
print(' done.')

Obtaining location addresses:  . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . done.


In [11]:
addresses[150:170]

['6000 Santa Monica Blvd, Los Angeles, CA 90038, USA',
 '6201 Santa Monica Blvd, Los Angeles, CA 90038, USA',
 '1225 N Cahuenga Blvd, Los Angeles, CA 90038, USA',
 '220 W 4th St, Los Angeles, CA 90013, USA',
 '400 Hope Pl, Los Angeles, CA 90071, USA',
 '445 S Figueroa St, Los Angeles, CA 90071, USA',
 '1329 W 5th St, Los Angeles, CA 90017, USA',
 '1529 W 4th St, Los Angeles, CA 90017, USA',
 '440 S Bonnie Brae St, Los Angeles, CA 90057, USA',
 '422 S Grand View St, Los Angeles, CA 90057, USA',
 '423 1/4 S Rampart Blvd, Los Angeles, CA 90057, USA',
 '3016 W 4th St, Los Angeles, CA 90020, USA',
 '3400 W 3rd St, Los Angeles, CA 90020, USA',
 '207 S New Hampshire Ave, Los Angeles, CA 90004, USA',
 '121 N Edgemont St, Los Angeles, CA 90004, USA',
 '206 N Kingsley Dr, Los Angeles, CA 90004, USA',
 '400 N Serrano Ave, Los Angeles, CA 90004, USA',
 '410 St Andrews Pl, Los Angeles, CA 90004, USA',
 '622 N Van Ness Ave, Los Angeles, CA 90038, USA',
 '5515 Melrose Ave, Los Angeles, CA 90038, USA'

In [50]:
import pandas as pd

df_locations = pd.DataFrame({'Address': addresses,
                             'Latitude': latitudes,
                             'Longitude': longitudes,
                             'X': xs,
                             'Y': ys,
                             'Distance from center': distances_from_center})

df_locations.head(10)

Unnamed: 0,Address,Latitude,Longitude,X,Y,Distance from center
0,"2307 Lake Shore Ave, Los Angeles, CA 90039, USA",34.0943,-118.252663,-3956082.0,15045680.0,5992.495307
1,"2258 Hidalgo Ave, Los Angeles, CA 90039, USA",34.096512,-118.257114,-3955482.0,15045680.0,5840.3767
2,"Silver Lake Meadows, 2300 Silver Lake Blvd, Los Angeles, CA 90039, USA",34.098724,-118.261566,-3954882.0,15045680.0,5747.173218
3,"Silver Lake / Hawick, Los Angeles, CA 90039, USA",34.100936,-118.266018,-3954282.0,15045680.0,5715.767665
4,"2447 Lanterman Terrace, Los Angeles, CA 90039, USA",34.103148,-118.270471,-3953682.0,15045680.0,5747.173218
5,"3765 Tracy St, Los Angeles, CA 90027, USA",34.10536,-118.274924,-3953082.0,15045680.0,5840.3767
6,"4013 Holly Knoll Dr, Los Angeles, CA 90027, USA",34.107572,-118.279378,-3952482.0,15045680.0,5992.495307
7,"2025 Avon St, Los Angeles, CA 90026, USA",34.087775,-118.24829,-3956982.0,15046200.0,5855.766389
8,"2036 Lemoyne St, Los Angeles, CA 90026, USA",34.089987,-118.25274,-3956382.0,15046200.0,5604.462508
9,"2024 Allesandro St, Los Angeles, CA 90039, USA",34.092199,-118.257191,-3955782.0,15046200.0,5408.326913


Looking good. Let's now place all this into a Pandas dataframe.

In [51]:
df_locations.to_pickle('./locations.pkl')

### Foursquare
Now that we have our location candidates, let's use Foursquare API to get info on restaurants in each neighborhood.

We're interested in venues in 'food' category, but only those that are proper restaurants - coffe shops, pizza places, bakeries etc. are not direct competitors so we don't care about those. So we will include in out list only venues that have 'restaurant' in category name, and we'll make sure to detect and include all the subcategories of specific 'Italian restaurant' category, as we need info on Italian restaurants in the neighborhood.

In [52]:
foursquare_client_id = 'AVBBIOYVEQWFRHN4QOOIVRWGT3Z2KZ4J3F3XMWDSLDP40RBK'
foursquare_client_secret = 'TO0JOVZBLM4YH5QZJDR1ACGLRWDLWSRL3RU5SUNTCALWGCMA'

In [53]:
address = '440 S Vermont Ave. Los Angeles, CA 90020 USA'

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

34.0655794 -118.290934


In [54]:
search_query = 'Korean'
radius = 500
print(search_query + ' .... OK!')

Korean .... OK!


In [55]:
CLIENT_ID = 'AVBBIOYVEQWFRHN4QOOIVRWGT3Z2KZ4J3F3XMWDSLDP40RBK' # your Foursquare ID
CLIENT_SECRET = 'TO0JOVZBLM4YH5QZJDR1ACGLRWDLWSRL3RU5SUNTCALWGCMA' # your Foursquare Secret
VERSION = '20200116' # Foursquare API version
LIMIT = 500
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: AVBBIOYVEQWFRHN4QOOIVRWGT3Z2KZ4J3F3XMWDSLDP40RBK
CLIENT_SECRET:TO0JOVZBLM4YH5QZJDR1ACGLRWDLWSRL3RU5SUNTCALWGCMA


In [56]:
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/search?client_id=AVBBIOYVEQWFRHN4QOOIVRWGT3Z2KZ4J3F3XMWDSLDP40RBK&client_secret=TO0JOVZBLM4YH5QZJDR1ACGLRWDLWSRL3RU5SUNTCALWGCMA&ll=34.0655794,-118.290934&v=20200116&query=Korean&radius=500&limit=500'

In [57]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e292ca0963d29001b61b74d'},
 'response': {'venues': [{'id': '532c98cd498e071f08dc7316',
    'name': 'Bak Kung Korean BBQ 2',
    'location': {'address': '233 S Vermont Ave',
     'lat': 34.06954248605497,
     'lng': -118.29186759197351,
     'labeledLatLngs': [{'label': 'display',
       'lat': 34.06954248605497,
       'lng': -118.29186759197351}],
     'distance': 449,
     'postalCode': '90004',
     'cc': 'US',
     'city': 'Los Angeles',
     'state': 'CA',
     'country': 'United States',
     'formattedAddress': ['233 S Vermont Ave',
      'Los Angeles, CA 90004',
      'United States']},
    'categories': [{'id': '4bf58dd8d48988d113941735',
      'name': 'Korean Restaurant',
      'pluralName': 'Korean Restaurants',
      'shortName': 'Korean',
      'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/korean_',
       'suffix': '.png'},
      'primary': True}],
    'venuePage': {'id': '82784605'},
    'referralId': 'v-1579756860

In [59]:
# assign relevant part of JSON to venues
venues = results['response']['venues']

# tranform venues into a dataframe
dataframe = json_normalize(venues)
dataframe

Unnamed: 0,id,name,categories,referralId,hasPerk,location.address,location.lat,location.lng,location.labeledLatLngs,location.distance,location.postalCode,location.cc,location.city,location.state,location.country,location.formattedAddress,venuePage.id,location.crossStreet
0,532c98cd498e071f08dc7316,Bak Kung Korean BBQ 2,"[{'id': '4bf58dd8d48988d113941735', 'name': 'Korean Restaurant', 'pluralName': 'Korean Restaurants', 'shortName': 'Korean', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/korean_', 'suffix': '.png'}, 'primary': True}]",v-1579756860,False,233 S Vermont Ave,34.069542,-118.291868,"[{'label': 'display', 'lat': 34.06954248605497, 'lng': -118.29186759197351}]",449,90004.0,US,Los Angeles,CA,United States,"[233 S Vermont Ave, Los Angeles, CA 90004, United States]",82784605.0,
1,52bc8ee211d2f7e69402bd76,Korean Gospel Broadcast company,"[{'id': '50328a8e91d4c4b30a586d6c', 'name': 'Non-Profit', 'pluralName': 'Non-Profits', 'shortName': 'Non-Profit', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/building/default_', 'suffix': '.png'}, 'primary': True}]",v-1579756860,False,,34.062857,-118.287439,"[{'label': 'display', 'lat': 34.062857, 'lng': -118.28743899999999}]",442,,US,Los Angeles,CA,United States,"[Los Angeles, CA, United States]",,
2,4ce76ce2e888f04d05543c6b,Korean Philadelphia Presbyterian Church,"[{'id': '4bf58dd8d48988d132941735', 'name': 'Church', 'pluralName': 'Churches', 'shortName': 'Church', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/building/religious_church_', 'suffix': '.png'}, 'primary': True}]",v-1579756860,False,407 S New Hampshire Ave,34.067122,-118.29329,"[{'label': 'display', 'lat': 34.067122, 'lng': -118.29328999999998}]",276,90020.0,US,Los Angeles,CA,United States,"[407 S New Hampshire Ave, Los Angeles, CA 90020, United States]",,
3,4d94b17b1646a35d818f3ca3,Korean Herold Times,"[{'id': '4bf58dd8d48988d124941735', 'name': 'Office', 'pluralName': 'Offices', 'shortName': 'Office', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/building/default_', 'suffix': '.png'}, 'primary': True}]",v-1579756860,False,505 S Virgil Ave,34.064056,-118.287812,"[{'label': 'display', 'lat': 34.06405592, 'lng': -118.28781165}]",334,90020.0,US,Los Angeles,CA,United States,"[505 S Virgil Ave (5th), Los Angeles, CA 90020, United States]",,5th
4,530673cf498ec0f412c596eb,Korean Adventist Press,"[{'id': '4bf58dd8d48988d124941735', 'name': 'Office', 'pluralName': 'Offices', 'shortName': 'Office', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/building/default_', 'suffix': '.png'}, 'primary': True}]",v-1579756860,False,,34.063084,-118.293213,"[{'label': 'display', 'lat': 34.063084, 'lng': -118.293213}]",348,,US,Los Angeles,CA,United States,"[Los Angeles, CA, United States]",,
5,582f54049900e65845ebab74,Sa Rit Gol Korean Restaurant (싸릿골),"[{'id': '4bf58dd8d48988d113941735', 'name': 'Korean Restaurant', 'pluralName': 'Korean Restaurants', 'shortName': 'Korean', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/korean_', 'suffix': '.png'}, 'primary': True}]",v-1579756860,False,,34.063469,-118.295266,"[{'label': 'display', 'lat': 34.063469, 'lng': -118.295266}]",463,,US,Los Angeles,CA,United States,"[Los Angeles, CA, United States]",,
6,4c336fa2a0ced13a4b9f166e,Consulate General of the Republic Of Korea,"[{'id': '4bf58dd8d48988d12c951735', 'name': 'Embassy / Consulate', 'pluralName': 'Embassies / Consulates', 'shortName': 'Embassy', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/travel/embassy_', 'suffix': '.png'}, 'primary': True}]",v-1579756860,False,3243 Wilshire Blvd,34.061993,-118.292735,"[{'label': 'display', 'lat': 34.06199316125548, 'lng': -118.29273534046122}]",432,90010.0,US,Los Angeles,CA,United States,"[3243 Wilshire Blvd (btwn Vermont Ave & New Hampshire Ave.), Los Angeles, CA 90010, United States]",,btwn Vermont Ave & New Hampshire Ave.
7,4e34bcc918a82fdd656f0839,Myeong dong Donkatsu,"[{'id': '4bf58dd8d48988d113941735', 'name': 'Korean Restaurant', 'pluralName': 'Korean Restaurants', 'shortName': 'Korean', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/korean_', 'suffix': '.png'}, 'primary': True}]",v-1579756860,False,698 S Vermont Ave,34.060234,-118.291203,"[{'label': 'display', 'lat': 34.060234, 'lng': -118.291203}]",595,90005.0,US,Los Angeles,CA,United States,"[698 S Vermont Ave (7th), Los Angeles, CA 90005, United States]",,7th


In [21]:
# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
dataframe_filtered.columns = [column.split('.')[-1] for column in dataframe_filtered.columns]

dataframe_filtered

Unnamed: 0,name,categories,address,lat,lng,labeledLatLngs,distance,postalCode,cc,city,state,country,formattedAddress,crossStreet,id
0,Bak Kung Korean BBQ 2,Korean Restaurant,233 S Vermont Ave,34.069542,-118.291868,"[{'label': 'display', 'lat': 34.06954248605497...",449,90004.0,US,Los Angeles,CA,United States,"[233 S Vermont Ave, Los Angeles, CA 90004, Uni...",,532c98cd498e071f08dc7316
1,Korean Gospel Broadcast company,Non-Profit,,34.062857,-118.287439,"[{'label': 'display', 'lat': 34.062857, 'lng':...",442,,US,Los Angeles,CA,United States,"[Los Angeles, CA, United States]",,52bc8ee211d2f7e69402bd76
2,Korean Philadelphia Presbyterian Church,Church,407 S New Hampshire Ave,34.067122,-118.29329,"[{'label': 'display', 'lat': 34.067122, 'lng':...",276,90020.0,US,Los Angeles,CA,United States,"[407 S New Hampshire Ave, Los Angeles, CA 9002...",,4ce76ce2e888f04d05543c6b
3,Korean Herold Times,Office,505 S Virgil Ave,34.064056,-118.287812,"[{'label': 'display', 'lat': 34.06405592, 'lng...",334,90020.0,US,Los Angeles,CA,United States,"[505 S Virgil Ave (5th), Los Angeles, CA 90020...",5th,4d94b17b1646a35d818f3ca3
4,Korean Adventist Press,Office,,34.063084,-118.293213,"[{'label': 'display', 'lat': 34.063084, 'lng':...",348,,US,Los Angeles,CA,United States,"[Los Angeles, CA, United States]",,530673cf498ec0f412c596eb
5,Sa Rit Gol Korean Restaurant (싸릿골),Korean Restaurant,,34.063469,-118.295266,"[{'label': 'display', 'lat': 34.063469, 'lng':...",463,,US,Los Angeles,CA,United States,"[Los Angeles, CA, United States]",,582f54049900e65845ebab74
6,Consulate General of the Republic Of Korea,Embassy / Consulate,3243 Wilshire Blvd,34.061993,-118.292735,"[{'label': 'display', 'lat': 34.06199316125548...",432,90010.0,US,Los Angeles,CA,United States,[3243 Wilshire Blvd (btwn Vermont Ave & New Ha...,btwn Vermont Ave & New Hampshire Ave.,4c336fa2a0ced13a4b9f166e
7,Myeong dong Donkatsu,Korean Restaurant,698 S Vermont Ave,34.060234,-118.291203,"[{'label': 'display', 'lat': 34.060234, 'lng':...",595,90005.0,US,Los Angeles,CA,United States,"[698 S Vermont Ave (7th), Los Angeles, CA 9000...",7th,4e34bcc918a82fdd656f0839


In [36]:
dataframe_filtered.name

0                         Bak Kung Korean BBQ 2
1               Korean Gospel Broadcast company
2       Korean Philadelphia Presbyterian Church
3                           Korean Herold Times
4                        Korean Adventist Press
5            Sa Rit Gol Korean Restaurant (싸릿골)
6    Consulate General of the Republic Of Korea
7                          Myeong dong Donkatsu
Name: name, dtype: object

In [37]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=13) # generate map centred around the Conrad Hotel

# add a red circle marker to represent the Galeria Market
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='red',
    popup='Conrad Hotel',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(venues_map)

# add the Korean restaurants as blue circle markers
for lat, lng, label in zip(dataframe_filtered.lat, dataframe_filtered.lng, dataframe_filtered.categories):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)

# display map
venues_map

## 2. Explore a Given Venue
> `https://api.foursquare.com/v2/venues/`**VENUE_ID**`?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&v=`**VERSION**

### A. Let's explore the closest Korean restaurant -- _Saritgol Korean Restaurant_

In [61]:
venue_id = '582f54049900e65845ebab74' # ID of Sa Rit Gol Korean Restaurant
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
url

'https://api.foursquare.com/v2/venues/582f54049900e65845ebab74?client_id=AVBBIOYVEQWFRHN4QOOIVRWGT3Z2KZ4J3F3XMWDSLDP40RBK&client_secret=TO0JOVZBLM4YH5QZJDR1ACGLRWDLWSRL3RU5SUNTCALWGCMA&v=20200116'

In [62]:
result = requests.get(url).json()
print(result['response']['venue'].keys())
result['response']['venue']

dict_keys(['id', 'name', 'contact', 'location', 'canonicalUrl', 'categories', 'verified', 'stats', 'price', 'likes', 'dislike', 'ok', 'allowMenuUrlEdit', 'beenHere', 'specials', 'photos', 'reasons', 'hereNow', 'createdAt', 'tips', 'shortUrl', 'timeZone', 'listed', 'pageUpdates', 'inbox', 'attributes', 'bestPhoto', 'colors'])


{'id': '582f54049900e65845ebab74',
 'name': 'Sa Rit Gol Korean Restaurant (싸릿골)',
 'contact': {},
 'location': {'lat': 34.063469,
  'lng': -118.295266,
  'labeledLatLngs': [{'label': 'display',
    'lat': 34.063469,
    'lng': -118.295266}],
  'cc': 'US',
  'city': 'Los Angeles',
  'state': 'CA',
  'country': 'United States',
  'formattedAddress': ['Los Angeles, CA', 'United States']},
 'canonicalUrl': 'https://foursquare.com/v/sa-rit-gol-korean-restaurant-%EC%8B%B8%EB%A6%BF%EA%B3%A8/582f54049900e65845ebab74',
 'categories': [{'id': '4bf58dd8d48988d113941735',
   'name': 'Korean Restaurant',
   'pluralName': 'Korean Restaurants',
   'shortName': 'Korean',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/korean_',
    'suffix': '.png'},
   'primary': True}],
 'verified': False,
 'stats': {'tipCount': 0},
 'price': {'tier': 2, 'message': 'Moderate', 'currency': '$'},
 'likes': {'count': 1,
  'groups': [{'type': 'others',
    'count': 1,
    'items': [{'id': '11229757',


In [63]:
try:
    print(result['response']['venue']['rating'])
except:
    print('This venue has not been rated yet.')

This venue has not been rated yet.


In [64]:
venue_id = '582f54049900e65845ebab74' # ID of Conca Korean Restaurant
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)

result = requests.get(url).json()
try:
    print(result['response']['venue']['rating'])
except:
    print('This venue has not been rated yet.')

This venue has not been rated yet.


In [65]:
venue_id = '4e34bcc918a82fdd656f0839' # ID of Ecco
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)

result = requests.get(url).json()
try:
    print(result['response']['venue']['rating'])
except:
    print('This venue has not been rated yet.')

5.8


### D. Get the venue's tips
> `https://api.foursquare.com/v2/venues/`**VENUE_ID**`/tips?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&v=`**VERSION**`&limit=`**LIMIT**

#### Create URL and send GET request. Make sure to set limit to get all tips

In [66]:
## Ecco Tips
limit = 15 # set limit to be greater than or equal to the total number of tips
url = 'https://api.foursquare.com/v2/venues/{}/tips?client_id={}&client_secret={}&v={}&limit={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION, limit)

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e292efcdf2774001b01fdcc'},
 'response': {'tips': {'count': 4,
   'items': [{'id': '4e6155ab52b1260c12a5bb71',
     'createdAt': 1315001771,
     'text': 'Very very nice decor and service! They offer refill on your soup, salad and rice. Make sure to try the katsu sauce with the spicy mustard!',
     'type': 'user',
     'canonicalUrl': 'https://foursquare.com/item/4e6155ab52b1260c12a5bb71',
     'lang': 'en',
     'likes': {'count': 0, 'groups': []},
     'logView': True,
     'agreeCount': 0,
     'disagreeCount': 0,
     'todo': {'count': 1},
     'user': {'id': '3728675',
      'firstName': 'Missy',
      'lastName': 'A',
      'photo': {'prefix': 'https://fastly.4sqi.net/img/user/',
       'suffix': '/3728675-XQCFGOBVQ2BVF2GV.jpg'}}}]}}}

#### Get tips and list of associated features

In [67]:
tips = results['response']['tips']['items']

tip = results['response']['tips']['items'][0]
tip.keys()

dict_keys(['id', 'createdAt', 'text', 'type', 'canonicalUrl', 'lang', 'likes', 'logView', 'agreeCount', 'disagreeCount', 'todo', 'user'])

In [68]:
pd.set_option('display.max_colwidth', -1)

tips_df = json_normalize(tips) # json normalize tips

# columns to keep
filtered_columns = ['text', 'agreeCount', 'disagreeCount', 'id', 'user.firstName', 'user.lastName', 'user.gender', 'user.id']
tips_filtered = tips_df.loc[:, filtered_columns]

# display tips
tips_filtered

Unnamed: 0,text,agreeCount,disagreeCount,id,user.firstName,user.lastName,user.gender,user.id
0,"Very very nice decor and service! They offer refill on your soup, salad and rice. Make sure to try the katsu sauce with the spicy mustard!",0,0,4e6155ab52b1260c12a5bb71,Missy,A,,3728675


In [69]:
foursquare_client_id = 'AVBBIOYVEQWFRHN4QOOIVRWGT3Z2KZ4J3F3XMWDSLDP40RBK'
foursquare_client_secret = 'TO0JOVZBLM4YH5QZJDR1ACGLRWDLWSRL3RU5SUNTCALWGCMA'

Now remember that because we are using a personal developer account, then we can access only 2 of the restaurant's tips, instead of all 15 tips.

In [71]:
# Category IDs corresponding to Korean restaurants were taken from Foursquare web site (https://developer.foursquare.com/docs/resources/categories):

food_category = '4d4b7105d754a06374d81259' # 'Root' category for all food-related venues

korean_restaurant_categories = ['4e34bcc918a82fdd656f0839','532c98cd498e071f08dc7316','582f54049900e65845ebab74']
                                 

def is_restaurant(categories, specific_filter=None):
    restaurant_words = ['restaurant', 'diner', 'taverna', 'steakhouse']
    restaurant = False
    specific = False
    for c in categories:
        category_name = c[0].lower()
        category_id = c[1]
        for r in restaurant_words:
            if r in category_name:
                restaurant = True
        if 'fast food' in category_name:
            restaurant = False
        if not(specific_filter is None) and (category_id in specific_filter):
            specific = True
            restaurant = True
    return restaurant, specific

def get_categories(categories):
    return [(cat['name'], cat['id']) for cat in categories]

def format_address(location):
    address = ', '.join(location['formattedAddress'])
    address = address.replace(', Deutschland', '')
    address = address.replace(', Germany', '')
    return address

def get_venues_near_location(lat, lon, category, client_id, client_secret, radius=500, limit=100):
    version = '20180724'
    url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&categoryId={}&radius={}&limit={}'.format(
        client_id, client_secret, version, lat, lon, category, radius, limit)
    try:
        results = requests.get(url).json()['response']['groups'][0]['items']
        venues = [(item['venue']['id'],
                   item['venue']['name'],
                   get_categories(item['venue']['categories']),
                   (item['venue']['location']['lat'], item['venue']['location']['lng']),
                   format_address(item['venue']['location']),
                   item['venue']['location']['distance']) for item in results]        
    except:
        venues = []
    return venues

In [95]:
# Let's now go over our neighborhood locations and get nearby restaurants; we'll also maintain a dictionary of all found restaurants and all found italian restaurants

import pickle

def get_restaurants(lats, lons):
    restaurants = {}
    korean_restaurants = {}
    location_restaurants = []

    print('Obtaining venues around candidate locations:', end='')
    for lat, lon in zip(lats, lons):
        # Using radius=350 to meke sure we have overlaps/full coverage so we don't miss any restaurant (we're using dictionaries to remove any duplicates resulting from area overlaps)
        venues = get_venues_near_location(lat, lon, food_category, foursquare_client_id, foursquare_client_secret, radius=350, limit=100)
        area_restaurants = []
        for venue in venues:
            venue_id = venue[0]
            venue_name = venue[1]
            venue_categories = venue[2]
            venue_latlon = venue[3]
            venue_address = venue[4]
            venue_distance = venue[5]
            is_res, is_korean = is_restaurant(venue_categories, specific_filter=korean_restaurant_categories)
            if is_res:
                x, y = lonlat_to_xy(venue_latlon[1], venue_latlon[0])
                restaurant = (venue_id, venue_name, venue_latlon[0], venue_latlon[1], venue_address, venue_distance, is_korean, x, y)
                if venue_distance<=300:
                    area_restaurants.append(restaurant)
                restaurants[venue_id] = restaurant
                if is_korean:
                    korean_restaurants[venue_id] = restaurant
        location_restaurants.append(area_restaurants)
        print(' .', end='')
    print(' done.')
    return restaurants, korean_restaurants, location_restaurants

# Try to load from local file system in case we did this before
restaurants = {}
korean_restaurants = {}
location_restaurants = []
loaded = False
try:
    with open('restaurants_350.pkl', 'rb') as f:
        restaurants = pickle.load(f)
    with open('korean_restaurants_350.pkl', 'rb') as f:
        korean_restaurants = pickle.load(f)
    with open('location_restaurants_350.pkl', 'rb') as f:
        location_restaurants = pickle.load(f)
    print('Restaurant data loaded.')
    loaded = True
except:
    pass

# If load failed use the Foursquare API to get the data
if not loaded:
    restaurants, korean_restaurants, location_restaurants = get_restaurants(latitudes, longitudes)
    
    # Let's persists this in local file system
    with open('restaurants_350.pkl', 'wb') as f:
        pickle.dump(restaurants, f)
    with open('korean_restaurants_350.pkl', 'wb') as f:
        pickle.dump(korean_restaurants, f)
    with open('location_restaurants_350.pkl', 'wb') as f:
        pickle.dump(location_restaurants, f)
        

Obtaining venues around candidate locations: . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . done.


In [96]:
import numpy as np

print('Total number of restaurants:', len(restaurants))
print('Total number of Korean restaurants:', len(korean_restaurants))
print('Percentage of Korean restaurants: {:.2f}%'.format(len(korean_restaurants) / len(restaurants) * 100))
print('Average number of restaurants in neighborhood:', np.array([len(r) for r in location_restaurants]).mean())

Total number of restaurants: 1369
Total number of Korean restaurants: 0
Percentage of Korean restaurants: 0.00%
Average number of restaurants in neighborhood: 4.758241758241758


In [97]:
print('List of all restaurants')
print('-----------------------')
for r in list(restaurants.values())[:10]:
    print(r)
print('...')
print('Total:', len(restaurants))

List of all restaurants
-----------------------
('3fd66200f964a520e6ee1ee3', 'Red Lion Tavern', 34.099282428547504, -118.25902187466636, '2366 Glendale Blvd, Los Angeles, CA 90039, United States', 242, False, -3955094.5204027444, 15045461.429397812)
('49e399a4f964a5209e621fe3', 'Gingergrass', 34.10014595068481, -118.2590501003113, '2396 Glendale Blvd (at Brier Ave), Los Angeles, CA 90039, United States', 280, False, -3955030.124358452, 15045359.996851612)
('55aaf72b498ee3c73c4d9afd', 'Secret Chinese Delivery', 34.099940000000004, -118.25896000000002, '2394 Glendale Blvd, Los Angeles, CA 90039, United States', 275, False, -3955053.7638833113, 15045379.24171272)
('508b54aee4b06f6d18c6051c', 'Hyperion Public', 34.10539989051998, -118.27272749499147, '2538 Hyperion Ave, Los Angeles, CA 90027, United States', 202, False, -3953296.979802003, 15045544.054241747)
('4b0ca08df964a520584023e3', 'Pazzaz Sushi', 34.105002498504575, -118.27298218835494, '2524 Hyperion Ave (Tracy St.), Los Angeles, C

In [98]:
print('List of Korean restaurants')
print('---------------------------')
for r in list(korean_restaurants.values())[:10]:
    print(r)
print('...')
print('Total:', len(korean_restaurants))

List of Korean restaurants
---------------------------
...
Total: 0


In [99]:
print('Restaurants around location')
print('---------------------------')
for i in range(100, 110):
    rs = location_restaurants[i][:8]
    names = ', '.join([r[1] for r in rs])
    print('Restaurants around location {}: {}'.format(i+1, names))

Restaurants around location
---------------------------
Restaurants around location 101: Tamales Alberto, My Taco Corner, Bangkok Express
Restaurants around location 102: Kapistahan Grill, Ostioneria Colima # 2, Chibogs, Derby Dolls Corner Taco Man, El Majahual Pupuseria
Restaurants around location 103: Bahay Kubo, Nanay Gloria's Restaurant
Restaurants around location 104: Los Molcajetes, El Criollo Market
Restaurants around location 105: El Caserio, Las Glorias
Restaurants around location 106: Jewel, Hollywood Blvd-Thai Town
Restaurants around location 107: Wah's Golden Hen
Restaurants around location 108: The Faculty, California Bowl, Golfo De Fonseca, Rincon Chileno
Restaurants around location 109: 
Restaurants around location 110: Sasoun Bakery


Let's now see all the collected restaurants in our area of interest on map, and let's also show Italian restaurants in different color.

In [101]:
map_la = folium.Map(location=la_center, zoom_start=13)
folium.Marker(la_center, popup='Galeria').add_to(map_la)
for res in restaurants.values():
    lat = res[2]; lon = res[3]
    is_korean = res[6]
    color = 'red' if is_italian else 'blue'
    folium.CircleMarker([lat, lon], radius=3, color=color, fill=True, fill_color=color, fill_opacity=1).add_to(map_la)
map_la

## Methodology <a name="methodology"></a>

## Analysis <a name="analysis"></a>

Let's perform some basic explanatory data analysis and derive some additional info from our raw data. First let's count the **number of restaurants in every area candidate**:

In [102]:
location_restaurants_count = [len(res) for res in location_restaurants]

df_locations['Restaurants in area'] = location_restaurants_count

print('Average number of restaurants in every area with radius=300m:', np.array(location_restaurants_count).mean())

df_locations.head(10)

Average number of restaurants in every area with radius=300m: 4.758241758241758


Unnamed: 0,Address,Latitude,Longitude,X,Y,Distance from center,Restaurants in area,Distance to Korean restaurant
0,"2307 Lake Shore Ave, Los Angeles, CA 90039, USA",34.0943,-118.252663,-3956082.0,15045680.0,5992.495307,0,10000
1,"2258 Hidalgo Ave, Los Angeles, CA 90039, USA",34.096512,-118.257114,-3955482.0,15045680.0,5840.3767,0,10000
2,"Silver Lake Meadows, 2300 Silver Lake Blvd, Los Angeles, CA 90039, USA",34.098724,-118.261566,-3954882.0,15045680.0,5747.173218,3,10000
3,"Silver Lake / Hawick, Los Angeles, CA 90039, USA",34.100936,-118.266018,-3954282.0,15045680.0,5715.767665,0,10000
4,"2447 Lanterman Terrace, Los Angeles, CA 90039, USA",34.103148,-118.270471,-3953682.0,15045680.0,5747.173218,1,10000
5,"3765 Tracy St, Los Angeles, CA 90027, USA",34.10536,-118.274924,-3953082.0,15045680.0,5840.3767,8,10000
6,"4013 Holly Knoll Dr, Los Angeles, CA 90027, USA",34.107572,-118.279378,-3952482.0,15045680.0,5992.495307,1,10000
7,"2025 Avon St, Los Angeles, CA 90026, USA",34.087775,-118.24829,-3956982.0,15046200.0,5855.766389,0,10000
8,"2036 Lemoyne St, Los Angeles, CA 90026, USA",34.089987,-118.25274,-3956382.0,15046200.0,5604.462508,0,10000
9,"2024 Allesandro St, Los Angeles, CA 90039, USA",34.092199,-118.257191,-3955782.0,15046200.0,5408.326913,0,10000


OK, now let's calculate the **distance to nearest Korean restaurant from every area candidate center** (not only those within 300m - we want distance to closest one, regardless of how distant it is).

In [103]:
distances_to_korean_restaurant = []

for area_x, area_y in zip(xs, ys):
    min_distance = 10000
    for res in korean_restaurants.values():
        res_x = res[7]
        res_y = res[8]
        d = calc_xy_distance(area_x, area_y, res_x, res_y)
        if d<min_distance:
            min_distance = d
    distances_to_korean_restaurant.append(min_distance)

df_locations['Distance to Korean restaurant'] = distances_to_korean_restaurant

In [104]:
df_locations.head(10)

Unnamed: 0,Address,Latitude,Longitude,X,Y,Distance from center,Restaurants in area,Distance to Korean restaurant
0,"2307 Lake Shore Ave, Los Angeles, CA 90039, USA",34.0943,-118.252663,-3956082.0,15045680.0,5992.495307,0,10000
1,"2258 Hidalgo Ave, Los Angeles, CA 90039, USA",34.096512,-118.257114,-3955482.0,15045680.0,5840.3767,0,10000
2,"Silver Lake Meadows, 2300 Silver Lake Blvd, Los Angeles, CA 90039, USA",34.098724,-118.261566,-3954882.0,15045680.0,5747.173218,3,10000
3,"Silver Lake / Hawick, Los Angeles, CA 90039, USA",34.100936,-118.266018,-3954282.0,15045680.0,5715.767665,0,10000
4,"2447 Lanterman Terrace, Los Angeles, CA 90039, USA",34.103148,-118.270471,-3953682.0,15045680.0,5747.173218,1,10000
5,"3765 Tracy St, Los Angeles, CA 90027, USA",34.10536,-118.274924,-3953082.0,15045680.0,5840.3767,8,10000
6,"4013 Holly Knoll Dr, Los Angeles, CA 90027, USA",34.107572,-118.279378,-3952482.0,15045680.0,5992.495307,1,10000
7,"2025 Avon St, Los Angeles, CA 90026, USA",34.087775,-118.24829,-3956982.0,15046200.0,5855.766389,0,10000
8,"2036 Lemoyne St, Los Angeles, CA 90026, USA",34.089987,-118.25274,-3956382.0,15046200.0,5604.462508,0,10000
9,"2024 Allesandro St, Los Angeles, CA 90039, USA",34.092199,-118.257191,-3955782.0,15046200.0,5408.326913,0,10000


In [105]:
print('Average distance to closest Korean restaurant from each area center:', df_locations['Distance to Korean restaurant'].mean())

Average distance to closest Korean restaurant from each area center: 10000.0


In [109]:
from folium import plugins
from folium.plugins import HeatMap

map_la = folium.Map(location=la_center, zoom_start=13)
folium.TileLayer('cartodbpositron').add_to(map_la) #cartodbpositron cartodbdark_matter
HeatMap(restaurant_latlons).add_to(map_la)
folium.Marker(la_center).add_to(map_la)
folium.Circle(la_center, radius=1000, fill=False, color='white').add_to(map_la)
folium.Circle(la_center, radius=2000, fill=False, color='white').add_to(map_la)
folium.Circle(la_center, radius=3000, fill=False, color='white').add_to(map_la)
map_la

In [113]:
roi_x_min = la_center_x - 2000
roi_y_max = la_center_y + 1000
roi_width = 5000
roi_height = 5000
roi_center_x = roi_x_min + 2500
roi_center_y = roi_y_max - 2500
roi_center_lon, roi_center_lat = xy_to_lonlat(roi_center_x, roi_center_y)
roi_center = [roi_center_lat, roi_center_lon]

map_la = folium.Map(location=roi_center, zoom_start=14)
HeatMap(restaurant_latlons).add_to(map_la)
folium.Marker(la_center).add_to(map_la)
folium.Circle(roi_center, radius=2500, color='white', fill=True, fill_opacity=0.4).add_to(map_la)

map_la

In [114]:
k = math.sqrt(3) / 2 # Vertical offset for hexagonal grid cells
x_step = 100
y_step = 100 * k 
roi_y_min = roi_center_y - 2500

roi_latitudes = []
roi_longitudes = []
roi_xs = []
roi_ys = []
for i in range(0, int(51/k)):
    y = roi_y_min + i * y_step
    x_offset = 50 if i%2==0 else 0
    for j in range(0, 51):
        x = roi_x_min + j * x_step + x_offset
        d = calc_xy_distance(roi_center_x, roi_center_y, x, y)
        if (d <= 2501):
            lon, lat = xy_to_lonlat(x, y)
            roi_latitudes.append(lat)
            roi_longitudes.append(lon)
            roi_xs.append(x)
            roi_ys.append(y)

print(len(roi_latitudes), 'candidate neighborhood centers generated.')

2261 candidate neighborhood centers generated.


In [116]:
def count_restaurants_nearby(x, y, restaurants, radius=250):    
    count = 0
    for res in restaurants.values():
        res_x = res[7]; res_y = res[8]
        d = calc_xy_distance(x, y, res_x, res_y)
        if d<=radius:
            count += 1
    return count

def find_nearest_restaurant(x, y, restaurants):
    d_min = 100000
    for res in restaurants.values():
        res_x = res[7]; res_y = res[8]
        d = calc_xy_distance(x, y, res_x, res_y)
        if d<=d_min:
            d_min = d
    return d_min

roi_restaurant_counts = []
roi_korean_distances = []

print('Generating data on location candidates... ', end='')
for x, y in zip(roi_xs, roi_ys):
    count = count_restaurants_nearby(x, y, restaurants, radius=250)
    roi_restaurant_counts.append(count)
    distance = find_nearest_restaurant(x, y, korean_restaurants)
    roi_korean_distances.append(distance)
print('done.')

Generating data on location candidates... done.


In [119]:
# Let's put this into dataframe
df_roi_locations = pd.DataFrame({'Latitude':roi_latitudes,
                                 'Longitude':roi_longitudes,
                                 'X':roi_xs,
                                 'Y':roi_ys,
                                 'Restaurants nearby':roi_restaurant_counts,
                                 'Distance to Korean restaurant':roi_korean_distances})

df_roi_locations.head(10)

Unnamed: 0,Latitude,Longitude,X,Y,Restaurants nearby,Distance to Korean restaurant
0,34.092002,-118.27696,-3953832.0,15047390.0,5,100000
1,34.092371,-118.277702,-3953732.0,15047390.0,8,100000
2,34.089441,-118.273262,-3954382.0,15047480.0,0,100000
3,34.089809,-118.274004,-3954282.0,15047480.0,0,100000
4,34.090178,-118.274746,-3954182.0,15047480.0,1,100000
5,34.090546,-118.275488,-3954082.0,15047480.0,4,100000
6,34.090915,-118.27623,-3953982.0,15047480.0,9,100000
7,34.091283,-118.276973,-3953882.0,15047480.0,6,100000
8,34.091652,-118.277715,-3953782.0,15047480.0,10,100000
9,34.09202,-118.278457,-3953682.0,15047480.0,11,100000


OK. Now let's calculate two most important things for each location candidate: **number of restaurants in vicinity** (we'll use radius of **250 meters**) and **distance to closest Korean restaurant**.

In [120]:
good_res_count = np.array((df_roi_locations['Restaurants nearby']<=2))
print('Locations with no more than two restaurants nearby:', good_res_count.sum())

good_kor_distance = np.array(df_roi_locations['Distance to Korean restaurant']>=400)
print('Locations with no Korean restaurants within 400m:', good_kor_distance.sum())

good_locations = np.logical_and(good_res_count, good_kor_distance)
print('Locations with both conditions met:', good_locations.sum())

df_good_locations = df_roi_locations[good_locations]

Locations with no more than two restaurants nearby: 1484
Locations with no Korean restaurants within 400m: 2261
Locations with both conditions met: 1484


In [122]:
good_latitudes = df_good_locations['Latitude'].values
good_longitudes = df_good_locations['Longitude'].values

good_locations = [[lat, lon] for lat, lon in zip(good_latitudes, good_longitudes)]

map_la = folium.Map(location=roi_center, zoom_start=14)
folium.TileLayer('cartodbpositron').add_to(map_la)
HeatMap(restaurant_latlons).add_to(map_la)
folium.Circle(roi_center, radius=2500, color='white', fill=True, fill_opacity=0.6).add_to(map_la)
folium.Marker(la_center).add_to(map_la)
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.CircleMarker([lat, lon], radius=2, color='blue', fill=True, fill_color='blue', fill_opacity=1).add_to(map_la) 

map_la

In [123]:
map_la = folium.Map(location=roi_center, zoom_start=14)
HeatMap(good_locations, radius=25).add_to(map_la)
folium.Marker(la_center).add_to(map_la)
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.CircleMarker([lat, lon], radius=2, color='blue', fill=True, fill_color='blue', fill_opacity=1).add_to(map_la)

map_la

In [125]:
from sklearn.cluster import KMeans

number_of_clusters = 15

good_xys = df_good_locations[['X', 'Y']].values
kmeans = KMeans(n_clusters=number_of_clusters, random_state=0).fit(good_xys)

cluster_centers = [xy_to_lonlat(cc[0], cc[1]) for cc in kmeans.cluster_centers_]

map_la = folium.Map(location=roi_center, zoom_start=14)
folium.TileLayer('cartodbpositron').add_to(map_la)
HeatMap(restaurant_latlons).add_to(map_la)
folium.Circle(roi_center, radius=2500, color='white', fill=True, fill_opacity=0.4).add_to(map_la)
folium.Marker(la_center).add_to(map_la)
for lon, lat in cluster_centers:
    folium.Circle([lat, lon], radius=500, color='green', fill=True, fill_opacity=0.25).add_to(map_la) 
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.CircleMarker([lat, lon], radius=2, color='blue', fill=True, fill_color='blue', fill_opacity=1).add_to(map_la)

map_la

In [130]:
map_la = folium.Map(location=roi_center, zoom_start=14)
folium.Marker(la_center).add_to(map_la)
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.Circle([lat, lon], radius=250, color='#00000000', fill=True, fill_color='#0066ff', fill_opacity=0.07).add_to(map_la)
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.CircleMarker([lat, lon], radius=2, color='blue', fill=True, fill_color='blue', fill_opacity=1).add_to(map_la)
for lon, lat in cluster_centers:
    folium.Circle([lat, lon], radius=500, color='green', fill=False).add_to(map_la) 

map_la

Let's zoom in on candidate areas in **Galeria Market**:

In [131]:
map_la = folium.Map(location=[34.0656433, -118.2913283], zoom_start=15)
folium.Marker(la_center).add_to(map_la)
for lon, lat in cluster_centers:
    folium.Circle([lat, lon], radius=500, color='green', fill=False).add_to(map_la) 
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.Circle([lat, lon], radius=250, color='#0000ff00', fill=True, fill_color='#0066ff', fill_opacity=0.07).add_to(map_la)
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.CircleMarker([lat, lon], radius=2, color='blue', fill=True, fill_color='blue', fill_opacity=1).add_to(map_la)

map_la