# Clustering Neighborhoods in Toronto

In [2]:
import requests
import pandas as pd
import geocoder
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

In [3]:
wikipedia_url="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

_download the content of the wikipedia page_

In [4]:
resp = requests.get(wikipedia_url)

_use pandas to parse the html file the result in data is an array of DataFrame one for each table in the html_

In [5]:
data=pd.read_html(resp.text)

_the first one is the table we are interested in_

In [6]:
NhoodDF=data[0]
NhoodDF.head(5)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


_Let's clean the data_

_remove Neighbourhood without Borough and replace Neighbourhood value if it is Not Assigned_

In [7]:
resultDF=pd.DataFrame()
for idx,row in NhoodDF.iterrows():
    if(row['Borough']=='Not assigned'):
        continue
    if(row['Neighbourhood']=='Not assigned'):
        row['Neighbourhood']=row['Borough']
    resultDF=resultDF.append(row,ignore_index=True)
resultDF=resultDF[['Postal Code','Borough','Neighbourhood']]

resultDF.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [8]:
print('cleaned dataframe has shape: {}'.format(resultDF.shape))

cleaned dataframe has shape: (103, 3)


In [9]:
g = geocoder.google('{}, Toronto, Ontario'.format('M5A'))

In [10]:
'''
for idx,row in resultDF.iterrows():
    postal_code=row['Postal Code']

    lat_lng_coords = None

    # loop until you get the coordinates
    while(lat_lng_coords is None):
        print('calling....',postal_code)
        g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
        print('call returned ....',g.latlng)
        lat_lng_coords = g.latlng
    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]
    print('{} {} {}'.format(postal_codde,latitude,longitude))

'''




"\nfor idx,row in resultDF.iterrows():\n    postal_code=row['Postal Code']\n\n    lat_lng_coords = None\n\n    # loop until you get the coordinates\n    while(lat_lng_coords is None):\n        print('calling....',postal_code)\n        g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))\n        print('call returned ....',g.latlng)\n        lat_lng_coords = g.latlng\n    latitude = lat_lng_coords[0]\n    longitude = lat_lng_coords[1]\n    print('{} {} {}'.format(postal_codde,latitude,longitude))\n\n"

## geocoder doesn't work let's load the data from csv

In [11]:
geolocDF=pd.read_csv('./data/Geospatial_Coordinates.csv')
geolocDF.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


_add coordinates to our DataFrame_

In [12]:
resultDF=resultDF.merge(geolocDF,left_on='Postal Code',right_on='Postal Code')
resultDF.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


_let's filter the Borough to include only the toronto ones_

In [13]:
neighborhoods=pd.DataFrame(columns=['Postal Code','Borough','Neighbourhood','Longitude','Latitude'])
for idx,row in resultDF.iterrows():
    if 'toronto' in row['Borough'].lower().split() :
        neighborhoods=neighborhoods.append(row,ignore_index=True)
neighborhoods.head(5)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Longitude,Latitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",-79.360636,43.65426
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",-79.389494,43.662301
2,M5B,Downtown Toronto,"Garden District, Ryerson",-79.378937,43.657162
3,M5C,Downtown Toronto,St. James Town,-79.375418,43.651494
4,M4E,East Toronto,The Beaches,-79.293031,43.676357


_draw the map of toronto with the Neighbourhoods_

In [14]:
longitude=-79.375418
latitude=43.651494
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='red',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  

map_toronto

_define fousquare credentials_

In [15]:
CLIENT_ID = 'V5TQ0NXY4D1FWWPLNLJUKXRCC4K3ZKWQ4OFKZOZ10TMZS33O' # your Foursquare ID
CLIENT_SECRET = 'PSFUMV00XI2CVP4R0Z4NBNIVIA4E4G20TMU5N22IGZCIBXT5' # your Foursquare Secret
ACCESS_TOKEN = 'WZNVLZKGURJN13ZDU45BABFZKDBMAEYQM1QZDUQ0DZMLMJ3R' # your FourSquare Access Token
VERSION = '20180604'
LIMIT = 30

_let's explore the venues around one of the Neighbourhood_

_search chinese restaurants in one miles around The Beaches neighbourood_

In [16]:
latitude=neighborhoods.loc[4,'Latitude']
longitude=neighborhoods.loc[4,'Longitude']

search_query = 'chinese'
radius = 1600
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)
results = requests.get(url).json()


_draw the found restaurants on the map_

In [17]:

map_the_beaches = folium.Map(location=[latitude, longitude], zoom_start=14)
for idx,res in enumerate(results['response']['venues']):
    #print(res)
    name=res['name']
    lat=res['location']['lat']
    lng=res['location']['lng']
    folium.CircleMarker(
        [lat, lng],
        radius=8,
        popup=name,
        color='blue',
        fill=True,
        fill_color='#ff0000',
        fill_opacity=0.7,
        parse_html=False).add_to(map_the_beaches)  



In [18]:
map_the_beaches

_get the venues for all the neigbouroods_

In [19]:
latitude=neighborhoods.loc[4,'Latitude']
longitude=neighborhoods.loc[4,'Longitude']

search_query = 'chinese'
radius = 1600
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '602b75ce22b38d24d0be08d5'},
 'response': {'headerLocation': 'The Beaches',
  'headerFullLocation': 'The Beaches, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'query': 'chinese',
  'totalResults': 5,
  'suggestedBounds': {'ne': {'lat': 43.69075741440002,
    'lng': -79.27315828636966},
   'sw': {'lat': 43.66195738559998, 'lng': -79.31290411363034}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bd4925d637ba59348f7f470',
       'name': 'Garden Gate Resturant',
       'location': {'address': '2379 Queen St E',
        'crossStreet': 'Beech Ave',
        'lat': 43.67261471952654,
        'lng': -79.28749153616172,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.67261471952654,
          'lng': -79.2874

In [32]:
venues=pd.json_normalize(results['response']['groups'][0]['items'])

In [37]:
for i,r in venues.iterrows():
    print(r['venue.categories'])

[{'id': '4bf58dd8d48988d145941735', 'name': 'Chinese Restaurant', 'pluralName': 'Chinese Restaurants', 'shortName': 'Chinese', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/asian_', 'suffix': '.png'}, 'primary': True}]
[{'id': '4bf58dd8d48988d145941735', 'name': 'Chinese Restaurant', 'pluralName': 'Chinese Restaurants', 'shortName': 'Chinese', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/asian_', 'suffix': '.png'}, 'primary': True}]
[{'id': '4bf58dd8d48988d145941735', 'name': 'Chinese Restaurant', 'pluralName': 'Chinese Restaurants', 'shortName': 'Chinese', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/asian_', 'suffix': '.png'}, 'primary': True}]
[{'id': '4bf58dd8d48988d145941735', 'name': 'Chinese Restaurant', 'pluralName': 'Chinese Restaurants', 'shortName': 'Chinese', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/asian_', 'suffix': '.png'}, 'primary': True}]
[{'id': '4bf58dd8d48988d145941735', 'name': 'Chinese

In [78]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']


def get_venues(neighDF,radius=500):
    resultDF=pd.DataFrame(columns=['Neighbourhood','Category'])
    final=pd.DataFrame()
    LIMIT=100
    for i,r in neighDF.iterrows():
        ]print(r['Neighbourhood'])
        latitude=r['Latitude']
        longitude=r['Longitude']
        url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)
        results = requests.get(url).json()
        # get the venues
        venues=pd.json_normalize(results['response']['groups'][0]['items'])
        
        #print(len(venues),type(venues),venues.head(10))
        for ii,v in venues.iterrows():
            #print('cat',v['venue.categories'])
            cat=get_category_type(v)
            resultDF=resultDF.append({'Neighbourhood':r['Neighbourhood'],'Category':cat},ignore_index=True)
        one_hot_cat=pd.get_dummies(resultDF['Category'])
        one_hot_cat['Neighbourhood']=resultDF['Neighbourhood']
        final=final.append(one_hot_cat)
    return final

        
        

In [79]:
fvenues=get_venues(neighborhoods)

Regent Park, Harbourfront
cat [{'id': '4bf58dd8d48988d145941735', 'name': 'Chinese Restaurant', 'pluralName': 'Chinese Restaurants', 'shortName': 'Chinese', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/asian_', 'suffix': '.png'}, 'primary': True}]
Queen's Park, Ontario Provincial Government
cat [{'id': '4bf58dd8d48988d145941735', 'name': 'Chinese Restaurant', 'pluralName': 'Chinese Restaurants', 'shortName': 'Chinese', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/asian_', 'suffix': '.png'}, 'primary': True}]
cat [{'id': '4bf58dd8d48988d145941735', 'name': 'Chinese Restaurant', 'pluralName': 'Chinese Restaurants', 'shortName': 'Chinese', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/asian_', 'suffix': '.png'}, 'primary': True}]
cat [{'id': '4bf58dd8d48988d145941735', 'name': 'Chinese Restaurant', 'pluralName': 'Chinese Restaurants', 'shortName': 'Chinese', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/asian_', 'su

In [80]:
fvenues=fvenues.fillna(0)

In [81]:
fvenues

Unnamed: 0,Chinese Restaurant,Neighbourhood,Taiwanese Restaurant,Fried Chicken Joint,Thai Restaurant,Bubble Tea Shop,Sushi Restaurant,Dim Sum Restaurant,Comfort Food Restaurant,Asian Restaurant,Bar,Cantonese Restaurant,Dumpling Restaurant,Hong Kong Restaurant,Hotpot Restaurant,Noodle House,Szechuan Restaurant
0,1,"Regent Park, Harbourfront",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,1,"Regent Park, Harbourfront",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,"Queen's Park, Ontario Provincial Government",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1,"Queen's Park, Ontario Provincial Government",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1,"Queen's Park, Ontario Provincial Government",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
168,0,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
169,1,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
170,0,Church and Wellesley,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
171,0,Church and Wellesley,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [82]:
fvenues.groupby(['Neighbourhood']).sum()

Unnamed: 0_level_0,Chinese Restaurant,Taiwanese Restaurant,Fried Chicken Joint,Thai Restaurant,Bubble Tea Shop,Sushi Restaurant,Dim Sum Restaurant,Comfort Food Restaurant,Asian Restaurant,Bar,Cantonese Restaurant,Dumpling Restaurant,Hong Kong Restaurant,Hotpot Restaurant,Noodle House,Szechuan Restaurant
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Berczy Park,102.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Central Bay Street,495.0,0.0,33.0,33.0,33.0,33.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Church and Wellesley,10.0,4.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Commerce Court, Victoria Hotel",253.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Davisville,13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"First Canadian Place, Underground city",36.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Garden District, Ryerson",370.0,0.0,37.0,37.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Harbourfront East, Union Station, Toronto Islands",145.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Kensington Market, Chinatown, Grange Park",351.0,0.0,9.0,0.0,9.0,9.0,9.0,0.0,9.0,9.0,9.0,18.0,9.0,9.0,9.0,9.0
Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,21.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
