# Clustering Neighborhoods in Toronto

In [29]:
import requests
import pandas as pd
import geocoder
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

In [2]:
wikipedia_url="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

_download the content of the wikipedia page_

In [3]:
resp = requests.get(wikipedia_url)

_use pandas to parse the html file the result in data is an array of DataFrame one for each table in the html_

In [4]:
data=pd.read_html(resp.text)

_the first one is the table we are interested in_

In [5]:
NhoodDF=data[0]
NhoodDF.head(5)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


_Let's clean the data_

_remove Neighbourhood without Borough and replace Neighbourhood value if it is Not Assigned_

In [6]:
resultDF=pd.DataFrame()
for idx,row in NhoodDF.iterrows():
    if(row['Borough']=='Not assigned'):
        continue
    if(row['Neighbourhood']=='Not assigned'):
        row['Neighbourhood']=row['Borough']
    resultDF=resultDF.append(row,ignore_index=True)
resultDF=resultDF[['Postal Code','Borough','Neighbourhood']]

resultDF.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [7]:
print('cleaned dataframe has shape: {}'.format(resultDF.shape))

cleaned dataframe has shape: (103, 3)


In [8]:
g = geocoder.google('{}, Toronto, Ontario'.format('M5A'))

In [9]:
'''
for idx,row in resultDF.iterrows():
    postal_code=row['Postal Code']

    lat_lng_coords = None

    # loop until you get the coordinates
    while(lat_lng_coords is None):
        print('calling....',postal_code)
        g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
        print('call returned ....',g.latlng)
        lat_lng_coords = g.latlng
    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]
    print('{} {} {}'.format(postal_codde,latitude,longitude))

'''




"\nfor idx,row in resultDF.iterrows():\n    postal_code=row['Postal Code']\n\n    lat_lng_coords = None\n\n    # loop until you get the coordinates\n    while(lat_lng_coords is None):\n        print('calling....',postal_code)\n        g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))\n        print('call returned ....',g.latlng)\n        lat_lng_coords = g.latlng\n    latitude = lat_lng_coords[0]\n    longitude = lat_lng_coords[1]\n    print('{} {} {}'.format(postal_codde,latitude,longitude))\n\n"

## geocoder doesn't work let's load the data from csv

In [10]:
geolocDF=pd.read_csv('./data/Geospatial_Coordinates.csv')
geolocDF.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


_add coordinates to our DataFrame_

In [11]:
resultDF=resultDF.merge(geolocDF,left_on='Postal Code',right_on='Postal Code')
resultDF.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


_let's filter the Borough to include only the toronto ones_

In [32]:
neighborhoods=pd.DataFrame(columns=['Postal Code','Borough','Neighbourhood','Longitude','Latitude'])
for idx,row in resultDF.iterrows():
    if 'toronto' in row['Borough'].lower().split() :
        neighborhoods=neighborhoods.append(row,ignore_index=True)
neighborhoods.head(5)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Longitude,Latitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",-79.360636,43.65426
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",-79.389494,43.662301
2,M5B,Downtown Toronto,"Garden District, Ryerson",-79.378937,43.657162
3,M5C,Downtown Toronto,St. James Town,-79.375418,43.651494
4,M4E,East Toronto,The Beaches,-79.293031,43.676357


_draw the map of toronto with the Neighbourhoods_

In [75]:
longitude=-79.375418
latitude=43.651494
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='red',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  

map_toronto

_define fousquare credentials_

In [39]:
CLIENT_ID = 'V5TQ0NXY4D1FWWPLNLJUKXRCC4K3ZKWQ4OFKZOZ10TMZS33O' # your Foursquare ID
CLIENT_SECRET = 'HEXKQC41N1YTKLU1ICVOAFMO04QXHK0LNONCAAH3QT2PI5TJ' # your Foursquare Secret
ACCESS_TOKEN = 'WZNVLZKGURJN13ZDU45BABFZKDBMAEYQM1QZDUQ0DZMLMJ3R' # your FourSquare Access Token
VERSION = '20180604'
LIMIT = 30

_let's explore the venues around one of the Neighbourhood_

_search chinese restaurants in one miles around The Beaches neighbourood_

In [59]:
latitude=neighborhoods.loc[4,'Latitude']
longitude=neighborhoods.loc[4,'Longitude']

search_query = 'chinese'
radius = 1600
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)
results = requests.get(url).json()


_draw the found restaurants on the map_

In [74]:

map_the_beaches = folium.Map(location=[latitude, longitude], zoom_start=14)
for idx,res in enumerate(results['response']['venues']):
    #print(res)
    name=res['name']
    lat=res['location']['lat']
    lng=res['location']['lng']
    folium.CircleMarker(
        [lat, lng],
        radius=8,
        popup=name,
        color='blue',
        fill=True,
        fill_color='#ff0000',
        fill_opacity=0.7,
        parse_html=False).add_to(map_the_beaches)  



In [72]:
map_the_beaches