# The Battle of Neighbourhoods Notebook

Importing required libraries

In [1]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

import matplotlib.cm as cm
import matplotlib.colors as colors

In [2]:
import requests
from bs4 import BeautifulSoup

In [3]:
PostalCodes = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M").text
soup = BeautifulSoup(PostalCodes,'lxml')

In [4]:
result = soup.prettify().splitlines()
print('\n'.join(result[:20] + result[-20:]))

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of postal codes of Canada: M - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"X-TPEwpAMNQAApy71ToAAADG","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":995657573,"wgRevisionId":995657573,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Short description is different from Wikidata","Communications in Ontar

In [9]:
My_table = soup.find('table',{'class':'wikitable sortable'})
My_table

<table class="wikitable sortable">
<tbody><tr>
<th>Postal Code
</th>
<th>Borough
</th>
<th>Neighbourhood
</th></tr>
<tr>
<td>M1A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M2A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3A
</td>
<td>North York
</td>
<td>Parkwoods
</td></tr>
<tr>
<td>M4A
</td>
<td>North York
</td>
<td>Victoria Village
</td></tr>
<tr>
<td>M5A
</td>
<td>Downtown Toronto
</td>
<td>Regent Park, Harbourfront
</td></tr>
<tr>
<td>M6A
</td>
<td>North York
</td>
<td>Lawrence Manor, Lawrence Heights
</td></tr>
<tr>
<td>M7A
</td>
<td>Downtown Toronto
</td>
<td>Queen's Park, Ontario Provincial Government
</td></tr>
<tr>
<td>M8A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M9A
</td>
<td>Etobicoke
</td>
<td>Islington Avenue, Humber Valley Village
</td></tr>
<tr>
<td>M1B
</td>
<td>Scarborough
</td>
<td>Malvern, Rouge
</td></tr>
<tr>
<td>M2B
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3B
</td>
<td

In [10]:
rows = My_table.findAll('tr')
rows

[<tr>
 <th>Postal Code
 </th>
 <th>Borough
 </th>
 <th>Neighbourhood
 </th></tr>,
 <tr>
 <td>M1A
 </td>
 <td>Not assigned
 </td>
 <td>Not assigned
 </td></tr>,
 <tr>
 <td>M2A
 </td>
 <td>Not assigned
 </td>
 <td>Not assigned
 </td></tr>,
 <tr>
 <td>M3A
 </td>
 <td>North York
 </td>
 <td>Parkwoods
 </td></tr>,
 <tr>
 <td>M4A
 </td>
 <td>North York
 </td>
 <td>Victoria Village
 </td></tr>,
 <tr>
 <td>M5A
 </td>
 <td>Downtown Toronto
 </td>
 <td>Regent Park, Harbourfront
 </td></tr>,
 <tr>
 <td>M6A
 </td>
 <td>North York
 </td>
 <td>Lawrence Manor, Lawrence Heights
 </td></tr>,
 <tr>
 <td>M7A
 </td>
 <td>Downtown Toronto
 </td>
 <td>Queen's Park, Ontario Provincial Government
 </td></tr>,
 <tr>
 <td>M8A
 </td>
 <td>Not assigned
 </td>
 <td>Not assigned
 </td></tr>,
 <tr>
 <td>M9A
 </td>
 <td>Etobicoke
 </td>
 <td>Islington Avenue, Humber Valley Village
 </td></tr>,
 <tr>
 <td>M1B
 </td>
 <td>Scarborough
 </td>
 <td>Malvern, Rouge
 </td></tr>,
 <tr>
 <td>M2B
 </td>
 <td>Not assigned
 </td>

In [14]:
parsed_data = []

In [15]:
for row in rows:
    children = row.findChildren(recursive=False)
    row_text = []
    for child in children: 
        clean_text = child.text 
        clean_text = clean_text.split('&#91;')[0] # This is to discard reference/citation links
        clean_text = clean_text.split('&#160;')[-1] # This is to clean the header row of the sort icons
        clean_text = clean_text.strip()
        row_text.append(clean_text)
    parsed_data.append(row_text)

In [18]:
parsed_data[:5]

[['Postal Code', 'Borough', 'Neighbourhood'],
 ['M1A', 'Not assigned', 'Not assigned'],
 ['M2A', 'Not assigned', 'Not assigned'],
 ['M3A', 'North York', 'Parkwoods'],
 ['M4A', 'North York', 'Victoria Village']]

In [28]:
# Define the dataframe columns
column_names = ['Postal Code', 'Borough', 'Neighborhood']

# Instantiate and populate the dataframe
df = pd.DataFrame(parsed_data[1:], columns=column_names)

# Examine the resulting dataframe
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [29]:
# Process the cells that have an assigned borough while ignoring the cells with a borough that is not assigned.
df.drop(df[df['Borough']=='Not assigned'].index, inplace=True)
df.reset_index(inplace=True, drop=True)
print("The new number of rows in dataframe after dropping unassigned boroughs:", df.shape[0])

The new number of rows in dataframe after dropping unassigned boroughs: 103


In [31]:
df['Neighborhood'].where(df['Neighborhood'] != 'Not assigned', df['Borough'], inplace=True)
df=df.groupby("Postal Code").agg(lambda x:','.join(set(x)))
df=df.reset_index()
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [23]:
print("The number of rows in dataframe:", df.shape[0])
df.shape

The number of rows in dataframe: 103


(103, 3)

In [26]:
geodata = pd.read_csv('https://cocl.us/Geospatial_data')
geodata.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [36]:
# Merge the original df_copy with geodata
df = df_copy.merge(geodata, how='inner', on='PostalCode')
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [41]:
# Check how many boroughs and neighborhoods there are
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df['Borough'].unique()),
        df.shape[0]
    )  
)      

The dataframe has 10 boroughs and 103 neighborhoods.


In [43]:
%pip install folium

Collecting folium
  Downloading folium-0.11.0-py2.py3-none-any.whl (93 kB)
[K     |████████████████████████████████| 93 kB 3.1 MB/s  eta 0:00:01
Collecting branca>=0.3.0
  Downloading branca-0.4.2-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.2 folium-0.11.0
Note: you may need to restart the kernel to use updated packages.


In [44]:
import folium

In [51]:
# Segment and Cluster by Downtown Toronto
tor_data = df[df['Borough'] == 'Downtown Toronto'].reset_index(drop=True)
tor_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529
1,M4X,Downtown Toronto,"St. James Town, Cabbagetown",43.667967,-79.367675
2,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316
3,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [52]:
tor_data.tail()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
14,M5V,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.39442
15,M5W,Downtown Toronto,Stn A PO Boxes,43.646435,-79.374846
16,M5X,Downtown Toronto,"First Canadian Place, Underground city",43.648429,-79.38228
17,M6G,Downtown Toronto,Christie,43.669542,-79.422564
18,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [53]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

In [54]:
# Use geopy library to get the latitude and longitude values of Toronto.
address = 'Toronto, Ontario'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.6534817, -79.3839347.


In [55]:
# Create a map of Downtown Toronto using Latitude and Longitude values
map_tor = folium.Map(location=[latitude, longitude], zoom_start=13)

# Add markers to map
for lat, lng, label in zip(tor_data['Latitude'], tor_data['Longitude'], tor_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_tor)  
    
map_tor

Utilizing the Foursquare API to explore the neighborhoods and segment them.

In [59]:
CLIENT_ID = 'X3LGY23MQEFDB3XFFRJQTDNKTEA5ZFKENIXRARKJKYDB2L0C' # Foursquare ID
CLIENT_SECRET = '0A4KVL1JOH14VBNYWWCHVZLLXM522RJVTJR105RI5NE50CP4' # Foursquare Secret
VERSION = '20210106' # Foursquare API version

print('Credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Credentails:
CLIENT_ID: X3LGY23MQEFDB3XFFRJQTDNKTEA5ZFKENIXRARKJKYDB2L0C
CLIENT_SECRET:0A4KVL1JOH14VBNYWWCHVZLLXM522RJVTJR105RI5NE50CP4


In [62]:
tor_data.loc[0, 'Neighborhood'] # Get the neighborhood name

'Rosedale'

In [64]:
# Get the neighborhoods' latitude and longitude values
neighborhood_latitude = tor_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = tor_data.loc[0, 'Longitude'] # neighborhood longitude value
neighborhood_name = tor_data.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Rosedale are 43.6795626, -79.37752940000001.


In [69]:
# Let's get the top 100 venues that are in Rosedale within a radius of 500 meters.
LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?client_id=X3LGY23MQEFDB3XFFRJQTDNKTEA5ZFKENIXRARKJKYDB2L0C&client_secret=0A4KVL1JOH14VBNYWWCHVZLLXM522RJVTJR105RI5NE50CP4&v=20210106&ll=43.6056466,-79.50132070000001&radius=500&limit=100'.format(CLIENT_ID, CLIENT_SECRET, VERSION, neighborhood_latitude, neighborhood_longitude, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?client_id=X3LGY23MQEFDB3XFFRJQTDNKTEA5ZFKENIXRARKJKYDB2L0C&client_secret=0A4KVL1JOH14VBNYWWCHVZLLXM522RJVTJR105RI5NE50CP4&v=20210106&ll=43.6056466,-79.50132070000001&radius=500&limit=100'

In [70]:
# Send the GET request and examine the results
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5ff50b337b1cb0799b9c87dd'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 17,
  'suggestedBounds': {'ne': {'lat': 43.6101466045, 'lng': -79.49511771930959},
   'sw': {'lat': 43.6011465955, 'lng': -79.50752368069043}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b119977f964a520488023e3',
       'name': 'LCBO',
       'location': {'address': '2762 Lake Shore Blvd W',
        'crossStreet': 'btwn 1st & 2nd St',
        'lat': 43.60228082768786,
        'lng': -79.4993016827402,
        'labeledLatLngs': [{'label': 'display',
          'lat':

In [71]:
# All the information is in the items key. Let's borrow the get_category_type function from the Foursquare lab.
# Function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [72]:
# Clean the json and structure in into a pandas dataframe.
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()



Unnamed: 0,name,categories,lat,lng
0,LCBO,Liquor Store,43.602281,-79.499302
1,New Toronto Fish & Chips,Restaurant,43.601849,-79.503281
2,Domino's Pizza,Pizza Place,43.601583,-79.500905
3,Delicia Bakery & Pastry,Bakery,43.601403,-79.503012
4,Lucky Dice Restaurant,Café,43.601392,-79.503056


Number of values that were returned by Foursquare?

In [77]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

17 venues were returned by Foursquare.


Use the function from the lab to repeat the same process to all the neighborhoods in Downtown Toronto

In [78]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [79]:
# Run the above function on each neighborhood and create a new dataframe called tor_venues.
tor_venues = getNearbyVenues(names=tor_data['Neighborhood'],
                                   latitudes=tor_data['Latitude'],
                                   longitudes=tor_data['Longitude']
                                  )

Rosedale
St. James Town, Cabbagetown
Church and Wellesley
Regent Park, Harbourfront
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Richmond, Adelaide, King
Harbourfront East, Union Station, Toronto Islands
Toronto Dominion Centre, Design Exchange
Commerce Court, Victoria Hotel
University of Toronto, Harbord
Kensington Market, Chinatown, Grange Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Stn A PO Boxes
First Canadian Place, Underground city
Christie
Queen's Park, Ontario Provincial Government


In [80]:
# size of the resulting dataframe
print(tor_venues.shape)
tor_venues.head()

(1231, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Rosedale,43.679563,-79.377529,Rosedale Park,43.682328,-79.378934,Playground
1,Rosedale,43.679563,-79.377529,Whitney Park,43.682036,-79.373788,Park
2,Rosedale,43.679563,-79.377529,Alex Murray Parkette,43.6783,-79.382773,Park
3,Rosedale,43.679563,-79.377529,Milkman's Lane,43.676352,-79.373842,Trail
4,"St. James Town, Cabbagetown",43.667967,-79.367675,Cranberries,43.667843,-79.369407,Diner


In [81]:
tor_venues.tail()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
1226,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,Hart House Gym,43.664172,-79.394888,Gym
1227,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,Convocation Hall,43.660828,-79.395245,College Auditorium
1228,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,Tim Hortons,43.659415,-79.391221,Coffee Shop
1229,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,SUDS,43.65988,-79.394712,Bar
1230,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,Tim Hortons,43.658906,-79.388696,Coffee Shop


In [86]:
# How many unique categories can be collected from all the returned venues
print('There are {} uniques categories.'.format(len(tor_venues['Venue Category'].unique())))

There are 208 uniques categories.


In [95]:
# Analyze each neighborhood
# one hot encoding
tor_onehot = pd.get_dummies(tor_venuestest[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
tor_onehot['Neighborhood'] = tor_venuestest['Neighborhood']

# move neighborhood column to the first column
fixed_columns = [tor_onehot.columns[-1]] + list(tor_onehot.columns[:-1])
tor_onehot = tor_onehot[fixed_columns]

tor_grouped = tor_onehot.groupby('Neighborhood').mean().reset_index()
tor_grouped.head()

Unnamed: 0,Neighborhood,American Restaurant,Asian Restaurant,Belgian Restaurant,Brazilian Restaurant,Caribbean Restaurant,Chinese Restaurant,Colombian Restaurant,Comfort Food Restaurant,Doner Restaurant,Eastern European Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Filipino Restaurant,French Restaurant,German Restaurant,Gluten-free Restaurant,Greek Restaurant,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Molecular Gastronomy Restaurant,Moroccan Restaurant,New American Restaurant,Portuguese Restaurant,Ramen Restaurant,Restaurant,Seafood Restaurant,Sushi Restaurant,Taiwanese Restaurant,Thai Restaurant,Theme Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.076923,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.076923,0.076923,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.153846,0.153846,0.076923,0.0,0.076923,0.0,0.076923,0.0
1,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.052632,0.0,0.0,0.0,0.052632,0.210526,0.052632,0.052632,0.0,0.0,0.0,0.052632,0.052632,0.0,0.0,0.0,0.052632,0.052632,0.052632,0.052632,0.052632,0.0,0.105263,0.0,0.052632,0.0
2,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Church and Wellesley,0.035714,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.107143,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.178571,0.035714,0.0,0.071429,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.107143,0.0,0.178571,0.0,0.035714,0.035714,0.0,0.035714
4,"Commerce Court, Victoria Hotel",0.125,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.03125,0.0,0.03125,0.0,0.0,0.125,0.09375,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.21875,0.09375,0.0,0.0,0.0625,0.0,0.0625,0.0


In [96]:
tor_venuestest.head(20)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
5,"St. James Town, Cabbagetown",43.667967,-79.367675,Kingyo Toronto,43.665895,-79.368415,Japanese Restaurant
6,"St. James Town, Cabbagetown",43.667967,-79.367675,Butter Chicken Factory,43.667072,-79.369184,Indian Restaurant
7,"St. James Town, Cabbagetown",43.667967,-79.367675,Murgatroid,43.667381,-79.369311,Restaurant
8,"St. James Town, Cabbagetown",43.667967,-79.367675,F'Amelia,43.667536,-79.368613,Italian Restaurant
19,"St. James Town, Cabbagetown",43.667967,-79.367675,Mr. Jerk,43.667328,-79.373389,Caribbean Restaurant
20,"St. James Town, Cabbagetown",43.667967,-79.367675,Kanpai Snack Bar,43.664331,-79.368065,Taiwanese Restaurant
23,"St. James Town, Cabbagetown",43.667967,-79.367675,The Pear Tree,43.664904,-79.368246,Restaurant
24,"St. James Town, Cabbagetown",43.667967,-79.367675,Thai Room - Carlton,43.664159,-79.368189,Thai Restaurant
28,"St. James Town, Cabbagetown",43.667967,-79.367675,Hey Lucy,43.664075,-79.368655,Italian Restaurant
36,"St. James Town, Cabbagetown",43.667967,-79.367675,China Gourmet,43.66418,-79.368359,Chinese Restaurant


In [97]:
tor_grouped.tail(10)

Unnamed: 0,Neighborhood,American Restaurant,Asian Restaurant,Belgian Restaurant,Brazilian Restaurant,Caribbean Restaurant,Chinese Restaurant,Colombian Restaurant,Comfort Food Restaurant,Doner Restaurant,Eastern European Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Filipino Restaurant,French Restaurant,German Restaurant,Gluten-free Restaurant,Greek Restaurant,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Molecular Gastronomy Restaurant,Moroccan Restaurant,New American Restaurant,Portuguese Restaurant,Ramen Restaurant,Restaurant,Seafood Restaurant,Sushi Restaurant,Taiwanese Restaurant,Thai Restaurant,Theme Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
7,"Harbourfront East, Union Station, Toronto Islands",0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.214286,0.071429,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.214286,0.071429,0.071429,0.0,0.0,0.0,0.071429,0.0
8,"Kensington Market, Chinatown, Grange Park",0.0,0.0,0.058824,0.0,0.117647,0.0,0.0,0.117647,0.058824,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.176471,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.176471,0.176471
9,"Queen's Park, Ontario Provincial Government",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.142857,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.0,0.0
10,"Regent Park, Harbourfront",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11,"Richmond, Adelaide, King",0.083333,0.041667,0.0,0.041667,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.041667,0.0,0.041667,0.041667,0.0,0.0,0.041667,0.0,0.0,0.041667,0.0,0.041667,0.166667,0.041667,0.083333,0.0,0.125,0.0,0.041667,0.0
12,St. James Town,0.142857,0.047619,0.047619,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.047619,0.0,0.0,0.0,0.095238,0.047619,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.095238,0.047619,0.0,0.0,0.095238,0.095238,0.0,0.0,0.047619,0.0,0.047619,0.0
13,"St. James Town, Cabbagetown",0.076923,0.0,0.0,0.0,0.076923,0.153846,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.153846,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.153846,0.0,0.076923,0.076923,0.076923,0.0,0.0,0.0
14,Stn A PO Boxes,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.043478,0.0,0.0,0.043478,0.0,0.043478,0.0,0.0,0.0,0.043478,0.130435,0.130435,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.130435,0.173913,0.043478,0.0,0.043478,0.0,0.043478,0.0
15,"Toronto Dominion Centre, Design Exchange",0.111111,0.074074,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.037037,0.0,0.037037,0.037037,0.0,0.111111,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.148148,0.111111,0.074074,0.0,0.0,0.0,0.037037,0.0
16,"University of Toronto, Harbord",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.25,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.125,0.0,0.0,0.0,0.0,0.0


In [100]:
tor_grouped.shape

(17, 41)

In [101]:
# Examine the new dataframe size
tor_onehot.shape

(302, 41)

In [102]:
# Let's confirm the new size
tor_grouped.shape

(17, 41)

In [103]:
tor_grouped

Unnamed: 0,Neighborhood,American Restaurant,Asian Restaurant,Belgian Restaurant,Brazilian Restaurant,Caribbean Restaurant,Chinese Restaurant,Colombian Restaurant,Comfort Food Restaurant,Doner Restaurant,Eastern European Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Filipino Restaurant,French Restaurant,German Restaurant,Gluten-free Restaurant,Greek Restaurant,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Molecular Gastronomy Restaurant,Moroccan Restaurant,New American Restaurant,Portuguese Restaurant,Ramen Restaurant,Restaurant,Seafood Restaurant,Sushi Restaurant,Taiwanese Restaurant,Thai Restaurant,Theme Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.076923,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.076923,0.076923,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.153846,0.153846,0.076923,0.0,0.076923,0.0,0.076923,0.0
1,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.052632,0.0,0.0,0.0,0.052632,0.210526,0.052632,0.052632,0.0,0.0,0.0,0.052632,0.052632,0.0,0.0,0.0,0.052632,0.052632,0.052632,0.052632,0.052632,0.0,0.105263,0.0,0.052632,0.0
2,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Church and Wellesley,0.035714,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.107143,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.178571,0.035714,0.0,0.071429,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.107143,0.0,0.178571,0.0,0.035714,0.035714,0.0,0.035714
4,"Commerce Court, Victoria Hotel",0.125,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.03125,0.0,0.03125,0.0,0.0,0.125,0.09375,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.21875,0.09375,0.0,0.0,0.0625,0.0,0.0625,0.0
5,"First Canadian Place, Underground city",0.096774,0.096774,0.0,0.032258,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.032258,0.032258,0.0,0.032258,0.129032,0.0,0.032258,0.032258,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.129032,0.096774,0.064516,0.0,0.064516,0.0,0.032258,0.0
6,"Garden District, Ryerson",0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.05,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.15,0.0,0.0,0.0,0.05,0.15,0.05,0.0,0.0,0.0,0.0,0.1,0.05,0.05,0.0,0.0,0.05,0.0,0.0,0.05
7,"Harbourfront East, Union Station, Toronto Islands",0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.214286,0.071429,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.214286,0.071429,0.071429,0.0,0.0,0.0,0.071429,0.0
8,"Kensington Market, Chinatown, Grange Park",0.0,0.0,0.058824,0.0,0.117647,0.0,0.0,0.117647,0.058824,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.176471,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.176471,0.176471
9,"Queen's Park, Ontario Provincial Government",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.142857,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.0,0.0


In [107]:
# Let's print each neighborhood along with the top 10 most common venues
num_top_venues = 10

for hood in tor_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = tor_grouped[tor_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('/n')

----Berczy Park----
                           venue  freq
0             Seafood Restaurant  0.15
1                     Restaurant  0.15
2            Japanese Restaurant  0.08
3  Vegetarian / Vegan Restaurant  0.08
4                Thai Restaurant  0.08
5               Sushi Restaurant  0.08
6        Comfort Food Restaurant  0.08
7    Eastern European Restaurant  0.08
8              Indian Restaurant  0.08
9               Greek Restaurant  0.08
/n
----Central Bay Street----
                        venue  freq
0          Italian Restaurant  0.21
1             Thai Restaurant  0.11
2         Japanese Restaurant  0.05
3            Ramen Restaurant  0.05
4           Indian Restaurant  0.05
5   Middle Eastern Restaurant  0.05
6  Modern European Restaurant  0.05
7           French Restaurant  0.05
8       Portuguese Restaurant  0.05
9          Falafel Restaurant  0.05
/n
----Christie----
                             venue  freq
0               Italian Restaurant   0.5
1                      

In [110]:
temp

Unnamed: 0,venue,freq
1,American Restaurant,0.0
2,Asian Restaurant,0.0
3,Belgian Restaurant,0.0
4,Brazilian Restaurant,0.0
5,Caribbean Restaurant,0.0
6,Chinese Restaurant,0.0
7,Colombian Restaurant,0.0
8,Comfort Food Restaurant,0.12
9,Doner Restaurant,0.0
10,Eastern European Restaurant,0.0


In [111]:
# Let's put that into a pandas dataframe
# Use the function to sort the venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [112]:
# Create the new dataframe and display the top 15 venues for each neighborhood
num_top_venues = 15
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = tor_grouped['Neighborhood']

for ind in np.arange(tor_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(tor_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head(16)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
0,Berczy Park,Seafood Restaurant,Restaurant,Comfort Food Restaurant,Eastern European Restaurant,Greek Restaurant,Indian Restaurant,Vegetarian / Vegan Restaurant,Japanese Restaurant,French Restaurant,Sushi Restaurant,Thai Restaurant,Caribbean Restaurant,Chinese Restaurant,Colombian Restaurant,German Restaurant
1,Central Bay Street,Italian Restaurant,Thai Restaurant,Portuguese Restaurant,Falafel Restaurant,Indian Restaurant,Vegetarian / Vegan Restaurant,Japanese Restaurant,Korean Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Ramen Restaurant,Restaurant,Seafood Restaurant,Sushi Restaurant,French Restaurant
2,Christie,Italian Restaurant,Restaurant,Eastern European Restaurant,Gluten-free Restaurant,German Restaurant,French Restaurant,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Ethiopian Restaurant,Doner Restaurant,Indian Restaurant,Comfort Food Restaurant,Colombian Restaurant,Chinese Restaurant
3,Church and Wellesley,Sushi Restaurant,Japanese Restaurant,Fast Food Restaurant,Restaurant,Mediterranean Restaurant,Vietnamese Restaurant,Mexican Restaurant,Caribbean Restaurant,Ethiopian Restaurant,Indian Restaurant,Korean Restaurant,American Restaurant,Theme Restaurant,Ramen Restaurant,Thai Restaurant
4,"Commerce Court, Victoria Hotel",Restaurant,Italian Restaurant,American Restaurant,Seafood Restaurant,Japanese Restaurant,Thai Restaurant,Asian Restaurant,Vegetarian / Vegan Restaurant,French Restaurant,Fast Food Restaurant,Latin American Restaurant,Gluten-free Restaurant,New American Restaurant,Colombian Restaurant,Chinese Restaurant
5,"First Canadian Place, Underground city",Restaurant,Japanese Restaurant,American Restaurant,Asian Restaurant,Seafood Restaurant,Thai Restaurant,Sushi Restaurant,Mediterranean Restaurant,Brazilian Restaurant,Colombian Restaurant,Fast Food Restaurant,Gluten-free Restaurant,Greek Restaurant,Vegetarian / Vegan Restaurant,Latin American Restaurant
6,"Garden District, Ryerson",Japanese Restaurant,Middle Eastern Restaurant,Italian Restaurant,Fast Food Restaurant,Ramen Restaurant,Modern European Restaurant,Chinese Restaurant,Ethiopian Restaurant,Mexican Restaurant,Vietnamese Restaurant,Restaurant,Seafood Restaurant,Thai Restaurant,New American Restaurant,Filipino Restaurant
7,"Harbourfront East, Union Station, Toronto Islands",Italian Restaurant,Restaurant,Mexican Restaurant,Indian Restaurant,Vegetarian / Vegan Restaurant,Sushi Restaurant,Seafood Restaurant,Japanese Restaurant,Chinese Restaurant,New American Restaurant,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Ethiopian Restaurant,Comfort Food Restaurant
8,"Kensington Market, Chinatown, Grange Park",Vietnamese Restaurant,Mexican Restaurant,Vegetarian / Vegan Restaurant,Caribbean Restaurant,Comfort Food Restaurant,Belgian Restaurant,Filipino Restaurant,Japanese Restaurant,Doner Restaurant,Gluten-free Restaurant,German Restaurant,French Restaurant,Asian Restaurant,Falafel Restaurant,Fast Food Restaurant
9,"Queen's Park, Ontario Provincial Government",Sushi Restaurant,Italian Restaurant,Japanese Restaurant,Fast Food Restaurant,Mexican Restaurant,Portuguese Restaurant,Eastern European Restaurant,German Restaurant,French Restaurant,Filipino Restaurant,Falafel Restaurant,Ethiopian Restaurant,Doner Restaurant,Greek Restaurant,Comfort Food Restaurant


In [113]:
# Run k-means to cluster the neighborhood into 5 clusters
# set number of clusters
kclusters = 5

tor_grouped_clustering = tor_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(tor_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 2, 4, 3, 0, 0, 3, 2, 1, 3], dtype=int32)

In [114]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

tor_merged = tor_data

# merge to add latitude/longitude for each neighborhood
tor_merged = tor_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

tor_merged.head() # check the columns

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
0,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,,,,,,,,,,,,,,,,
1,M4X,Downtown Toronto,"St. James Town, Cabbagetown",43.667967,-79.367675,2.0,Italian Restaurant,Restaurant,Chinese Restaurant,Caribbean Restaurant,Indian Restaurant,Japanese Restaurant,American Restaurant,Taiwanese Restaurant,Sushi Restaurant,Thai Restaurant,Seafood Restaurant,Doner Restaurant,Fast Food Restaurant,Falafel Restaurant,Ethiopian Restaurant
2,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,3.0,Sushi Restaurant,Japanese Restaurant,Fast Food Restaurant,Restaurant,Mediterranean Restaurant,Vietnamese Restaurant,Mexican Restaurant,Caribbean Restaurant,Ethiopian Restaurant,Indian Restaurant,Korean Restaurant,American Restaurant,Theme Restaurant,Ramen Restaurant,Thai Restaurant
3,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,4.0,Italian Restaurant,French Restaurant,Restaurant,Eastern European Restaurant,Gluten-free Restaurant,German Restaurant,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Ethiopian Restaurant,Doner Restaurant,Indian Restaurant,Comfort Food Restaurant,Colombian Restaurant,Chinese Restaurant
4,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,3.0,Japanese Restaurant,Middle Eastern Restaurant,Italian Restaurant,Fast Food Restaurant,Ramen Restaurant,Modern European Restaurant,Chinese Restaurant,Ethiopian Restaurant,Mexican Restaurant,Vietnamese Restaurant,Restaurant,Seafood Restaurant,Thai Restaurant,New American Restaurant,Filipino Restaurant


In [115]:
# Ignore/drop NaNs
tor_merged.dropna(axis=0, how='any',inplace=True)
tor_merged.reset_index(inplace=True, drop=True)
print("Number of rows after dropping NaNs:", len(tor_merged))
print("Number of NaNs:", tor_merged.isna().sum())

Number of rows after dropping NaNs: 17
Number of NaNs: PostalCode                0
Borough                   0
Neighborhood              0
Latitude                  0
Longitude                 0
Cluster Labels            0
1st Most Common Venue     0
2nd Most Common Venue     0
3rd Most Common Venue     0
4th Most Common Venue     0
5th Most Common Venue     0
6th Most Common Venue     0
7th Most Common Venue     0
8th Most Common Venue     0
9th Most Common Venue     0
10th Most Common Venue    0
11th Most Common Venue    0
12th Most Common Venue    0
13th Most Common Venue    0
14th Most Common Venue    0
15th Most Common Venue    0
dtype: int64


In [116]:
# Visualize the Clusters
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=13)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

In [117]:
# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(tor_merged['Latitude'], tor_merged['Longitude'], tor_merged['Neighborhood'], tor_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [118]:
tor_merged.groupby('Cluster Labels').count()

Unnamed: 0_level_0,PostalCode,Borough,Neighborhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
Cluster Labels,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
0.0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
1.0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
2.0,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4
3.0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
4.0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2


In [119]:
# Cluster 1
tor_merged.loc[tor_merged['Cluster Labels'] == 0, tor_merged.columns[[2] + list(range(6, tor_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
4,St. James Town,American Restaurant,Seafood Restaurant,Moroccan Restaurant,Restaurant,Italian Restaurant,French Restaurant,Japanese Restaurant,Thai Restaurant,Asian Restaurant,Belgian Restaurant,New American Restaurant,German Restaurant,Middle Eastern Restaurant,Comfort Food Restaurant,Vegetarian / Vegan Restaurant
5,Berczy Park,Seafood Restaurant,Restaurant,Comfort Food Restaurant,Eastern European Restaurant,Greek Restaurant,Indian Restaurant,Vegetarian / Vegan Restaurant,Japanese Restaurant,French Restaurant,Sushi Restaurant,Thai Restaurant,Caribbean Restaurant,Chinese Restaurant,Colombian Restaurant,German Restaurant
7,"Richmond, Adelaide, King",Restaurant,Thai Restaurant,American Restaurant,Sushi Restaurant,Colombian Restaurant,Gluten-free Restaurant,Vegetarian / Vegan Restaurant,Japanese Restaurant,Latin American Restaurant,Mediterranean Restaurant,Modern European Restaurant,New American Restaurant,Brazilian Restaurant,Ramen Restaurant,Seafood Restaurant
9,"Toronto Dominion Centre, Design Exchange",Restaurant,Italian Restaurant,Seafood Restaurant,Japanese Restaurant,American Restaurant,Sushi Restaurant,Asian Restaurant,Vegetarian / Vegan Restaurant,Chinese Restaurant,Fast Food Restaurant,French Restaurant,Gluten-free Restaurant,Greek Restaurant,New American Restaurant,Modern European Restaurant
10,"Commerce Court, Victoria Hotel",Restaurant,Italian Restaurant,American Restaurant,Seafood Restaurant,Japanese Restaurant,Thai Restaurant,Asian Restaurant,Vegetarian / Vegan Restaurant,French Restaurant,Fast Food Restaurant,Latin American Restaurant,Gluten-free Restaurant,New American Restaurant,Colombian Restaurant,Chinese Restaurant
13,Stn A PO Boxes,Seafood Restaurant,Italian Restaurant,Restaurant,Japanese Restaurant,Molecular Gastronomy Restaurant,Comfort Food Restaurant,Eastern European Restaurant,Fast Food Restaurant,French Restaurant,Indian Restaurant,Vegetarian / Vegan Restaurant,American Restaurant,Thai Restaurant,Sushi Restaurant,Moroccan Restaurant
14,"First Canadian Place, Underground city",Restaurant,Japanese Restaurant,American Restaurant,Asian Restaurant,Seafood Restaurant,Thai Restaurant,Sushi Restaurant,Mediterranean Restaurant,Brazilian Restaurant,Colombian Restaurant,Fast Food Restaurant,Gluten-free Restaurant,Greek Restaurant,Vegetarian / Vegan Restaurant,Latin American Restaurant


In [120]:
# Cluster 2
tor_merged.loc[tor_merged['Cluster Labels'] == 1, tor_merged.columns[[2] + list(range(6, tor_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
12,"Kensington Market, Chinatown, Grange Park",Vietnamese Restaurant,Mexican Restaurant,Vegetarian / Vegan Restaurant,Caribbean Restaurant,Comfort Food Restaurant,Belgian Restaurant,Filipino Restaurant,Japanese Restaurant,Doner Restaurant,Gluten-free Restaurant,German Restaurant,French Restaurant,Asian Restaurant,Falafel Restaurant,Fast Food Restaurant


In [125]:
# Cluster 3
tor_merged.loc[tor_merged['Cluster Labels'] == 2, tor_merged.columns[[2] + list(range(6, tor_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
0,"St. James Town, Cabbagetown",Italian Restaurant,Restaurant,Chinese Restaurant,Caribbean Restaurant,Indian Restaurant,Japanese Restaurant,American Restaurant,Taiwanese Restaurant,Sushi Restaurant,Thai Restaurant,Seafood Restaurant,Doner Restaurant,Fast Food Restaurant,Falafel Restaurant,Ethiopian Restaurant
6,Central Bay Street,Italian Restaurant,Thai Restaurant,Portuguese Restaurant,Falafel Restaurant,Indian Restaurant,Vegetarian / Vegan Restaurant,Japanese Restaurant,Korean Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Ramen Restaurant,Restaurant,Seafood Restaurant,Sushi Restaurant,French Restaurant
8,"Harbourfront East, Union Station, Toronto Islands",Italian Restaurant,Restaurant,Mexican Restaurant,Indian Restaurant,Vegetarian / Vegan Restaurant,Sushi Restaurant,Seafood Restaurant,Japanese Restaurant,Chinese Restaurant,New American Restaurant,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Ethiopian Restaurant,Comfort Food Restaurant
11,"University of Toronto, Harbord",Italian Restaurant,Japanese Restaurant,Sushi Restaurant,French Restaurant,Restaurant,Comfort Food Restaurant,Caribbean Restaurant,Brazilian Restaurant,Chinese Restaurant,Colombian Restaurant,Greek Restaurant,Doner Restaurant,Eastern European Restaurant,Ethiopian Restaurant,Falafel Restaurant


In [126]:
# Cluster 4
tor_merged.loc[tor_merged['Cluster Labels'] == 3, tor_merged.columns[[2] + list(range(6, tor_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
1,Church and Wellesley,Sushi Restaurant,Japanese Restaurant,Fast Food Restaurant,Restaurant,Mediterranean Restaurant,Vietnamese Restaurant,Mexican Restaurant,Caribbean Restaurant,Ethiopian Restaurant,Indian Restaurant,Korean Restaurant,American Restaurant,Theme Restaurant,Ramen Restaurant,Thai Restaurant
3,"Garden District, Ryerson",Japanese Restaurant,Middle Eastern Restaurant,Italian Restaurant,Fast Food Restaurant,Ramen Restaurant,Modern European Restaurant,Chinese Restaurant,Ethiopian Restaurant,Mexican Restaurant,Vietnamese Restaurant,Restaurant,Seafood Restaurant,Thai Restaurant,New American Restaurant,Filipino Restaurant
16,"Queen's Park, Ontario Provincial Government",Sushi Restaurant,Italian Restaurant,Japanese Restaurant,Fast Food Restaurant,Mexican Restaurant,Portuguese Restaurant,Eastern European Restaurant,German Restaurant,French Restaurant,Filipino Restaurant,Falafel Restaurant,Ethiopian Restaurant,Doner Restaurant,Greek Restaurant,Comfort Food Restaurant


In [127]:
# Cluster 5
tor_merged.loc[tor_merged['Cluster Labels'] == 4, tor_merged.columns[[2] + list(range(6, tor_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
2,"Regent Park, Harbourfront",Italian Restaurant,French Restaurant,Restaurant,Eastern European Restaurant,Gluten-free Restaurant,German Restaurant,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Ethiopian Restaurant,Doner Restaurant,Indian Restaurant,Comfort Food Restaurant,Colombian Restaurant,Chinese Restaurant
15,Christie,Italian Restaurant,Restaurant,Eastern European Restaurant,Gluten-free Restaurant,German Restaurant,French Restaurant,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Ethiopian Restaurant,Doner Restaurant,Indian Restaurant,Comfort Food Restaurant,Colombian Restaurant,Chinese Restaurant
