# Segmenting and Clustering Neighborhoods in Toronto

## Importing relevant packages

In [1]:
import numpy as np
import pandas as pd 
import bs4 as bs
import urllib.request
from bs4 import BeautifulSoup


# pip install geocoder
# !pip install geopy
# pip install folium==0.5.0
import requests  # library to handle requests
import pandas as pd 
import numpy as np 
import random 

from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize
# plotting library
import folium 
import geocoder

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

## 1 - Part 1: Create table from Wikipedia

- Read the data from wikipedia and storing in a dataframe
- Ignore cells with boroughs that are marked as Not assigned
- Group neighborhoods with same postal code
- Assign the borough name to neighborhoods marked as Not assigned

In [2]:
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Creating BeautifulSoup object
filesource = urllib.request.urlopen(url).read()
soup = BeautifulSoup(filesource,'html.parser')

# Creating a dictionary to get each component
table_contents=[]
table=soup.find('table')
for row in table.findAll('td'):
    cell = {}
    if row.span.text=='Not assigned':
        pass
    else:
        cell['PostalCode'] = row.p.text[:3]  # Postal code has 3 values
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)

df=pd.DataFrame(table_contents)

# Ignore cells with a borough Not assigned and group neighborhoods for similar postal code
df = df[df.Borough != 'Not assigned']
df= df.groupby(['PostalCode','Borough']).agg(', '.join)
df.reset_index(inplace=True)

# If a cell has a Not assigned neighborhood, the name will be the name of the Borough
df.Neighborhood[df['Neighborhood']=='Not assigned'] = df.Borough[df['Neighborhood']=='Not assigned']
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [3]:
df.shape

(103, 3)

## 2 - Part 2: Get latitute and longitude for each postal code using geocoder



In [4]:

postal_code = df['PostalCode']

# Empty list to fill with latitude and longitude
latitude = []
longitude = []

n = 0
while n < len(postal_code):
    g = geocoder.arcgis('{}, Toronto, Ontario'.format(postal_code[n]))
    lat_lng_coords = g.latlng
    latitude.append(lat_lng_coords[0])
    longitude.append(lat_lng_coords[1])
    n = n + 1
df['Latitude'] = latitude
df['Longitude'] = longitude

df.head(5)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.81139,-79.19662
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.78574,-79.15875
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.76575,-79.1747
3,M1G,Scarborough,Woburn,43.76812,-79.21761
4,M1H,Scarborough,Cedarbrae,43.76944,-79.23892


## 3 - Part 3: Explore and cluster the neighbohoods in Toronto 

### Getting coordinates of Toronto

In [6]:
address = 'Toronto'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


### Create a map of Toronto with neighborhoods superimposed on top.

In [7]:
# using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Now let's explore the venues in Toronto. First define Foursquare Credentials and Version

In [9]:
# Getting foursquare api credentials
CLIENT_ID = '***' # your Foursquare ID
CLIENT_SECRET = '***' # your Foursquare Secret
ACCESS_TOKEN = '***' # your FourSquare Access Token
VERSION = '20210604'




### Let's create a function to get the nearby venues to all the neighborhoods in Toronto

In [10]:
LIMIT = 100
radius = 500

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### Now we create a new variable, to store the nearby venues for Toronto

In [11]:
toronto_venues = getNearbyVenues(names=df['Neighborhood'],
                                  latitudes=df['Latitude'],
                                  longitudes=df['Longitude'])

Malvern, Rouge
Rouge Hill, Port Union, Highland Creek
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
Kennedy Park, Ionview, East Birchmount Park
Golden Mile, Clairlea, Oakridge
Cliffside, Cliffcrest, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Wexford Heights, Scarborough Town Centre
Wexford, Maryvale
Agincourt
Clarks Corners, Tam O'Shanter, Sullivan
Milliken, Agincourt North, Steeles East, L'Amoreaux East
Steeles West, L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
York Mills, Silver Hills
Willowdale, Newtonbrook
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Don Mills South
Bathurst Manor, Wilson Heights, Downsview North
Northwood Park, York University
Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Parkview Hill, Woodbine Gardens
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
The Danforth  East
The Danforth West, Riverdale


### Let's check the size of our dataframe

In [12]:

print(toronto_venues.shape)
toronto_venues.head()

(2266, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",43.81139,-79.19662,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,"Malvern, Rouge",43.81139,-79.19662,R & K Woodworking Specialists Inc,43.808233,-79.196857,Construction & Landscaping
2,"Rouge Hill, Port Union, Highland Creek",43.78574,-79.15875,SEBS Engineering Inc. (Sustainable Energy and ...,43.782371,-79.15682,Construction & Landscaping
3,"Rouge Hill, Port Union, Highland Creek",43.78574,-79.15875,Royal Canadian Legion,43.782533,-79.163085,Bar
4,"Guildwood, Morningside, West Hill",43.76575,-79.1747,Homestead Roofing Repair,43.76514,-79.178663,Construction & Landscaping


### Let's check how many venues were returned for each neighborhood

In [13]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,15,15,15,15,15,15
"Alderwood, Long Branch",4,4,4,4,4,4
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",19,19,19,19,19,19
Berczy Park,48,48,48,48,48,48
...,...,...,...,...,...,...
"Willowdale, Newtonbrook",17,17,17,17,17,17
Woburn,5,5,5,5,5,5
Woodbine Heights,18,18,18,18,18,18
York Mills West,4,4,4,4,4,4


### Let's find out how many unique categories can be curated from all the returned venues

In [14]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 255 uniques categories.


## Analyze each neighborhood

In [15]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,American Restaurant,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,Asian Restaurant,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [16]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,American Restaurant,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,...,0.0,0.0,0.000000,0.0,0.0,0.066667,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,...,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
2,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,...,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
3,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,...,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
4,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.020833,0.000000,...,0.0,0.0,0.041667,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,"Willowdale, Newtonbrook",0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,...,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
96,Woburn,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,...,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
97,Woodbine Heights,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.055556,...,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
98,York Mills West,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,...,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0


### Let's print each neighborhood along with the top 5 most common venues

In [17]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')




----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
                venue  freq
0  Italian Restaurant  0.08
1         Coffee Shop  0.07
2      Sandwich Place  0.05
3   French Restaurant  0.04
4          Restaurant  0.04


----Caledonia-Fairbanks----
                 venue  freq
0               Bakery  0.14
1                 Café  0.14
2   Mexican Restaurant  0.14
3  Sporting Goods Shop  0.14
4                 Park  0.14


----Cedarbrae----
         venue  freq
0  Gaming Cafe   0.5
1        Trail   0.5
2  Yoga Studio   0.0
3  Music Venue   0.0
4  Music Store   0.0


----Central Bay Street----
              venue  freq
0       Coffee Shop  0.11
1    Clothing Store  0.08
2    Sandwich Place  0.06
3  Sushi Restaurant  0.06
4       Pizza Place  0.06


----Christie----
                venue  freq
0                Café  0.33
1       Grocery Store  0.22
2  Italian Restaurant  0.11
3          Baby Store  0.11
4         Coffee Sho

### Let's put that into a pandas dataframe
First, let's write a function to sort the venues in descending order

In [18]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]
# Now let's create the new dataframe and display the top 10 venues for each neighborhood.
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Chinese Restaurant,Bubble Tea Shop,Grocery Store,Skating Rink,Shopping Mall,Shanghai Restaurant,Supermarket,Sushi Restaurant,Bakery,Badminton Court
1,"Alderwood, Long Branch",Breakfast Spot,Performing Arts Venue,Pub,Convenience Store,Pet Store,Nail Salon,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant
2,Bayview Village,Trail,Construction & Landscaping,Park,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,New American Restaurant,Moroccan Restaurant
3,"Bedford Park, Lawrence Manor East",Coffee Shop,Sandwich Place,Juice Bar,Comfort Food Restaurant,Sushi Restaurant,Restaurant,Pub,Sports Club,Thai Restaurant,Greek Restaurant
4,Berczy Park,Sandwich Place,Bakery,Cocktail Bar,Seafood Restaurant,Vegetarian / Vegan Restaurant,Coffee Shop,Beer Bar,Farmers Market,Comfort Food Restaurant,Molecular Gastronomy Restaurant


# Cluster Neighborhoods
Run k-means to cluster the neighborhood into 5 clusters.

In [22]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 4, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

### Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [23]:
# add clustering labels
#del neighborhoods_venues_sorted
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood',how='right')

toronto_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,M1S,Scarborough,Agincourt,43.79452,-79.26708,0,Chinese Restaurant,Bubble Tea Shop,Grocery Store,Skating Rink,Shopping Mall,Shanghai Restaurant,Supermarket,Sushi Restaurant,Bakery,Badminton Court
89,M8W,Etobicoke,"Alderwood, Long Branch",43.60124,-79.53879,0,Breakfast Spot,Performing Arts Venue,Pub,Convenience Store,Pet Store,Nail Salon,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant
19,M2K,North York,Bayview Village,43.78112,-79.3806,4,Trail,Construction & Landscaping,Park,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,New American Restaurant,Moroccan Restaurant
62,M5M,North York,"Bedford Park, Lawrence Manor East",43.73545,-79.41916,0,Coffee Shop,Sandwich Place,Juice Bar,Comfort Food Restaurant,Sushi Restaurant,Restaurant,Pub,Sports Club,Thai Restaurant,Greek Restaurant
56,M5E,Downtown Toronto,Berczy Park,43.64536,-79.37306,0,Sandwich Place,Bakery,Cocktail Bar,Seafood Restaurant,Vegetarian / Vegan Restaurant,Coffee Shop,Beer Bar,Farmers Market,Comfort Food Restaurant,Molecular Gastronomy Restaurant


In [24]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine Clusters
Now, you can examine each cluster and determine the discriminating venue categories that distinguish each cluster.

## Cluster 1

In [25]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Scarborough,0,Chinese Restaurant,Bubble Tea Shop,Grocery Store,Skating Rink,Shopping Mall,Shanghai Restaurant,Supermarket,Sushi Restaurant,Bakery,Badminton Court
89,Etobicoke,0,Breakfast Spot,Performing Arts Venue,Pub,Convenience Store,Pet Store,Nail Salon,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant
62,North York,0,Coffee Shop,Sandwich Place,Juice Bar,Comfort Food Restaurant,Sushi Restaurant,Restaurant,Pub,Sports Club,Thai Restaurant,Greek Restaurant
56,Downtown Toronto,0,Sandwich Place,Bakery,Cocktail Bar,Seafood Restaurant,Vegetarian / Vegan Restaurant,Coffee Shop,Beer Bar,Farmers Market,Comfort Food Restaurant,Molecular Gastronomy Restaurant
9,Scarborough,0,General Entertainment,Café,College Stadium,Skating Rink,Yoga Studio,Moving Target,Music Venue,Music Store,Museum,Moroccan Restaurant
...,...,...,...,...,...,...,...,...,...,...,...,...
24,North York,0,Pizza Place,Coffee Shop,Supermarket,Park,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,New American Restaurant,Movie Theater
21,North York,0,Korean Restaurant,Pizza Place,Middle Eastern Restaurant,Café,Shopping Mall,Japanese Restaurant,Grocery Store,Fried Chicken Joint,Dessert Shop,Coffee Shop
3,Scarborough,0,Mexican Restaurant,Park,Business Service,Coffee Shop,Korean BBQ Restaurant,Peruvian Restaurant,Persian Restaurant,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant
36,East York,0,Grocery Store,Pharmacy,Café,Bus Line,Playground,Pet Store,Doctor's Office,Fast Food Restaurant,Breakfast Spot,Metro Station


## Cluster 2

In [26]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Scarborough,1,Auto Garage,Yoga Studio,Newsagent,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater


## Cluster 3

In [27]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
20,North York,2,Park,Yoga Studio,New American Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant


## Cluster 4

In [28]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
82,West Toronto,3,Sandwich Place,Yoga Studio,Pilates Studio,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant


## Cluster 5

In [29]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,North York,4,Trail,Construction & Landscaping,Park,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,New American Restaurant,Moroccan Restaurant
64,Central Toronto,4,Park,Business Service,Yoga Studio,New American Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant
2,Scarborough,4,Construction & Landscaping,Bus Stop,Gym / Fitness Center,Park,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Modern European Restaurant,Newsagent,Movie Theater
17,North York,4,Residential Building (Apartment / Condo),Park,Yoga Studio,Moroccan Restaurant,Music Venue,Music Store,Museum,Moving Target,Movie Theater,Monument / Landmark
73,York,4,Trail,Hockey Arena,Field,Park,Grocery Store,Yoga Studio,Music Store,Museum,Moving Target,Movie Theater
44,Central Toronto,4,Bus Line,Park,Swim School,Yoga Studio,Nail Salon,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark
48,Central Toronto,4,Convenience Store,Park,Gym,Tennis Court,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Miscellaneous Shop,Monument / Landmark,New American Restaurant
88,Etobicoke,4,Park,Yoga Studio,Grocery Store,Skating Rink,Tennis Court,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Molecular Gastronomy Restaurant,Nail Salon
79,North York,4,Basketball Court,Park,Trail,Bakery,Yoga Studio,Movie Theater,Music Venue,Music Store,Museum,Moving Target
46,Central Toronto,4,Gym Pool,Park,Playground,Music Venue,Music Store,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Yoga Studio
