# Applied data science capstone: Segmenting and Clustering Neighborhoods in Toronto

In [139]:
import pandas as pd
from bs4 import BeautifulSoup
import urllib.request
import requests
import numpy as np
%matplotlib inline


### Data Preprocessing

In [140]:
path='/Users/yi-hsieh/Documents/python_test/wiki_table.csv'
dft=pd.read_csv(path, header=None)
dft.columns=['PostalCode','Borough','Neighborhood']
dft.head(5)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


Replace "not assigned" by NaN

In [141]:
dft.replace("Not assigned",np.nan,inplace=True)
dft.dropna(subset=["Borough"],axis=0,inplace=True)

Replace NaN in the Neighborhood column by the value from the Borough column

In [142]:
for p in dft.index:
    if  dft.loc[p,"Neighborhood"] is np.nan:
        dft.loc[p,"Neighborhood"]=dft.loc[8,"Borough"]


## Clustering the neighborhoods of Downtown Toronto

In [143]:
dft2=dft[dft['Borough'] == 'Downtown Toronto'].reset_index(drop=True)
dft2.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M5A,Downtown Toronto,Harbourfront
1,M5A,Downtown Toronto,Regent Park
2,M5B,Downtown Toronto,Ryerson
3,M5B,Downtown Toronto,Garden District
4,M5C,Downtown Toronto,St. James Town


Combine neighborhoods that have the same postal code

In [144]:
dfg=pd.DataFrame() 
dfg['Neighborhood']=dft2.groupby(['PostalCode','Borough'])['Neighborhood']\
.apply(lambda x: "%s" % ', '.join(x))
dfg.reset_index(level=['PostalCode','Borough'],inplace=True)

In [145]:
dfg.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M4W,Downtown Toronto,Rosedale
1,M4X,Downtown Toronto,"Cabbagetown, St. James Town"
2,M4Y,Downtown Toronto,Church and Wellesley
3,M5A,Downtown Toronto,"Harbourfront, Regent Park"
4,M5B,Downtown Toronto,"Ryerson, Garden District"
5,M5C,Downtown Toronto,St. James Town
6,M5E,Downtown Toronto,Berczy Park
7,M5G,Downtown Toronto,Central Bay Street
8,M5H,Downtown Toronto,"Adelaide, King, Richmond"
9,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station"


In [148]:
path2='/Users/yi-hsieh/Downloads/Geospatial_Coordinates.csv'
df_cor=pd.read_csv(path2)
df_cor.set_index('Postal Code',inplace=True)
df_cor.head()

Unnamed: 0_level_0,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,43.806686,-79.194353
M1C,43.784535,-79.160497
M1E,43.763573,-79.188711
M1G,43.770992,-79.216917
M1H,43.773136,-79.239476


In [158]:
df_temp=dfg.set_index('PostalCode')
df_index=df_temp.index
df_cor2 = df_cor.loc[df_index]
df_cor2 = df_cor2.reset_index(drop=True)

In [159]:
dfg['Latitude']=df_cor2['Latitude']
dfg['Longitude']=df_cor2['Longitude']
dfg.head(18)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529
1,M4X,Downtown Toronto,"Cabbagetown, St. James Town",43.667967,-79.367675
2,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316
3,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
4,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
5,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
6,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
7,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
8,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.650571,-79.384568
9,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752


In [151]:
import folium 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

In [153]:
dfg.shape

(18, 5)

In [154]:
address = 'Downtown Toronto, Toronto'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Downtown Toronto are {}, {}.'.format(latitude, longitude))



The geograpical coordinate of Downtown Toronto are 43.654027, -79.3802003.


### Create map of Downtown Toronto using latitude and longitude values


In [197]:
map_dtoron = folium.Map(location=[latitude, longitude], zoom_start=13, control_scale = True)
# add markers to map
for lat, lng, label in zip(dfg['Latitude'], dfg['Longitude'], dfg['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_dtoron)  
    
map_dtoron

### Access Foursqure to collect venues data

In [126]:
CLIENT_ID = 'IMDARLHECSG53NJCGAL3ECB4BEKNUKDHWCGZRYYMFG5WR5NR' 
CLIENT_SECRET = 'RPMTGBLIYWODVOYLLKWVYAM5KMHLWNLMP5CJ3BEDYJVOWVLD' 
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: IMDARLHECSG53NJCGAL3ECB4BEKNUKDHWCGZRYYMFG5WR5NR
CLIENT_SECRET:RPMTGBLIYWODVOYLLKWVYAM5KMHLWNLMP5CJ3BEDYJVOWVLD


In [128]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude, 
    longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=IMDARLHECSG53NJCGAL3ECB4BEKNUKDHWCGZRYYMFG5WR5NR&client_secret=RPMTGBLIYWODVOYLLKWVYAM5KMHLWNLMP5CJ3BEDYJVOWVLD&v=20180605&ll=43.654027,-79.3802003&radius=500&limit=100'

In [131]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [164]:
dtoron_venues = getNearbyVenues(names=dfg['Neighborhood'],
                                   latitudes=dfg['Latitude'],
                                   longitudes=dfg['Longitude']
                                  )


Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront, Regent Park
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place, Underground city
Christie


In [165]:
dtoron_venues.shape

(1283, 7)

In [166]:
dtoron_venues.head(20)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Rosedale,43.679563,-79.377529,Rosedale Park,43.682328,-79.378934,Playground
1,Rosedale,43.679563,-79.377529,Whitney Park,43.682036,-79.373788,Park
2,Rosedale,43.679563,-79.377529,Alex Murray Parkette,43.6783,-79.382773,Park
3,Rosedale,43.679563,-79.377529,Milkman's Lane,43.676352,-79.373842,Trail
4,"Cabbagetown, St. James Town",43.667967,-79.367675,Cranberries,43.667843,-79.369407,Diner
5,"Cabbagetown, St. James Town",43.667967,-79.367675,Butter Chicken Factory,43.667072,-79.369184,Indian Restaurant
6,"Cabbagetown, St. James Town",43.667967,-79.367675,F'Amelia,43.667536,-79.368613,Italian Restaurant
7,"Cabbagetown, St. James Town",43.667967,-79.367675,Kingyo Toronto,43.665895,-79.368415,Japanese Restaurant
8,"Cabbagetown, St. James Town",43.667967,-79.367675,Rashnaa Restaurant,43.668183,-79.369066,Indian Restaurant
9,"Cabbagetown, St. James Town",43.667967,-79.367675,Merryberry Cafe + Bistro,43.66663,-79.368792,Café


In [167]:
dtoron_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Berczy Park,55,55,55,55,55,55
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",13,13,13,13,13,13
"Cabbagetown, St. James Town",49,49,49,49,49,49
Central Bay Street,81,81,81,81,81,81
"Chinatown, Grange Park, Kensington Market",100,100,100,100,100,100
Christie,16,16,16,16,16,16
Church and Wellesley,89,89,89,89,89,89
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
"Design Exchange, Toronto Dominion Centre",100,100,100,100,100,100


In [169]:
print('There are {} uniques categories.'.format(len(dtoron_venues['Venue Category'].unique())))

There are 203 uniques categories.


## Analyze Each Neighborhood 

In [170]:
# one hot encoding
dtoron_onehot = pd.get_dummies(dtoron_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
dtoron_onehot['Neighborhood'] = dtoron_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [dtoron_onehot.columns[-1]] + list(dtoron_onehot.columns[:-1])
dtoron_onehot = dtoron_onehot[fixed_columns]

dtoron_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Theater,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [171]:
dtoron_onehot.shape

(1283, 203)

In [172]:
dtoron_grouped = dtoron_onehot.groupby('Neighborhood').mean().reset_index()
dtoron_grouped.head()

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,...,Theater,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store
0,"Adelaide, King, Richmond",0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.0,0.0,0.0,0.076923,0.076923,0.076923,0.153846,0.153846,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.012346,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.0,0.012346,0.0


In [173]:
num_top_venues = 5

for hood in dtoron_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = dtoron_grouped[dtoron_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
                 venue  freq
0          Coffee Shop  0.07
1                 Café  0.06
2           Steakhouse  0.04
3  American Restaurant  0.04
4      Thai Restaurant  0.04


----Berczy Park----
                venue  freq
0         Coffee Shop  0.09
1        Cocktail Bar  0.05
2  Seafood Restaurant  0.04
3            Beer Bar  0.04
4                Café  0.04


----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara----
              venue  freq
0    Airport Lounge  0.15
1   Airport Service  0.15
2  Airport Terminal  0.15
3             Plane  0.08
4           Airport  0.08


----Cabbagetown, St. James Town----
               venue  freq
0        Coffee Shop  0.08
1         Restaurant  0.08
2        Pizza Place  0.06
3  Indian Restaurant  0.04
4             Bakery  0.04


----Central Bay Street----
                 venue  freq
0          Coffee Shop  0.15
1                 Café  0.07
2   Italian Re

In [175]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [176]:
num_top_venues = 10 # select the number of top venues in neighborhood

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = dtoron_grouped['Neighborhood']

for ind in np.arange(dtoron_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(dtoron_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Steakhouse,Thai Restaurant,American Restaurant,Gym,Restaurant,Hotel,Cosmetics Shop,Bar
1,Berczy Park,Coffee Shop,Cocktail Bar,Farmers Market,Bakery,Cheese Shop,Steakhouse,Café,Restaurant,Seafood Restaurant,Beer Bar
2,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Terminal,Airport Service,Airport Lounge,Harbor / Marina,Airport Gate,Boat or Ferry,Plane,Sculpture Garden,Airport Food Court,Airport
3,"Cabbagetown, St. James Town",Coffee Shop,Restaurant,Pizza Place,Bakery,Park,Indian Restaurant,Café,Pet Store,Italian Restaurant,Pub
4,Central Bay Street,Coffee Shop,Café,Italian Restaurant,Sandwich Place,Burger Joint,Japanese Restaurant,Bar,Bubble Tea Shop,Ice Cream Shop,Spa
5,"Chinatown, Grange Park, Kensington Market",Café,Vegetarian / Vegan Restaurant,Chinese Restaurant,Bar,Mexican Restaurant,Vietnamese Restaurant,Bakery,Dumpling Restaurant,Coffee Shop,Gaming Cafe
6,Christie,Grocery Store,Café,Park,Italian Restaurant,Athletics & Sports,Diner,Baby Store,Nightclub,Convenience Store,Coffee Shop
7,Church and Wellesley,Japanese Restaurant,Coffee Shop,Gay Bar,Burger Joint,Sushi Restaurant,Restaurant,Café,Gastropub,Mediterranean Restaurant,Men's Store
8,"Commerce Court, Victoria Hotel",Coffee Shop,Hotel,Café,Restaurant,American Restaurant,Italian Restaurant,Steakhouse,Gym,Seafood Restaurant,Deli / Bodega
9,"Design Exchange, Toronto Dominion Centre",Coffee Shop,Hotel,Café,American Restaurant,Gym,Sports Bar,Italian Restaurant,Deli / Bodega,Restaurant,Gastropub


## Cluster neightborhoods

In [177]:
from sklearn.cluster import KMeans


In [190]:
# set number of clusters
kclusters = 4

dtoron_grouped_clustering = dtoron_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(dtoron_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 3, 0, 0, 0, 2, 0, 0, 0], dtype=int32)

In [185]:
dtoron_merged = dfg

# add clustering labels
dtoron_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
dtoron_merged = dtoron_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

dtoron_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,0,Park,Trail,Playground,Women's Store,Deli / Bodega,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store
1,M4X,Downtown Toronto,"Cabbagetown, St. James Town",43.667967,-79.367675,0,Coffee Shop,Restaurant,Pizza Place,Bakery,Park,Indian Restaurant,Café,Pet Store,Italian Restaurant,Pub
2,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,2,Japanese Restaurant,Coffee Shop,Gay Bar,Burger Joint,Sushi Restaurant,Restaurant,Café,Gastropub,Mediterranean Restaurant,Men's Store
3,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,3,Coffee Shop,Park,Café,Bakery,Mexican Restaurant,Breakfast Spot,Theater,Pub,Gym / Fitness Center,Beer Store
4,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,3,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Bar,Japanese Restaurant,Bubble Tea Shop,Movie Theater,Pizza Place,Diner


## Map that shows the clusters of neighborhoods

In [182]:
import matplotlib.cm as cm
import matplotlib.colors as colors

In [198]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=13)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(dtoron_merged['Latitude'], dtoron_merged['Longitude'],\
                                  dtoron_merged['Neighborhood'], dtoron_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine clusters in Downtown Toronto:

Cluster 1 (red): 8 neighborhoods, featuring coffee shop, cafe'

In [193]:
dtoron_merged.loc[dtoron_merged['Cluster Labels'] == 0,\
                     dtoron_merged.columns[[2] + list(range(5, dtoron_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Rosedale,0,Park,Trail,Playground,Women's Store,Deli / Bodega,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store
1,"Cabbagetown, St. James Town",0,Coffee Shop,Restaurant,Pizza Place,Bakery,Park,Indian Restaurant,Café,Pet Store,Italian Restaurant,Pub
5,St. James Town,0,Coffee Shop,Café,Restaurant,Hotel,Clothing Store,Cosmetics Shop,Cocktail Bar,Italian Restaurant,Bakery,Gastropub
7,Central Bay Street,0,Coffee Shop,Café,Italian Restaurant,Sandwich Place,Burger Joint,Japanese Restaurant,Bar,Bubble Tea Shop,Ice Cream Shop,Spa
11,"Commerce Court, Victoria Hotel",0,Coffee Shop,Hotel,Café,Restaurant,American Restaurant,Italian Restaurant,Steakhouse,Gym,Seafood Restaurant,Deli / Bodega
15,Stn A PO Boxes 25 The Esplanade,0,Coffee Shop,Café,Restaurant,Hotel,Beer Bar,Seafood Restaurant,Cocktail Bar,Pub,Bakery,Burger Joint
16,"First Canadian Place, Underground city",0,Coffee Shop,Café,Hotel,Restaurant,Steakhouse,Bar,Deli / Bodega,Gastropub,Gym,American Restaurant
17,Christie,0,Grocery Store,Café,Park,Italian Restaurant,Athletics & Sports,Diner,Baby Store,Nightclub,Convenience Store,Coffee Shop


Cluster 2 (purple): a single neighborhood, featuring "airport"

In [194]:
dtoron_merged.loc[dtoron_merged['Cluster Labels'] == 1,\
                     dtoron_merged.columns[[2] + list(range(5, dtoron_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,"CN Tower, Bathurst Quay, Island airport, Harbo...",1,Airport Terminal,Airport Service,Airport Lounge,Harbor / Marina,Airport Gate,Boat or Ferry,Plane,Sculpture Garden,Airport Food Court,Airport


Cluster 3 (blue): a single neighborhood, featuring "Japanese restaurant"

In [195]:
dtoron_merged.loc[dtoron_merged['Cluster Labels'] == 2,\
                     dtoron_merged.columns[[2] + list(range(5, dtoron_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Church and Wellesley,2,Japanese Restaurant,Coffee Shop,Gay Bar,Burger Joint,Sushi Restaurant,Restaurant,Café,Gastropub,Mediterranean Restaurant,Men's Store


Cluster 4 (green): 7 neighborhoods, featuring "coffee shop, cafe', and hotel"

In [196]:
dtoron_merged.loc[dtoron_merged['Cluster Labels'] == 3,\
                     dtoron_merged.columns[[2] + list(range(5, dtoron_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,"Harbourfront, Regent Park",3,Coffee Shop,Park,Café,Bakery,Mexican Restaurant,Breakfast Spot,Theater,Pub,Gym / Fitness Center,Beer Store
4,"Ryerson, Garden District",3,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Bar,Japanese Restaurant,Bubble Tea Shop,Movie Theater,Pizza Place,Diner
8,"Adelaide, King, Richmond",3,Coffee Shop,Café,Steakhouse,Thai Restaurant,American Restaurant,Gym,Restaurant,Hotel,Cosmetics Shop,Bar
9,"Harbourfront East, Toronto Islands, Union Station",3,Coffee Shop,Hotel,Pizza Place,Café,Aquarium,Brewery,Scenic Lookout,Italian Restaurant,Sports Bar,Steakhouse
10,"Design Exchange, Toronto Dominion Centre",3,Coffee Shop,Hotel,Café,American Restaurant,Gym,Sports Bar,Italian Restaurant,Deli / Bodega,Restaurant,Gastropub
12,"Harbord, University of Toronto",3,Café,Bookstore,Bakery,Bar,Restaurant,Japanese Restaurant,Coffee Shop,Comfort Food Restaurant,Poutine Place,Pub
13,"Chinatown, Grange Park, Kensington Market",3,Café,Vegetarian / Vegan Restaurant,Chinese Restaurant,Bar,Mexican Restaurant,Vietnamese Restaurant,Bakery,Dumpling Restaurant,Coffee Shop,Gaming Cafe
