# Segmenting and Clustering Neighborhoods in Toronto

## 1. Scraping Neighborhood Data from wiki page

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

### 1.1 Scraping wiki page into dataframe

In [2]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
req = requests.get(url)
soup = BeautifulSoup(req.content,'lxml')
table = soup.find_all('table')[0]
df = pd.read_html(str(table))[0]

df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


### 1.2 Removing cells with 'Not assigned' borough

In [3]:
index_boroughNotAssigned = df[df.Borough == 'Not assigned'].index
df.drop(index_boroughNotAssigned, inplace = True)
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


### 1.3 Assigning 'Not assigned' neighborhood by its borough name

In [4]:
index_neigbourhoodNotAssigned = df[df.Neighbourhood == 'Not assigned'].index
df.Neighbourhood[index_neigbourhoodNotAssigned] = df.Borough[index_neigbourhoodNotAssigned]
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


### 1.4 Combining neighborhoods in one postcode area

In [5]:
grouped = df.groupby(["Postcode", "Borough"])["Neighbourhood"].apply(', '.join)
df_grouped = pd.DataFrame(data = grouped).reset_index()
df_grouped.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [6]:
print("There are %d rows in combined data frame" % df_grouped.shape[0])

There are 103 rows in combined data frame


## 2. Build Neighborhood Dataframe with Geographical Coordinates

### 2.1 Building a data frame containing podes of areas and corresponding geo-coordinates

#### Option 1. Using geocoder
since geocoder doesn't work reliably, we'll not use this option

In [7]:
#!pip install geocoder # install geocoder in case it's not installed

# defining a function to extract latitude and longitude coordinate for given postal code
def get_coordinates(postal_code):
    
    import geocoder # import geocoder
    
    # initialize your variable to None
    lat_lng_coords = None

    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
        lat_lng_coords = g.latlng

    return lat_lng_coords[0], lat_lng_coords[1]

# building data frame df_coordinates
df_coordinates = pd.DataFrame(columns= ['Postcode', 'Latitude', 'Longitude'])

for postal_code in df_grouped["Postcode"]:
    latitude, longitude = get_coordinates(postal_code)
    df_coordinates.append({'Postcode': postal_code, 'Latitude': latitude, 'Longitude': longitude}, ignore_index=True)


#### Option 2. Extracting geographical coordinates of each postal code from .csv file

In [7]:
column_names= ['Postcode', 'Latitude', 'Longitude']

df_coordinates = pd.read_csv('Geospatial_Coordinates.csv', names = column_names)

### 2.2 Merging/Joining dataframe df_grouped with df_coordinates

In [8]:
df_grouped_geodata = df_grouped.join(df_coordinates.set_index('Postcode'), on='Postcode')

### 2.3 Checking the final data frame

In [9]:
df_grouped_geodata.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.8066863,-79.1943534
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.7845351,-79.1604971
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.7635726,-79.1887115
3,M1G,Scarborough,Woburn,43.7709921,-79.2169174
4,M1H,Scarborough,Cedarbrae,43.773136,-79.2394761
5,M1J,Scarborough,Scarborough Village,43.7447342,-79.2394761
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.7279292,-79.2620294
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.7111117,-79.2845772
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.2394761
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.2648481


In [10]:
print('The dataframe has {} postal codes and {} boroughs.'.format(
        df_grouped_geodata.shape[0],
        len(df_grouped_geodata['Borough'].unique())        
    )
)

The dataframe has 103 postal codes and 11 boroughs.


## 3. Explore and Clustering Neighborhoods in Toronto

### 3.1 Import necessary libraries

In [11]:
#!conda install -c conda-forge geopy --yes  # uncomment this line if no geopy package was installed
from geopy.geocoders import Nominatim 

import numpy as np

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes  # uncomment this line if no folium package was installed
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


### 3.2 Creating Toronto map with neighborhoods superimposed on top
#### 3.2.1 Using geopy to get latitude and longitude coordinates of Totonto

In [13]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto City are 43.653963, -79.387207.


#### 3.2.2 Creating Toronto map

In [14]:
df_toronto = df_grouped_geodata
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, postal_code, borough, neighborhood in zip(df_toronto['Latitude'].astype('float'), df_toronto['Longitude'].astype('float'), df_toronto['Postcode'], df_toronto['Borough'], df_toronto['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### 3.3 Exploring neighborhoods in Toronto
#### 3.3.1 Defining the Foursquare credentials and version

In [18]:
CLIENT_ID = 'EO0OXPAF2MCQV0FCIWOFEYJTMHFFUO5LOUSCISAPVTSMIBOE' # your Foursquare ID
CLIENT_SECRET = '03XQEA4DMTYFEQWODF3N1RHR0VYPPPDU3LV1AVOVOOZGCIRU' # your Foursquare Secret
VERSION = '20191028' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: EO0OXPAF2MCQV0FCIWOFEYJTMHFFUO5LOUSCISAPVTSMIBOE
CLIENT_SECRET:03XQEA4DMTYFEQWODF3N1RHR0VYPPPDU3LV1AVOVOOZGCIRU


#### 3.3.2 Creating a function to repeat the process extracting venues nearby neighborhoods in a postal-code area

In [19]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        #print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Postcode', 
                  'Latitude', 
                  'Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### 3.3.3 Extracting venues nearby each area of postal code and creating a data frame called totonto_venues

In [20]:
toronto_venues = getNearbyVenues(names= df_toronto['Postcode'],
                                   latitudes= df_toronto['Latitude'],
                                   longitudes= df_toronto['Longitude']
                                  )

print('%d venues were returned for all areas in Toronto' % toronto_venues.shape[0])
toronto_venues.head()

2269 venues were returned for all areas in Toronto


Unnamed: 0,Postcode,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M1B,43.8066863,-79.1943534,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,M1C,43.7845351,-79.1604971,Royal Canadian Legion,43.782533,-79.163085,Bar
2,M1E,43.7635726,-79.1887115,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
3,M1E,43.7635726,-79.1887115,G & G Electronics,43.765309,-79.191537,Electronics Store
4,M1E,43.7635726,-79.1887115,Big Bite Burrito,43.766299,-79.19072,Mexican Restaurant


#### 3.3.4 Checking how many venues were found for each area of postal code in Toronto

In [21]:
toronto_venues.groupby('Postcode').count()

Unnamed: 0_level_0,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
M1B,1,1,1,1,1,1
M1C,1,1,1,1,1,1
M1E,8,8,8,8,8,8
M1G,3,3,3,3,3,3
M1H,8,8,8,8,8,8
M1J,1,1,1,1,1,1
M1K,6,6,6,6,6,6
M1L,9,9,9,9,9,9
M1M,3,3,3,3,3,3
M1N,4,4,4,4,4,4


#### 3.3.5 Check how many unique venue categories found

In [22]:
print('There are {} uniques categories of venues found in Toronto.'.format(len(toronto_venues['Venue Category'].unique())))

There are 268 uniques categories of venues found in Toronto.


### 3.4 Analyzing each postal-code area
#### 3.4.1 One-hot encoding

In [23]:
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add postal-code column back to dataframe
toronto_onehot['Postcode'] = toronto_venues['Postcode'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

# check size of new dataframe
print('The new dataframe has {} rows and {} columns.'.format(toronto_onehot.shape[0], toronto_onehot.shape[1]))

toronto_onehot.head()

The new dataframe has 2269 rows and 269 columns.


Unnamed: 0,Postcode,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M1C,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M1E,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M1E,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M1E,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### 3.4.2 grouping rows by area and by taking the mean of the frequency of occurrence of each category

In [24]:
toronto_grouped = toronto_onehot.groupby('Postcode').mean().reset_index()

print('There are {} rows and {} columns in dataframe {}'.format(toronto_grouped.shape[0], toronto_grouped.shape[1], 'toronto_grouped'))

toronto_grouped.head()

There are 100 rows and 269 columns in dataframe toronto_grouped


Unnamed: 0,Postcode,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,M1B,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M1C,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,M1E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M1G,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,M1H,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### 3.4.3 printing the top 5 most common venues for each area

In [25]:
num_top_venues = 5

for postal_code in toronto_grouped['Postcode']:
    print("----"+postal_code+"----")
    temp = toronto_grouped[toronto_grouped['Postcode'] == postal_code].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----M1B----
                  venue  freq
0  Fast Food Restaurant   1.0
1                Museum   0.0
2                Market   0.0
3        Massage Studio   0.0
4        Medical Center   0.0


----M1C----
                       venue  freq
0                        Bar   1.0
1          Accessories Store   0.0
2  Middle Eastern Restaurant   0.0
3                      Motel   0.0
4        Monument / Landmark   0.0


----M1E----
                 venue  freq
0    Electronics Store  0.25
1          Pizza Place  0.12
2  Rental Car Location  0.12
3         Intersection  0.12
4   Mexican Restaurant  0.12


----M1G----
                 venue  freq
0          Coffee Shop  0.67
1    Korean Restaurant  0.33
2    Accessories Store  0.00
3                Motel  0.00
4  Monument / Landmark  0.00


----M1H----
                  venue  freq
0      Hakka Restaurant  0.12
1    Athletics & Sports  0.12
2  Caribbean Restaurant  0.12
3   Fried Chicken Joint  0.12
4                  Bank  0.12


----M1J----


#### 3.4.3 Building a new dataframe containing the top 10 venues for each area of postal code

In [26]:
# function to sort venues
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [27]:
# create dataframe containing the top 10 venues for each area
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Postcode']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Postcode'] = toronto_grouped['Postcode']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Postcode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Fast Food Restaurant,Yoga Studio,Donut Shop,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Drugstore
1,M1C,Bar,Yoga Studio,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Drugstore,Farmers Market
2,M1E,Electronics Store,Rental Car Location,Breakfast Spot,Medical Center,Mexican Restaurant,Intersection,Pizza Place,Concert Hall,Construction & Landscaping,Eastern European Restaurant
3,M1G,Coffee Shop,Korean Restaurant,Yoga Studio,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Dumpling Restaurant
4,M1H,Lounge,Caribbean Restaurant,Bakery,Bank,Athletics & Sports,Thai Restaurant,Fried Chicken Joint,Hakka Restaurant,Dumpling Restaurant,Drugstore


### 3.5 Clustering postal-code areas
#### 3.5.1 running k-Means to cluster the areas into 5 groups

In [28]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Postcode', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:]

array([2, 0, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3,
       0, 3, 0, 3, 3, 3, 3, 0, 0, 4, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 0,
       3, 3, 3, 0, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 0, 3, 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 0, 4, 3, 3, 3, 4, 1, 3, 0, 3, 3])

#### 3.5.2 Visualizing the resulting clusters

Adding top venues and resulting cluster labels to dataframe and merging it with df_toronto to add latitude and longitude coordinates

In [29]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df_toronto

# merge df_toronto with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Postcode'), on='Postcode')

toronto_merged.dropna(inplace=True)
toronto_merged['Cluster Labels'] = toronto_merged['Cluster Labels'].astype('int32')

toronto_merged.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.8066863,-79.1943534,2,Fast Food Restaurant,Yoga Studio,Donut Shop,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Drugstore
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.7845351,-79.1604971,0,Bar,Yoga Studio,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Drugstore,Farmers Market
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.7635726,-79.1887115,3,Electronics Store,Rental Car Location,Breakfast Spot,Medical Center,Mexican Restaurant,Intersection,Pizza Place,Concert Hall,Construction & Landscaping,Eastern European Restaurant
3,M1G,Scarborough,Woburn,43.7709921,-79.2169174,3,Coffee Shop,Korean Restaurant,Yoga Studio,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Dumpling Restaurant
4,M1H,Scarborough,Cedarbrae,43.773136,-79.2394761,3,Lounge,Caribbean Restaurant,Bakery,Bank,Athletics & Sports,Thai Restaurant,Fried Chicken Joint,Hakka Restaurant,Dumpling Restaurant,Drugstore


Visualizing the clusters on Toronto map

In [30]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'].astype('float'), toronto_merged['Longitude'].astype('float'), toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### 3.5.3 Examining the resulting clusters
_Cluster 1_

In [31]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[2] + list(range(6, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,"Highland Creek, Rouge Hill, Port Union",Bar,Yoga Studio,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Drugstore,Farmers Market
7,"Clairlea, Golden Mile, Oakridge",Bakery,Bus Line,Park,Soccer Field,Intersection,Fast Food Restaurant,Bus Station,Yoga Studio,Dog Run,Discount Store
14,"Agincourt North, L'Amoreaux East, Milliken, St...",Playground,Coffee Shop,Park,Doner Restaurant,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
23,York Mills West,Convenience Store,Park,Electronics Store,Bank,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant
25,Parkwoods,Bus Stop,Park,Food & Drink Shop,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Yoga Studio
30,"CFB Toronto, Downsview East",Construction & Landscaping,Airport,Park,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
31,Downsview West,Grocery Store,Park,Bank,Hotel,Shopping Mall,Convenience Store,Yoga Studio,Dog Run,Dim Sum Restaurant,Diner
40,East Toronto,Convenience Store,Coffee Shop,Park,Yoga Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant
44,Lawrence Park,Gym / Fitness Center,Park,Bus Line,Swim School,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Donut Shop
48,"Moore Park, Summerhill East",Gym,Trail,Playground,Park,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run


_Cluster 2_

In [32]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[2] + list(range(6, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
98,Weston,Convenience Store,Yoga Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Deli / Bodega


_Cluster 3_

In [33]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[2] + list(range(6, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Rouge, Malvern",Fast Food Restaurant,Yoga Studio,Donut Shop,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Drugstore


_Cluster 4_

In [34]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[2] + list(range(6, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,"Guildwood, Morningside, West Hill",Electronics Store,Rental Car Location,Breakfast Spot,Medical Center,Mexican Restaurant,Intersection,Pizza Place,Concert Hall,Construction & Landscaping,Eastern European Restaurant
3,Woburn,Coffee Shop,Korean Restaurant,Yoga Studio,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Dumpling Restaurant
4,Cedarbrae,Lounge,Caribbean Restaurant,Bakery,Bank,Athletics & Sports,Thai Restaurant,Fried Chicken Joint,Hakka Restaurant,Dumpling Restaurant,Drugstore
5,Scarborough Village,Playground,Dance Studio,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop,Doner Restaurant,Dog Run
6,"East Birchmount Park, Ionview, Kennedy Park",Bus Station,Coffee Shop,Discount Store,Hobby Shop,Department Store,Convenience Store,Yoga Studio,Doner Restaurant,Dim Sum Restaurant,Diner
8,"Cliffcrest, Cliffside, Scarborough Village West",Movie Theater,American Restaurant,Motel,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant
9,"Birch Cliff, Cliffside West",Café,College Stadium,Skating Rink,General Entertainment,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop,Electronics Store,Dance Studio
10,"Dorset Park, Scarborough Town Centre, Wexford ...",Indian Restaurant,Pet Store,Chinese Restaurant,Latin American Restaurant,Vietnamese Restaurant,Gaming Cafe,Department Store,Dessert Shop,Dim Sum Restaurant,Diner
11,"Maryvale, Wexford",Auto Garage,Breakfast Spot,Sandwich Place,Bakery,Vietnamese Restaurant,Middle Eastern Restaurant,Dog Run,Dim Sum Restaurant,Diner,Discount Store
12,Agincourt,Chinese Restaurant,Breakfast Spot,Sandwich Place,Skating Rink,Lounge,Dumpling Restaurant,Drugstore,Eastern European Restaurant,Donut Shop,Department Store


_Cluster 5_

In [35]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[2] + list(range(6, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
32,Downsview Central,Baseball Field,Food Truck,Yoga Studio,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Falafel Restaurant
91,"Humber Bay, King's Mill Park, Kingsway Park So...",Deli / Bodega,Baseball Field,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Yoga Studio,Dessert Shop
97,"Emery, Humberlea",Baseball Field,Yoga Studio,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Farmers Market
