In [52]:
#import library
#!pip install beautifulsoup4
#!pip install lxml
#!pip install request
#!pip install geocoder
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import geocoder
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import json
from pandas.io.json import json_normalize


In [32]:
#use beautiful soup to grasp the table
source = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M").text
soup = BeautifulSoup(source, 'lxml').body
#print(soup.prettify())
table=soup.find('table',class_='wikitable sortable')
#print(table)
df_list = []
df_list.append(pd.concat(pd.read_html(table.prettify())))
df = pd.concat(df_list)

In [33]:
#Clean the table
df=df[df['Borough']!='Not assigned']
df.reset_index(drop=True,inplace=True)
#check if there is NaN in Neigborhood column
df['Neighborhood'].isnull().value_counts()


False    103
Name: Neighborhood, dtype: int64

No NaN in Neigborhood column, so we display the shape of the df

In [26]:
df.shape

(103, 3)

In [27]:
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


Now we read in the geo data and combine the df with geo data

In [28]:
# read goe data
geodata=pd.read_csv('https://cocl.us/Geospatial_data')

In [35]:
#combine two dataframes
df_final=pd.merge(df,geodata,how='left',on='Postal Code')
df_final.sort_values(by='Postal Code')
df_final.head(13)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


Next we want to do explore and cluster the neighborhoods in the city of Toronto. First we take a look at the neighbourhood on map.

In [36]:
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [40]:
#!pip install folium
import folium
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(df_final['Latitude'], df_final['Longitude'], df_final['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

Next, we are going to start utilizing the Foursquare API to explore the neighborhoods and segment them.

#### Define Foursquare Credentials and Version

In [44]:
CLIENT_ID = 'VCFQR0E1BTCZWQVRND3OYBOZY5FJDY0YT2RN35CRKJKJEQP4' # your Foursquare ID
CLIENT_SECRET = 'FNCWVTOWKKLIHTHLFYKGYQCT5BDFW2BWYSUUY1AMLM0CQULU' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: VCFQR0E1BTCZWQVRND3OYBOZY5FJDY0YT2RN35CRKJKJEQP4
CLIENT_SECRET:FNCWVTOWKKLIHTHLFYKGYQCT5BDFW2BWYSUUY1AMLM0CQULU


#### Let's explore the first neighborhood in our dataframe.

Get the neighborhood's latitude and longitude values.

In [42]:
neighborhood_latitude = df_final.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df_final.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = df_final.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Parkwoods are 43.7532586, -79.3296565.


#### Now, let's get the top 100 venues that are in Parkwoods within a radius of 2500 meters.

We define the url first:

In [63]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 2500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=VCFQR0E1BTCZWQVRND3OYBOZY5FJDY0YT2RN35CRKJKJEQP4&client_secret=FNCWVTOWKKLIHTHLFYKGYQCT5BDFW2BWYSUUY1AMLM0CQULU&v=20180605&ll=43.7532586,-79.3296565&radius=2500&limit=100'

Get result from that url:

In [64]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5ebb4d0cbae9a2001b81f73b'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 198,
  'suggestedBounds': {'ne': {'lat': 43.77575862250003,
    'lng': -79.29856519924638},
   'sw': {'lat': 43.73075857749998, 'lng': -79.36074780075361}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b8991cbf964a520814232e3',
       'name': "Allwyn's Bakery",
       'location': {'address': '81 Underhill drive',
        'lat': 43.75984035203157,
        'lng': -79.32471879917513,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.75984035203157,
   

Define get_category_type function:

In [65]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

Get the result:

In [66]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()


Unnamed: 0,name,categories,lat,lng
0,Allwyn's Bakery,Caribbean Restaurant,43.75984,-79.324719
1,Donalda Golf & Country Club,Golf Course,43.752816,-79.342741
2,Brookbanks Park,Park,43.751976,-79.33214
3,Island Foods,Caribbean Restaurant,43.745866,-79.346035
4,Galleria Supermarket,Supermarket,43.75352,-79.349518


Shape of the first neighbourhood 100 top venues:

In [67]:
nearby_venues.shape

(100, 4)

Next we want to work with only boroughs that contain the word Toronto. First we generate the data set:

In [141]:
Toronto_data= df_final[df_final['Borough'].str.contains('Toronto')]
Toronto_data=Toronto_data.sort_values(by='Borough')
Toronto_data.reset_index(drop=True,inplace=True)
Toronto_data.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5N,Central Toronto,Roselawn,43.711695,-79.416936
1,M4P,Central Toronto,Davisville North,43.712751,-79.390197
2,M5P,Central Toronto,Forest Hill North & West,43.696948,-79.411307
3,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
4,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678
5,M4S,Central Toronto,Davisville,43.704324,-79.38879
6,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
7,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049
8,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
9,M5T,Downtown Toronto,"Kensington Market, Chinatown, Grange Park",43.653206,-79.400049


In [142]:
Toronto_data.shape

(39, 5)

Define a function to loop over all neighbourhood

In [123]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

Create a newdataframe called toronto_venues to record all venues 

In [124]:
toronto_venues = getNearbyVenues(names=Toronto_data['Neighborhood'],
                                   latitudes=Toronto_data['Latitude'],
                                   longitudes=Toronto_data['Longitude']
                                  )

Roselawn
Davisville North
Forest Hill North & West
North Toronto West
The Annex, North Midtown, Yorkville
Davisville
Moore Park, Summerhill East
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
Lawrence Park
Kensington Market, Chinatown, Grange Park
Church and Wellesley
University of Toronto, Harbord
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Rosedale
Stn A PO Boxes
St. James Town, Cabbagetown
First Canadian Place, Underground city
Regent Park, Harbourfront
Commerce Court, Victoria Hotel
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
Toronto Dominion Centre, Design Exchange
St. James Town
Berczy Park
Central Bay Street
Harbourfront East, Union Station, Toronto Islands
Christie
Richmond, Adelaide, King
The Beaches
Business reply mail Processing Centre
The Danforth West, Riverdale
India Bazaar, The Beaches West
Studio District
Dufferin, Dovercourt Village
Parkdale, Roncesvalles
Little P

Check the size of the resulting dataframe

In [125]:
print(toronto_venues.shape)
toronto_venues.head()

(3202, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Roselawn,43.711695,-79.416936,The Abbot,43.703688,-79.413485,Gastropub
1,Roselawn,43.711695,-79.416936,Hotel Gelato,43.703478,-79.414311,Café
2,Roselawn,43.711695,-79.416936,The Mad Bean Coffee House,43.703529,-79.413698,Coffee Shop
3,Roselawn,43.711695,-79.416936,7 Numbers,43.70363,-79.413724,Italian Restaurant
4,Roselawn,43.711695,-79.416936,Ferraro,43.703655,-79.413167,Italian Restaurant


In [126]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,100,100,100,100,100,100
"Brockton, Parkdale Village, Exhibition Place",100,100,100,100,100,100
Business reply mail Processing Centre,49,49,49,49,49,49
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",16,16,16,16,16,16
Central Bay Street,100,100,100,100,100,100
Christie,100,100,100,100,100,100
Church and Wellesley,100,100,100,100,100,100
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
Davisville,100,100,100,100,100,100
Davisville North,100,100,100,100,100,100


In [127]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 274 uniques categories.


## Analyze Each Neighborhood

In [128]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,neighborhood,Accessories Store,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,Art Gallery,...,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Women's Store,Yoga Studio,Zoo
0,Roselawn,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Roselawn,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Roselawn,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Roselawn,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Roselawn,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [129]:
toronto_grouped = toronto_onehot.groupby('neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,neighborhood,Accessories Store,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,Art Gallery,...,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Women's Store,Yoga Studio,Zoo
0,Berczy Park,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,...,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,...,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Business reply mail Processing Centre,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,...,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0
5,Christie,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,...,0.0,0.02,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0
6,Church and Wellesley,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,...,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.02,0.0
7,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,...,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.01,0.01,0.0,0.01,0.0,0.01,0.0,0.01,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.01,0.01,0.0,0.01,0.0,0.01,0.0,0.02,0.0


In [131]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [198]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Café,Hotel,Japanese Restaurant,Restaurant,Beer Bar,Park,Art Gallery,Liquor Store,Cocktail Bar
1,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Restaurant,Bar,Bakery,Furniture / Home Store,Gift Shop,Tibetan Restaurant,Park,Soccer Stadium
2,Business reply mail Processing Centre,Park,Coffee Shop,Pizza Place,Brewery,Sushi Restaurant,Italian Restaurant,Bakery,Fast Food Restaurant,Bistro,Pub
3,"CN Tower, King and Spadina, Railway Lands, Har...",Café,Coffee Shop,Harbor / Marina,Dance Studio,Scenic Lookout,Dog Run,Park,Track,Sushi Restaurant,Garden
4,Central Bay Street,Coffee Shop,Café,Ramen Restaurant,Park,Gastropub,Japanese Restaurant,Clothing Store,Cosmetics Shop,Italian Restaurant,Sushi Restaurant


In [133]:
neighborhoods_venues_sorted.shape

(39, 11)

## K-means clustering

In [185]:
from sklearn.cluster import KMeans

In [199]:
# set number of clusters
kclusters = 3

toronto_grouped_clustering = toronto_grouped.drop('neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 1, 0, 0, 0, 1, 0, 1, 1], dtype=int32)

In [200]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = Toronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')



In [195]:
toronto_merged.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5N,Central Toronto,Roselawn,43.711695,-79.416936,0,Sushi Restaurant,Italian Restaurant,Coffee Shop,Pharmacy,Bank,Café,Pilates Studio,Gym Pool,Pet Store,Dance Studio
1,M4P,Central Toronto,Davisville North,43.712751,-79.390197,0,Coffee Shop,Italian Restaurant,Restaurant,Café,Pizza Place,Sushi Restaurant,Gym,Dessert Shop,Pharmacy,Food & Drink Shop
2,M5P,Central Toronto,Forest Hill North & West,43.696948,-79.411307,0,Park,Coffee Shop,Bank,Café,Pharmacy,Japanese Restaurant,Skating Rink,Burger Joint,Trail,Sushi Restaurant
3,M4R,Central Toronto,North Toronto West,43.715383,-79.405678,0,Coffee Shop,Italian Restaurant,Café,Skating Rink,Mexican Restaurant,Clothing Store,Park,Sporting Goods Shop,Diner,Restaurant
4,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678,0,Café,Vegetarian / Vegan Restaurant,Coffee Shop,Italian Restaurant,Bakery,Grocery Store,Gym,Restaurant,Museum,Pizza Place


In [201]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [202]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine Clusters

#### Cluster 1

In [203]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Central Toronto,0,Café,Vegetarian / Vegan Restaurant,Coffee Shop,Italian Restaurant,Bakery,Grocery Store,Gym,Restaurant,Museum,Pizza Place
9,Downtown Toronto,0,Café,Bar,Coffee Shop,Vegetarian / Vegan Restaurant,Art Gallery,Yoga Studio,Bakery,Mexican Restaurant,Record Shop,Caribbean Restaurant
11,Downtown Toronto,0,Café,Bakery,Coffee Shop,Bar,Restaurant,Bookstore,Vegetarian / Vegan Restaurant,Mexican Restaurant,Japanese Restaurant,Burrito Place
12,Downtown Toronto,0,Café,Coffee Shop,Harbor / Marina,Dance Studio,Scenic Lookout,Dog Run,Park,Track,Sushi Restaurant,Garden
14,Downtown Toronto,0,Coffee Shop,Café,Restaurant,Japanese Restaurant,Cocktail Bar,Seafood Restaurant,Hotel,Gastropub,Beer Bar,Park
15,Downtown Toronto,0,Park,Diner,Japanese Restaurant,Restaurant,Gastropub,Café,Steakhouse,Caribbean Restaurant,Taiwanese Restaurant,Bakery
16,Downtown Toronto,0,Coffee Shop,Café,Hotel,Theater,Japanese Restaurant,Concert Hall,Restaurant,Park,Seafood Restaurant,Thai Restaurant
18,Downtown Toronto,0,Coffee Shop,Café,Japanese Restaurant,Hotel,Restaurant,Concert Hall,Theater,Seafood Restaurant,Gastropub,Cosmetics Shop
20,Downtown Toronto,0,Coffee Shop,Gastropub,Japanese Restaurant,Café,Hotel,Theater,Italian Restaurant,Restaurant,Diner,Middle Eastern Restaurant
21,Downtown Toronto,0,Coffee Shop,Café,Hotel,Japanese Restaurant,Theater,Concert Hall,Restaurant,Park,Seafood Restaurant,Thai Restaurant


#### Cluster 2

In [204]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,1,Sushi Restaurant,Italian Restaurant,Coffee Shop,Pharmacy,Bank,Café,Pilates Studio,Gym Pool,Pet Store,Dance Studio
1,Central Toronto,1,Coffee Shop,Italian Restaurant,Restaurant,Café,Pizza Place,Sushi Restaurant,Gym,Dessert Shop,Pharmacy,Food & Drink Shop
2,Central Toronto,1,Park,Coffee Shop,Bank,Café,Pharmacy,Japanese Restaurant,Skating Rink,Burger Joint,Trail,Sushi Restaurant
3,Central Toronto,1,Coffee Shop,Italian Restaurant,Café,Skating Rink,Mexican Restaurant,Clothing Store,Park,Sporting Goods Shop,Diner,Restaurant
5,Central Toronto,1,Italian Restaurant,Coffee Shop,Sushi Restaurant,Café,Pizza Place,Middle Eastern Restaurant,Indian Restaurant,Dessert Shop,Gym,Restaurant
6,Central Toronto,1,Coffee Shop,Park,Grocery Store,Italian Restaurant,Thai Restaurant,Restaurant,Gym,Sushi Restaurant,Bank,Bagel Shop
7,Central Toronto,1,Coffee Shop,Sushi Restaurant,Italian Restaurant,Park,Thai Restaurant,Gym / Fitness Center,Grocery Store,Restaurant,Bagel Shop,Liquor Store
10,Downtown Toronto,1,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Park,Café,Italian Restaurant,Gastropub,Bookstore,Thai Restaurant,Men's Store
13,Downtown Toronto,1,Coffee Shop,Park,Grocery Store,Filipino Restaurant,Breakfast Spot,Bistro,Bank,Candy Store,BBQ Joint,Athletics & Sports
17,Downtown Toronto,1,Coffee Shop,Diner,Park,Café,Restaurant,Theater,Bakery,Pub,Breakfast Spot,Italian Restaurant


#### Cluster 3

In [205]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Central Toronto,2,Coffee Shop,Trail,Bus Line,Park,Bookstore,College Quad,College Gym,Gym / Fitness Center,Café,Electronics Store
