# Clustering Toronto Neighborhoods
### Toronto Neighborhoods

In [22]:
import pandas as pd
import numpy as np

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

## Import Data

In [6]:
neighborhoods = pd.read_csv('Dataset_PC_with_Coord.csv')
neighborhoods.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.810154,-79.194603
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784672,-79.158958
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.766289,-79.17289
3,M1G,Scarborough,Woburn,43.768288,-79.214111
4,M1H,Scarborough,Cedarbrae,43.76918,-79.23877


## Create Map

In [11]:
latitude = 43.768288
longitude = -79.214111

# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

## Segmentation

### Foursquare Credentials

In [12]:
CLIENT_ID = 'ATMQSFMEF5VPDE4QOFG3JE0X3KYIXUXD4PZUWBIVQAMVQZGD' # your Foursquare ID
CLIENT_SECRET = '5JQB0K3JKS3FH43VHE50VGW40RLYXZSPYZ4XFJAN34FPLR21' # your Foursquare Secret
VERSION = '20200329' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: ATMQSFMEF5VPDE4QOFG3JE0X3KYIXUXD4PZUWBIVQAMVQZGD
CLIENT_SECRET:5JQB0K3JKS3FH43VHE50VGW40RLYXZSPYZ4XFJAN34FPLR21


### Exploring the first neighborhood in dataset

In [13]:
dataset.loc[0, 'Neighborhood']

'Malvern, Rouge'

In [14]:
neighborhood_latitude = dataset.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = dataset.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = dataset.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Malvern, Rouge are 43.81015396118164, -79.1946029663086.


### Get top 100 venues in neighborhood

In [None]:
https://api.foursquare.com/v2/venues/search?client_id=ATMQSFMEF5VPDE4QOFG3JE0X3KYIXUXD4PZUWBIVQAMVQZGD&client_secret=5JQB0K3JKS3FH43VHE50VGW40RLYXZSPYZ4XFJAN34FPLR21&v=20200326&ll=40.73,-74.01&query=coffee

In [65]:
info_type = 'venues'
endpoint = 'search'
ll = '{},{}'.format(neighborhood_latitude,neighborhood_longitude)
intent = 'browse'
radius = '500'
LIMIT = '100'

url = 'https://api.foursquare.com/v2/{}/{}?client_id={}&client_secret={}&v={}&ll={}&intent={}&radius={}&limit={}'.format(
    info_type,
    endpoint,
    CLIENT_ID,
    CLIENT_SECRET,
    VERSION,
    ll,
    intent,
    radius,
    limit)

In [66]:
# GET data
results = requests.get(url).json()

### Transform JSON results into dataframe

In [67]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [68]:
venues = results['response']['venues']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['name', 'categories', 'location.lat', 'location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]


print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))
nearby_venues.sample(5)

100 venues were returned by Foursquare.


Unnamed: 0,name,categories,lat,lng
60,Extendicare Rouge Valley,Medical Center,43.803974,-79.189386
30,Auto Camping,Automotive Shop,43.807896,-79.199733
53,Blue Ocean Nails & Spa,Nail Salon,43.801053,-79.19902
32,Pakeer Sahadevan - Re/MAX Community Realty Inc.,,43.802551,-79.199422
51,T 1 Trading,Factory,43.80854,-79.195511


### Function for all neighborhoods

In [69]:
info_type = 'venues'
endpoint = 'search'
ll = '{},{}'.format(neighborhood_latitude,neighborhood_longitude)
intent = 'browse'
radius = '500'
LIMIT = '100'

In [70]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [None]:
# Get venue data for each neighborhood
toronto_venues = getNearbyVenues(names=dataset['Neighborhood'],
                                   latitudes=dataset['Latitude'],
                                   longitudes=dataset['Longitude']
                                  )

Malvern, Rouge
Rouge Hill, Port Union, Highland Creek
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
Kennedy Park, Ionview, East Birchmount Park
Golden Mile, Clairlea, Oakridge
Cliffside, Cliffcrest, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Wexford Heights, Scarborough Town Centre
Wexford, Maryvale
Agincourt
Clarks Corners, Tam O'Shanter, Sullivan
Milliken, Agincourt North, Steeles East, L'Amoreaux East
Steeles West, L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
York Mills, Silver Hills


In [83]:
# What's the shape of the resuting dataset?
print(toronto_venues.shape)
toronto_venues.head()

(2364, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",43.810154,-79.194603,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,"Rouge Hill, Port Union, Highland Creek",43.784672,-79.158958,Royal Canadian Legion,43.782533,-79.163085,Bar
2,"Guildwood, Morningside, West Hill",43.766289,-79.17289,Heron Park Community Centre,43.768867,-79.176958,Gym / Fitness Center
3,"Guildwood, Morningside, West Hill",43.766289,-79.17289,Homestead Roofing Repair,43.76514,-79.178663,Construction & Landscaping
4,"Guildwood, Morningside, West Hill",43.766289,-79.17289,Heron Park,43.769327,-79.177201,Park


In [81]:
# How many veneues were returned per neighborhood?
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,20,20,20,20,20,20
"Alderwood, Long Branch",9,9,9,9,9,9
"Bathurst Manor, Wilson Heights, Downsview North",3,3,3,3,3,3
Bayview Village,3,3,3,3,3,3
"Bedford Park, Lawrence Manor East",23,23,23,23,23,23
...,...,...,...,...,...,...
WillowdaleWest,8,8,8,8,8,8
Woburn,3,3,3,3,3,3
Woodbine Heights,7,7,7,7,7,7
York Mills West,4,4,4,4,4,4


In [84]:
# How many unique categories do we have?
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 262 uniques categories.


In [86]:
# View the resulting dataset
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",43.810154,-79.194603,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,"Rouge Hill, Port Union, Highland Creek",43.784672,-79.158958,Royal Canadian Legion,43.782533,-79.163085,Bar
2,"Guildwood, Morningside, West Hill",43.766289,-79.17289,Heron Park Community Centre,43.768867,-79.176958,Gym / Fitness Center
3,"Guildwood, Morningside, West Hill",43.766289,-79.17289,Homestead Roofing Repair,43.76514,-79.178663,Construction & Landscaping
4,"Guildwood, Morningside, West Hill",43.766289,-79.17289,Heron Park,43.769327,-79.177201,Park


## Preprocess Neighborhoods

In [102]:
# one hot encoding
toronto_one_hot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_one_hot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = ['Neighborhood'] + list(toronto_one_hot.drop(columns=['Neighborhood']).columns)
toronto_one_hot = toronto_one_hot[fixed_columns]

toronto_one_hot.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,...,Train Station,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Rouge Hill, Port Union, Highland Creek",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [103]:
# What's the shape of resulting df?
toronto_one_hot.shape

(2364, 262)

In [104]:
# Group neighborhoods by frequency of occorency of venues categories
toronto_grouped = toronto_one_hot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,...,Train Station,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.111111,0.0,...,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,WillowdaleWest,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0
97,Woburn,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0
98,Woodbine Heights,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0
99,York Mills West,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0


### Dataframe of most common venues per neighborhood

In [107]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

In [108]:
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Chinese Restaurant,Coffee Shop,Hong Kong Restaurant,Badminton Court,Shanghai Restaurant,Bubble Tea Shop,Sandwich Place,Supermarket,Sushi Restaurant,Discount Store
1,"Alderwood, Long Branch",Convenience Store,Pizza Place,Gas Station,Pharmacy,Sandwich Place,Gym,Athletics & Sports,Coffee Shop,Pub,Electronics Store
2,"Bathurst Manor, Wilson Heights, Downsview North",Bar,IT Services,Park,Yoga Studio,Ethiopian Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,Event Space
3,Bayview Village,Construction & Landscaping,Golf Driving Range,Trail,Dog Run,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant
4,"Bedford Park, Lawrence Manor East",Sandwich Place,Italian Restaurant,Coffee Shop,Thai Restaurant,Pharmacy,Indian Restaurant,Café,Fast Food Restaurant,Liquor Store,Restaurant


### Cluster Neighborhoods

Run K-Means to create 5 clusters

In [None]:
# set number of clusters
kclusters = 5

toronto_grouped_cluster = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_cluster)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

In [111]:
neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Chinese Restaurant,Coffee Shop,Hong Kong Restaurant,Badminton Court,Shanghai Restaurant,Bubble Tea Shop,Sandwich Place,Supermarket,Sushi Restaurant,Discount Store
1,"Alderwood, Long Branch",Convenience Store,Pizza Place,Gas Station,Pharmacy,Sandwich Place,Gym,Athletics & Sports,Coffee Shop,Pub,Electronics Store
2,"Bathurst Manor, Wilson Heights, Downsview North",Bar,IT Services,Park,Yoga Studio,Ethiopian Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,Event Space
3,Bayview Village,Construction & Landscaping,Golf Driving Range,Trail,Dog Run,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant
4,"Bedford Park, Lawrence Manor East",Sandwich Place,Italian Restaurant,Coffee Shop,Thai Restaurant,Pharmacy,Indian Restaurant,Café,Fast Food Restaurant,Liquor Store,Restaurant
...,...,...,...,...,...,...,...,...,...,...,...
96,WillowdaleWest,Coffee Shop,Convenience Store,Shopping Mall,Sandwich Place,Eastern European Restaurant,Pizza Place,Bus Line,Falafel Restaurant,Event Space,Farm
97,Woburn,Soccer Field,Business Service,Korean Restaurant,Yoga Studio,Dumpling Restaurant,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
98,Woodbine Heights,Thrift / Vintage Store,Gas Station,Bar,Breakfast Spot,Arts & Crafts Store,Café,Sushi Restaurant,Yoga Studio,Elementary School,Empanada Restaurant
99,York Mills West,Convenience Store,Park,Speakeasy,Bank,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant


In [112]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = neighborhoods

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

# Drop N/A clusters
toronto_merged = toronto_merged.dropna(subset=['Cluster Labels'])

# Convert cluster labes to int
toronto_merged['Cluster Labels'] = toronto_merged['Cluster Labels'].astype(int)

toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.810154,-79.194603,2.0,Fast Food Restaurant,Yoga Studio,Flea Market,Fish & Chips Shop,Field,Farmers Market,Farm,Falafel Restaurant,Event Space,Ethiopian Restaurant
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784672,-79.158958,3.0,Bar,Donut Shop,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.766289,-79.17289,1.0,Gym / Fitness Center,Construction & Landscaping,Athletics & Sports,Park,Event Space,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant
3,M1G,Scarborough,Woburn,43.768288,-79.214111,3.0,Soccer Field,Business Service,Korean Restaurant,Yoga Studio,Dumpling Restaurant,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
4,M1H,Scarborough,Cedarbrae,43.76918,-79.23877,3.0,Construction & Landscaping,Gaming Cafe,Trail,Ethiopian Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,Event Space,Donut Shop


In [144]:
toronto_merged.tail()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
98,M9N,York,Weston,43.70557,-79.518616,1,Thai Restaurant,Farmers Market,Diner,Pharmacy,Park,Fast Food Restaurant,Farm,Falafel Restaurant,Event Space,Dog Run
99,M9P,Etobicoke,Westmount,43.696484,-79.529266,0,Pizza Place,Middle Eastern Restaurant,Sandwich Place,Coffee Shop,Chinese Restaurant,Farm,Falafel Restaurant,Event Space,Donut Shop,Empanada Restaurant
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.684505,-79.557915,3,Beach Bar,Home Service,Music Venue,Arts & Crafts Store,Gym,Event Space,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.742935,-79.584831,3,Grocery Store,Fast Food Restaurant,Pizza Place,Beer Store,Sushi Restaurant,Sandwich Place,Caribbean Restaurant,Park,Gym Pool,Pharmacy
102,M9W,EtobicokeNorthwest,"Clairville, Humberwood, Woodbine Downs, West H...",43.711552,-79.584526,0,Gym,Coffee Shop,Gas Station,Sandwich Place,Farmers Market,Farm,Falafel Restaurant,Event Space,Ethiopian Restaurant,Donut Shop


### Visualize results

In [138]:
# create map
map_clusters = folium.Map(location=[43.6532, -79.3832], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine Individual Clusters

In [139]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,Scarborough,0,Shopping Mall,Pizza Place,Pharmacy,Coffee Shop,Yoga Studio,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant
24,North York,0,Coffee Shop,Convenience Store,Shopping Mall,Sandwich Place,Eastern European Restaurant,Pizza Place,Bus Line,Falafel Restaurant,Event Space,Farm
48,Central Toronto,0,Convenience Store,Gym,Playground,Historic Site,Dog Run,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
89,Etobicoke,0,Convenience Store,Pizza Place,Gas Station,Pharmacy,Sandwich Place,Gym,Athletics & Sports,Coffee Shop,Pub,Electronics Store
94,Etobicoke,0,Pizza Place,Chinese Restaurant,Coffee Shop,Tea Room,Empanada Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School
99,Etobicoke,0,Pizza Place,Middle Eastern Restaurant,Sandwich Place,Coffee Shop,Chinese Restaurant,Farm,Falafel Restaurant,Event Space,Donut Shop,Empanada Restaurant
102,EtobicokeNorthwest,0,Gym,Coffee Shop,Gas Station,Sandwich Place,Farmers Market,Farm,Falafel Restaurant,Event Space,Ethiopian Restaurant,Donut Shop


In [140]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Scarborough,1,Gym / Fitness Center,Construction & Landscaping,Athletics & Sports,Park,Event Space,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant
9,Scarborough,1,Park,College Stadium,General Entertainment,Skating Rink,Gym Pool,Gym,Empanada Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
14,Scarborough,1,Playground,Park,Pharmacy,Gym,Shopping Plaza,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant
20,North York,1,Park,Yoga Studio,Donut Shop,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
23,North York,1,Convenience Store,Park,Speakeasy,Bank,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant
25,North York,1,Park,Bed & Breakfast,Food & Drink Shop,Pet Store,Yoga Studio,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant
28,North York,1,Bar,IT Services,Park,Yoga Studio,Ethiopian Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,Event Space
30,North York,1,Park,Airport,Food Court,Coffee Shop,Yoga Studio,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant,Event Space
34,North York,1,Park,Pizza Place,Portuguese Restaurant,Intersection,Yoga Studio,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant
41,East Toronto,1,Grocery Store,Discount Store,Café,Park,Yoga Studio,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant


In [141]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,2,Fast Food Restaurant,Yoga Studio,Flea Market,Fish & Chips Shop,Field,Farmers Market,Farm,Falafel Restaurant,Event Space,Ethiopian Restaurant
80,York,2,Fast Food Restaurant,Construction & Landscaping,Coffee Shop,Yoga Studio,Ethiopian Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,Falafel Restaurant


In [142]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Scarborough,3,Bar,Donut Shop,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
3,Scarborough,3,Soccer Field,Business Service,Korean Restaurant,Yoga Studio,Dumpling Restaurant,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
4,Scarborough,3,Construction & Landscaping,Gaming Cafe,Trail,Ethiopian Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,Event Space,Donut Shop
5,Scarborough,3,Restaurant,Grocery Store,Indian Restaurant,Train Station,Farmers Market,Farm,Falafel Restaurant,Event Space,Ethiopian Restaurant,Distribution Center
6,Scarborough,3,Department Store,Hobby Shop,Discount Store,Coffee Shop,Yoga Studio,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant,Event Space
...,...,...,...,...,...,...,...,...,...,...,...,...
95,Etobicoke,3,Grocery Store,Shopping Mall,Pizza Place,Fish & Chips Shop,Carpet Store,Electronics Store,Eastern European Restaurant,Elementary School,Empanada Restaurant,Yoga Studio
96,North York,3,Sporting Goods Shop,Yoga Studio,Donut Shop,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
97,North York,3,Coffee Shop,Nightclub,Park,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Yoga Studio
100,Etobicoke,3,Beach Bar,Home Service,Music Venue,Arts & Crafts Store,Gym,Event Space,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant


In [143]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Scarborough,4,Convenience Store,Auto Garage,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Donut Shop
40,East YorkEast Toronto,4,Convenience Store,Flea Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space,Ethiopian Restaurant
