In [3]:
import pandas as pd
import numpy as np
import json
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
import matplotlib as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
# import k-means from clustering stage
from sklearn.cluster import KMeans
import folium

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
geo_data = pd.read_csv('https://cocl.us/Geospatial_data')
dfp = pd.read_html(url)

In [4]:
df_edited = dfp[0]
df = df_edited[['Postal code','Borough', 'Neighborhood']]
df1 = df[df.Borough != 'Not assigned']

In [5]:
geo_data = geo_data.rename(columns = {'Postal Code':'Postal code'})
geo_data.head(2)

Unnamed: 0,Postal code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497


In [6]:
df2 = df1.merge(geo_data, on = 'Postal code', how = 'left')
df2

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Regent Park / Harbourfront,43.654260,-79.360636
3,M6A,North York,Lawrence Manor / Lawrence Heights,43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,The Kingsway / Montgomery Road / Old Mill North,43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,Business reply mail Processing CentrE,43.662744,-79.321558
101,M8Y,Etobicoke,Old Mill South / King's Mill Park / Sunnylea /...,43.636258,-79.498509


In [7]:
nyork_data = df2[df2['Borough'] == 'North York'].reset_index(drop = True)
nyork_data.shape

(24, 5)

In [8]:
downtown_data = df2[df2['Borough'] == 'Downtown Toronto'].reset_index(drop = True)
downtown_data.shape

(19, 5)

In [11]:
etob_data = df2[df2['Borough'] == 'Etobicoke'].reset_index(drop = True)
etob_data.shape

(12, 5)

In [12]:
# Toronto coordinates
latitude_n = 43.6532
longitude_n = (-79.3832)

In [13]:
CLIENT_ID = 'JFJSB5K45XHMFNY44DLYJ0OQ4GAYYCDQOQM2HGYFYIE3SVOD'
CLIENT_SECRET = 'E0O5PFWYNXB1ZTGZLHAC1LRZFFQJHBMDR2TPKQPAW5APUSRM'
VERSION = '20200404'
LIMIT = '100'

In [14]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius,
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [15]:
nyork_venues = getNearbyVenues(names=nyork_data['Neighborhood'],
                                latitudes=nyork_data['Latitude'],
                                longitudes=nyork_data['Longitude'],)


downtown_venues = getNearbyVenues(names=downtown_data['Neighborhood'],
                                latitudes=downtown_data['Latitude'],
                                longitudes=downtown_data['Longitude'],)

etob_venues = getNearbyVenues(names=etob_data['Neighborhood'],
                                latitudes=etob_data['Latitude'],
                                longitudes=etob_data['Longitude'],)

Parkwoods
Victoria Village
Lawrence Manor / Lawrence Heights
Don Mills
Glencairn
Don Mills
Hillcrest Village
Bathurst Manor / Wilson Heights / Downsview North
Fairview / Henry Farm / Oriole
Northwood Park / York University
Bayview Village
Downsview
York Mills / Silver Hills
Downsview
North Park / Maple Leaf Park / Upwood Park
Humber Summit
Willowdale / Newtonbrook
Downsview
Bedford Park / Lawrence Manor East
Humberlea / Emery
Willowdale
Downsview
York Mills West
Willowdale
Regent Park / Harbourfront
Queen's Park / Ontario Provincial Government
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Christie
Richmond / Adelaide / King
Harbourfront East / Union Station / Toronto Islands
Toronto Dominion Centre / Design Exchange
Commerce Court / Victoria Hotel
University of Toronto / Harbord
Kensington Market / Chinatown / Grange Park
CN Tower / King and Spadina / Railway Lands / Harbourfront West / Bathurst Quay / South Niagara / Island airport
Rosedale
Stn A PO Boxes
St. 

In [16]:
nyork_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,GTA Restoration,43.753396,-79.333477,Fireworks Store
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop


In [17]:
downtown_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Regent Park / Harbourfront,43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,Regent Park / Harbourfront,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,Regent Park / Harbourfront,43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,Regent Park / Harbourfront,43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,Regent Park / Harbourfront,43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot


In [18]:
etob_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Eringate / Bloordale Gardens / Old Burnhamthor...,43.643515,-79.577201,LCBO,43.642099,-79.576592,Liquor Store
1,Eringate / Bloordale Gardens / Old Burnhamthor...,43.643515,-79.577201,Starbucks,43.641312,-79.576924,Coffee Shop
2,Eringate / Bloordale Gardens / Old Burnhamthor...,43.643515,-79.577201,The Beer Store,43.641313,-79.576925,Beer Store
3,Eringate / Bloordale Gardens / Old Burnhamthor...,43.643515,-79.577201,Shoppers Drug Mart,43.641312,-79.576924,Cosmetics Shop
4,Eringate / Bloordale Gardens / Old Burnhamthor...,43.643515,-79.577201,Pizza Hut,43.641845,-79.576556,Pizza Place


In [20]:
nyork_onehot = pd.get_dummies(nyork_venues[['Venue Category']], prefix="", prefix_sep="")
# add neighborhood column back to dataframe
nyork_onehot['Neighborhood'] = nyork_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [nyork_onehot.columns[-1]] + list(nyork_onehot.columns[:-1])
nyork_onehot = nyork_onehot[fixed_columns]

#for Downtown Toronto
downtown_onehot = pd.get_dummies(downtown_venues[['Venue Category']], prefix="", prefix_sep="")
downtown_onehot['Neighborhood'] = downtown_venues['Neighborhood'] 
fixed_columns = [downtown_onehot.columns[-1]] + list(downtown_onehot.columns[:-1])
downtown_onehot = downtown_onehot[fixed_columns]

#Etobicoke
etob_onehot = pd.get_dummies(etob_venues[['Venue Category']], prefix="", prefix_sep="")
etob_onehot['Neighborhood'] = etob_venues['Neighborhood'] 
fixed_columns = [etob_onehot.columns[-1]] + list(etob_onehot.columns[:-1])
etob_onehot = etob_onehot[fixed_columns]


In [21]:
nyork_grouped = nyork_onehot.groupby('Neighborhood').mean().reset_index()
downtown_grouped = downtown_onehot.groupby('Neighborhood').mean().reset_index()
etob_grouped = etob_onehot.groupby('Neighborhood').mean().reset_index()

In [22]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [23]:
# North York
num_top_venues = 5

for hood in nyork_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = nyork_grouped[nyork_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bathurst Manor / Wilson Heights / Downsview North----
           venue  freq
0    Coffee Shop  0.11
1           Bank  0.11
2  Shopping Mall  0.05
3    Supermarket  0.05
4  Deli / Bodega  0.05


----Bayview Village----
                 venue  freq
0   Chinese Restaurant  0.25
1                 Bank  0.25
2                 Café  0.25
3  Japanese Restaurant  0.25
4    Accessories Store  0.00


----Bedford Park / Lawrence Manor East----
                venue  freq
0      Sandwich Place  0.08
1          Restaurant  0.08
2         Pizza Place  0.08
3    Sushi Restaurant  0.08
4  Italian Restaurant  0.08


----Don Mills----
                 venue  freq
0           Restaurant  0.07
1     Asian Restaurant  0.07
2                  Gym  0.07
3  Japanese Restaurant  0.07
4          Coffee Shop  0.07


----Downsview----
                        venue  freq
0               Grocery Store  0.23
1                        Park  0.15
2                        Bank  0.08
3  Construction & Landscaping  0.

In [71]:
# Downtown Toronto
num_top_venues = 5

for hood in downtown_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = downtown_grouped[downtown_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
                venue  freq
0         Coffee Shop  0.05
1        Cocktail Bar  0.05
2            Beer Bar  0.04
3  Italian Restaurant  0.04
4      Farmers Market  0.04


----CN Tower / King and Spadina / Railway Lands / Harbourfront West / Bathurst Quay / South Niagara / Island airport----
                venue  freq
0     Airport Service  0.17
1      Airport Lounge  0.11
2    Airport Terminal  0.11
3         Coffee Shop  0.06
4  Airport Food Court  0.06


----Central Bay Street----
                 venue  freq
0          Coffee Shop  0.22
1   Italian Restaurant  0.07
2                 Café  0.05
3       Sandwich Place  0.05
4  Japanese Restaurant  0.03


----Christie----
           venue  freq
0  Grocery Store  0.22
1           Café  0.17
2           Park  0.11
3      Nightclub  0.06
4    Candy Store  0.06


----Church and Wellesley----
                 venue  freq
0          Coffee Shop  0.08
1     Sushi Restaurant  0.07
2  Japanese Restaurant  0.05
3           Re

In [72]:
# Etobicoke
num_top_venues = 5

for hood in etob_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = etob_grouped[etob_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Alderwood / Long Branch----
         venue  freq
0  Pizza Place   0.2
1  Coffee Shop   0.1
2          Pub   0.1
3          Gym   0.1
4     Pharmacy   0.1


----Eringate / Bloordale Gardens / Old Burnhamthorpe / Markland Wood----
         venue  freq
0   Beer Store  0.12
1    Pet Store  0.12
2  Pizza Place  0.12
3         Café  0.12
4  Coffee Shop  0.12


----Kingsview Village / St. Phillips / Martin Grove Gardens / Richview Gardens----
                 venue  freq
0          Pizza Place  0.25
1       Sandwich Place  0.25
2    Mobile Phone Shop  0.25
3                 Park  0.25
4  American Restaurant  0.00


----Mimico NW / The Queensway West / South of Bloor / Kingsway Park South West / Royal York South West----
            venue  freq
0     Wings Joint  0.07
1  Sandwich Place  0.07
2      Kids Store  0.07
3  Hardware Store  0.07
4             Gym  0.07


----New Toronto / Mimico South / Humber Bay Shores----
                 venue  freq
0                 Café  0.14
1  American Re

In [73]:
# North York
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
nyork_neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
nyork_neighborhoods_venues_sorted['Neighborhood'] = nyork_grouped['Neighborhood']

for ind in np.arange(nyork_grouped.shape[0]):
    nyork_neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(nyork_grouped.iloc[ind, :], num_top_venues)

nyork_neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Bathurst Manor / Wilson Heights / Downsview North,Bank,Coffee Shop,Supermarket,Pizza Place,Bridal Shop
1,Bayview Village,Japanese Restaurant,Chinese Restaurant,Café,Bank,Electronics Store
2,Bedford Park / Lawrence Manor East,Restaurant,Sandwich Place,Coffee Shop,Italian Restaurant,Sushi Restaurant
3,Don Mills,Restaurant,Beer Store,Asian Restaurant,Coffee Shop,Japanese Restaurant
4,Downsview,Grocery Store,Park,Liquor Store,Gym / Fitness Center,Baseball Field


In [74]:
nyork_neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Bathurst Manor / Wilson Heights / Downsview North,Bank,Coffee Shop,Supermarket,Pizza Place,Bridal Shop
1,Bayview Village,Japanese Restaurant,Chinese Restaurant,Café,Bank,Electronics Store
2,Bedford Park / Lawrence Manor East,Restaurant,Sandwich Place,Coffee Shop,Italian Restaurant,Sushi Restaurant
3,Don Mills,Restaurant,Beer Store,Asian Restaurant,Coffee Shop,Japanese Restaurant
4,Downsview,Grocery Store,Park,Liquor Store,Gym / Fitness Center,Baseball Field
5,Fairview / Henry Farm / Oriole,Clothing Store,Coffee Shop,Fast Food Restaurant,Japanese Restaurant,Bakery
6,Glencairn,Japanese Restaurant,Playground,Bakery,Pub,Discount Store
7,Hillcrest Village,Dog Run,Golf Course,Pool,Mediterranean Restaurant,Discount Store
8,Humber Summit,Pizza Place,Empanada Restaurant,Women's Store,Discount Store,Concert Hall
9,Humberlea / Emery,Food Service,Baseball Field,Women's Store,Electronics Store,Construction & Landscaping


In [75]:
# Downtown Toronto
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
downtown_neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
downtown_neighborhoods_venues_sorted['Neighborhood'] = downtown_grouped['Neighborhood']

for ind in np.arange(downtown_grouped.shape[0]):
    downtown_neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(downtown_grouped.iloc[ind, :], num_top_venues)

downtown_neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Beer Bar,Farmers Market,Bakery
1,CN Tower / King and Spadina / Railway Lands / ...,Airport Service,Airport Lounge,Airport Terminal,Boat or Ferry,Harbor / Marina
2,Central Bay Street,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Salad Place
3,Christie,Grocery Store,Café,Park,Athletics & Sports,Gas Station
4,Church and Wellesley,Coffee Shop,Sushi Restaurant,Japanese Restaurant,Restaurant,Hotel


In [76]:
downtown_neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Beer Bar,Farmers Market,Bakery
1,CN Tower / King and Spadina / Railway Lands / ...,Airport Service,Airport Lounge,Airport Terminal,Boat or Ferry,Harbor / Marina
2,Central Bay Street,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Salad Place
3,Christie,Grocery Store,Café,Park,Athletics & Sports,Gas Station
4,Church and Wellesley,Coffee Shop,Sushi Restaurant,Japanese Restaurant,Restaurant,Hotel
5,Commerce Court / Victoria Hotel,Coffee Shop,Restaurant,Café,Hotel,Gym
6,First Canadian Place / Underground city,Coffee Shop,Café,Restaurant,Hotel,Gym
7,"Garden District, Ryerson",Coffee Shop,Clothing Store,Café,Cosmetics Shop,Japanese Restaurant
8,Harbourfront East / Union Station / Toronto Is...,Coffee Shop,Aquarium,Italian Restaurant,Restaurant,Café
9,Kensington Market / Chinatown / Grange Park,Café,Coffee Shop,Vietnamese Restaurant,Dessert Shop,Mexican Restaurant


In [77]:
# Etob
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
etob_neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
etob_neighborhoods_venues_sorted['Neighborhood'] = etob_grouped['Neighborhood']

for ind in np.arange(etob_grouped.shape[0]):
    etob_neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(etob_grouped.iloc[ind, :], num_top_venues)

etob_neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Alderwood / Long Branch,Pizza Place,Pharmacy,Athletics & Sports,Skating Rink,Gym
1,Eringate / Bloordale Gardens / Old Burnhamthor...,Pizza Place,Pet Store,Beer Store,Liquor Store,Cosmetics Shop
2,Kingsview Village / St. Phillips / Martin Grov...,Sandwich Place,Mobile Phone Shop,Pizza Place,Park,Wings Joint
3,Mimico NW / The Queensway West / South of Bloo...,Wings Joint,Kids Store,Bakery,Burger Joint,Burrito Place
4,New Toronto / Mimico South / Humber Bay Shores,Café,American Restaurant,Pizza Place,Gym,Liquor Store


In [78]:
etob_neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Alderwood / Long Branch,Pizza Place,Pharmacy,Athletics & Sports,Skating Rink,Gym
1,Eringate / Bloordale Gardens / Old Burnhamthor...,Pizza Place,Pet Store,Beer Store,Liquor Store,Cosmetics Shop
2,Kingsview Village / St. Phillips / Martin Grov...,Sandwich Place,Mobile Phone Shop,Pizza Place,Park,Wings Joint
3,Mimico NW / The Queensway West / South of Bloo...,Wings Joint,Kids Store,Bakery,Burger Joint,Burrito Place
4,New Toronto / Mimico South / Humber Bay Shores,Café,American Restaurant,Pizza Place,Gym,Liquor Store
5,Northwest,Drugstore,Rental Car Location,Wings Joint,Chinese Restaurant,Fried Chicken Joint
6,Old Mill South / King's Mill Park / Sunnylea /...,Baseball Field,Breakfast Spot,Locksmith,Construction & Landscaping,Wings Joint
7,South Steeles / Silverstone / Humbergate / Jam...,Grocery Store,Pizza Place,Fast Food Restaurant,Discount Store,Japanese Restaurant
8,The Kingsway / Montgomery Road / Old Mill North,River,Pool,Park,Wings Joint,Chinese Restaurant
9,Westmount,Pizza Place,Chinese Restaurant,Intersection,Sandwich Place,Discount Store


In [62]:
# Creating Clusters
kclusters = 10

nyork_grouped_clustering = nyork_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans_n = KMeans(n_clusters=kclusters, random_state=0).fit(nyork_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans_n.labels_[0:10]

array([3, 4, 3, 3, 0, 3, 9, 8, 6, 2])

In [31]:
# Downtown Toronto
kclusters = 5
downtown_grouped_clustering = downtown_grouped.drop('Neighborhood', 1)

kmeans_d = KMeans(n_clusters=kclusters, random_state=0).fit(downtown_grouped_clustering)
kmeans_d.labels_[0:5]

array([0, 3, 4, 2, 0])

In [68]:
# Etob
kclusters = 10
etob_grouped_clustering = etob_grouped.drop('Neighborhood', 1)

kmeans_e = KMeans(n_clusters=kclusters, random_state=0).fit(etob_grouped_clustering)
kmeans_e.labels_[0:10]

array([2, 8, 5, 9, 4, 0, 1, 6, 3, 7])

In [63]:
#Merging final dataframe
nyork_neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans_n.labels_)
nyork_merged = nyork_data
nyork_merged = nyork_merged.join(nyork_neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

nyork_merged1 = nyork_merged.drop(nyork_merged.index[16])
nyork_merged1.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,7.0,Park,Food & Drink Shop,Fireworks Store,Discount Store,Comfort Food Restaurant
1,M4A,North York,Victoria Village,43.725882,-79.315572,3.0,Coffee Shop,Grocery Store,Hockey Arena,Portuguese Restaurant,Nail Salon
2,M6A,North York,Lawrence Manor / Lawrence Heights,43.718518,-79.464763,3.0,Clothing Store,Women's Store,Shoe Store,Boutique,Coffee Shop
3,M3B,North York,Don Mills,43.745906,-79.352188,3.0,Restaurant,Beer Store,Asian Restaurant,Coffee Shop,Japanese Restaurant
4,M6B,North York,Glencairn,43.709577,-79.445073,9.0,Japanese Restaurant,Playground,Bakery,Pub,Discount Store


In [35]:
downtown_neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans_d.labels_)
downtown_merged = downtown_data
downtown_merged = downtown_merged.join(downtown_neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

downtown_merged1 = downtown_merged.drop(downtown_merged.index[16])
downtown_merged1.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636,0,Coffee Shop,Bakery,Park,Pub,Restaurant
1,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494,4,Coffee Shop,Diner,Sushi Restaurant,Gym,Mexican Restaurant
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Coffee Shop,Clothing Store,Café,Cosmetics Shop,Japanese Restaurant
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Café,Coffee Shop,Italian Restaurant,Gastropub,Cocktail Bar
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,Coffee Shop,Cocktail Bar,Beer Bar,Farmers Market,Bakery


In [69]:
etob_neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans_e.labels_)
etob_merged = etob_data
etob_merged = etob_merged.join(etob_neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

etob_merged1 = etob_merged.drop(etob_merged.index[11])
etob_merged1.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242,,,,,,
1,M9B,Etobicoke,West Deane Park / Princess Gardens / Martin Gr...,43.650943,-79.554724,,,,,,
2,M9C,Etobicoke,Eringate / Bloordale Gardens / Old Burnhamthor...,43.643515,-79.577201,8.0,Pizza Place,Pet Store,Beer Store,Liquor Store,Cosmetics Shop
3,M9P,Etobicoke,Westmount,43.696319,-79.532242,7.0,Pizza Place,Chinese Restaurant,Intersection,Sandwich Place,Discount Store
4,M9R,Etobicoke,Kingsview Village / St. Phillips / Martin Grov...,43.688905,-79.554724,5.0,Sandwich Place,Mobile Phone Shop,Pizza Place,Park,Wings Joint


In [52]:
etob_merged2 = etob_merged1.drop(etob_merged1.index[[0,1]]).reset_index(drop = True)

In [54]:
etob_merged2

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M9C,Etobicoke,Eringate / Bloordale Gardens / Old Burnhamthor...,43.643515,-79.577201,0.0,Pizza Place,Pet Store,Beer Store,Liquor Store,Cosmetics Shop
1,M9P,Etobicoke,Westmount,43.696319,-79.532242,0.0,Pizza Place,Chinese Restaurant,Intersection,Sandwich Place,Discount Store
2,M9R,Etobicoke,Kingsview Village / St. Phillips / Martin Grov...,43.688905,-79.554724,4.0,Sandwich Place,Mobile Phone Shop,Pizza Place,Park,Wings Joint
3,M8V,Etobicoke,New Toronto / Mimico South / Humber Bay Shores,43.605647,-79.501321,0.0,Café,American Restaurant,Pizza Place,Gym,Liquor Store
4,M9V,Etobicoke,South Steeles / Silverstone / Humbergate / Jam...,43.739416,-79.588437,0.0,Grocery Store,Pizza Place,Fast Food Restaurant,Discount Store,Japanese Restaurant
5,M8W,Etobicoke,Alderwood / Long Branch,43.602414,-79.543484,0.0,Pizza Place,Pharmacy,Athletics & Sports,Skating Rink,Gym
6,M9W,Etobicoke,Northwest,43.706748,-79.594054,1.0,Drugstore,Rental Car Location,Wings Joint,Chinese Restaurant,Fried Chicken Joint
7,M8X,Etobicoke,The Kingsway / Montgomery Road / Old Mill North,43.653654,-79.506944,2.0,River,Pool,Park,Wings Joint,Chinese Restaurant
8,M8Y,Etobicoke,Old Mill South / King's Mill Park / Sunnylea /...,43.636258,-79.498509,3.0,Baseball Field,Breakfast Spot,Locksmith,Construction & Landscaping,Wings Joint


In [64]:
#North York
nyork_clusters = folium.Map(location=[latitude_n, longitude_n], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(nyork_merged1['Latitude'], nyork_merged1['Longitude'], nyork_merged1['Neighborhood'], nyork_merged1['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(nyork_clusters)
       
nyork_clusters

In [65]:
#Downtown Toronto
downtown_clusters = folium.Map(location=[latitude_n, longitude_n], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(downtown_merged1['Latitude'], downtown_merged1['Longitude'], downtown_merged1['Neighborhood'], downtown_merged1['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(downtown_clusters)
       
downtown_clusters

In [70]:
# Etob
etob_clusters = folium.Map(location=[latitude_n, longitude_n], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(etob_merged2['Latitude'], etob_merged2['Longitude'], etob_merged2['Neighborhood'], etob_merged2['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(etob_clusters)
       
etob_clusters