In [18]:
# Reading the wiki page and scrapping the Postal code table information 
import numpy as np
import pandas as pd
from pandas.io.html import read_html
link="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
tables = pd.read_html(link,header=0)[0] # 0 represents the 1st table in the link and 1 represent the 2nd table

tables

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
...,...,...,...
283,M8Z,Etobicoke,Mimico NW
284,M8Z,Etobicoke,The Queensway West
285,M8Z,Etobicoke,Royal York South West
286,M8Z,Etobicoke,South of Bloor


In [19]:
# Converting to pandas dataframe 
PC_can=pd.DataFrame(tables)
PC_can.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [20]:
# Drop rows where borough=Not assigned
PC_cano=PC_can[PC_can.Borough!='Not assigned']
PC_cano

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
...,...,...,...
282,M8Z,Etobicoke,Kingsway Park South West
283,M8Z,Etobicoke,Mimico NW
284,M8Z,Etobicoke,The Queensway West
285,M8Z,Etobicoke,Royal York South West


In [21]:
#More than one neighborhood can exist in one postal code area...
#...These two rows will be combined into one row with the neighborhoods separated with a comma
df = (PC_cano['Neighbourhood'].str.split(', ') 
                    .groupby(PC_cano['Postcode'])
                    .agg({ 'Neighbourhood':lambda x: ', '.join(set(y for z in x for y in z))})
                    .reset_index())
df


is deprecated and will be removed in a future version. Use                 named aggregation instead.

    >>> grouper.agg(name_1=func_1, name_2=func_2)

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,Postcode,Neighbourhood
0,M1B,"Rouge, Malvern"
1,M1C,"Port Union, Highland Creek, Rouge Hill"
2,M1E,"Guildwood, West Hill, Morningside"
3,M1G,Woburn
4,M1H,Cedarbrae
...,...,...
98,M9N,Weston
99,M9P,Westmount
100,M9R,"Richview Gardens, Martin Grove Gardens, St. Ph..."
101,M9V,"Albion Gardens, Mount Olive, Thistletown, Humb..."


In [22]:
# Adding Borough from PC_cano dataset to the df dataset with similar Postcodes

dfr=df.join(PC_cano.set_index('Postcode')[['Borough']], on='Postcode') 
dfr # generates new dataset but with duplicate values of Postcode since neighbourhod was not split and joined in 1st dataframe
dfr = dfr.drop_duplicates(subset='Postcode', keep='first')#Dropping duplicate postcodes
dfr

Unnamed: 0,Postcode,Neighbourhood,Borough
0,M1B,"Rouge, Malvern",Scarborough
1,M1C,"Port Union, Highland Creek, Rouge Hill",Scarborough
2,M1E,"Guildwood, West Hill, Morningside",Scarborough
3,M1G,Woburn,Scarborough
4,M1H,Cedarbrae,Scarborough
...,...,...,...
98,M9N,Weston,York
99,M9P,Westmount,Etobicoke
100,M9R,"Richview Gardens, Martin Grove Gardens, St. Ph...",Etobicoke
101,M9V,"Albion Gardens, Mount Olive, Thistletown, Humb...",Etobicoke


In [23]:
dfr.loc[dfr['Neighbourhood'] == 'Not assigned']

Unnamed: 0,Postcode,Neighbourhood,Borough
85,M7A,Not assigned,Queen's Park


In [24]:
#Not assigned Neighborhood copies same values as Boroughs
dfr_copy=dfr.copy()
dfr_copy.loc[dfr['Neighbourhood']=='Not assigned', 'Neighbourhood'] = dfr['Borough']
dfr_copy

Unnamed: 0,Postcode,Neighbourhood,Borough
0,M1B,"Rouge, Malvern",Scarborough
1,M1C,"Port Union, Highland Creek, Rouge Hill",Scarborough
2,M1E,"Guildwood, West Hill, Morningside",Scarborough
3,M1G,Woburn,Scarborough
4,M1H,Cedarbrae,Scarborough
...,...,...,...
98,M9N,Weston,York
99,M9P,Westmount,Etobicoke
100,M9R,"Richview Gardens, Martin Grove Gardens, St. Ph...",Etobicoke
101,M9V,"Albion Gardens, Mount Olive, Thistletown, Humb...",Etobicoke


In [25]:
# Chnage name of column Postcode to Postal code
df1=dfr_copy.rename(columns={"Postcode": "Postal Code"})
df1

Unnamed: 0,Postal Code,Neighbourhood,Borough
0,M1B,"Rouge, Malvern",Scarborough
1,M1C,"Port Union, Highland Creek, Rouge Hill",Scarborough
2,M1E,"Guildwood, West Hill, Morningside",Scarborough
3,M1G,Woburn,Scarborough
4,M1H,Cedarbrae,Scarborough
...,...,...,...
98,M9N,Weston,York
99,M9P,Westmount,Etobicoke
100,M9R,"Richview Gardens, Martin Grove Gardens, St. Ph...",Etobicoke
101,M9V,"Albion Gardens, Mount Olive, Thistletown, Humb...",Etobicoke


In [26]:
df1.shape

(103, 3)

In [27]:
#Reading the geospatial data for locations (latitutudes and longitudes) of Canada
link2="http://cocl.us/Geospatial_data"
tables2 = pd.read_csv(link2) # 0 represents the 1st table in the link and 1 represent the 2nd table

tables2
PC=pd.DataFrame(tables2)
PC

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [28]:
#Merging location inormtaion and Postl code values for Canada 
pc=pd.merge(df1,PC, on='Postal Code')# merge the two datasets
pc

Unnamed: 0,Postal Code,Neighbourhood,Borough,Latitude,Longitude
0,M1B,"Rouge, Malvern",Scarborough,43.806686,-79.194353
1,M1C,"Port Union, Highland Creek, Rouge Hill",Scarborough,43.784535,-79.160497
2,M1E,"Guildwood, West Hill, Morningside",Scarborough,43.763573,-79.188711
3,M1G,Woburn,Scarborough,43.770992,-79.216917
4,M1H,Cedarbrae,Scarborough,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,Weston,York,43.706876,-79.518188
99,M9P,Westmount,Etobicoke,43.696319,-79.532242
100,M9R,"Richview Gardens, Martin Grove Gardens, St. Ph...",Etobicoke,43.688905,-79.554724
101,M9V,"Albion Gardens, Mount Olive, Thistletown, Humb...",Etobicoke,43.739416,-79.588437


In [29]:
import numpy as np
import pandas as pd

import json
# !conda install -c conda-forge requests sklearn geopy --yes

from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans 
# !conda install -c conda-forge folium=0.5.0 --yes
import folium
print('Libraries imported')

Libraries imported


In [30]:
#pd.set_option('display max_columns',None)

#pd.set_option('display max_rows',None)

In [31]:
address='Toronto'
geolocator=Nominatim(user_agent="To_explorer")
location=geolocator.geocode(address)
latitude=location.latitude
longitude=location.longitude
print('The geographical co-ordinates of Toronto are {},{}.'.format(latitude,longitude))


The geographical co-ordinates of Toronto are 43.653963,-79.387207.


In [32]:
map_Toronto=folium.Map(location=[43.653963,-79.387207],zoom_start=10)
for lat,lng,borough,neighborhood in zip(pc['Latitude'],pc['Longitude'],pc['Borough'],pc['Neighbourhood']):
    label='{},{}'.format(neighborhood,borough)
    label=folium.Popup(label, parse_html=True)
    folium.CircleMarker(
    [lat,lng],
    radius=5,
    popup=label,
    color='blue',
    fill='True',
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False).add_to(map_Toronto)
    
map_Toronto

In [33]:
CLIENT_ID='ZV2JW52JKPTQDUSXTD40M0YZY31SMGIUUMGUUYUUFTRXUINU'
CLIENT_SECRET='PCSWW4RIXAQAK50MIEP5ELQO04MN4OI0ZH3WFBE24AB3JZIR'
VERSION='20180604'
LIMIT= 30
print('Your credentials:')
print('Client_ID:' + CLIENT_ID)
print('Client_Secret:' + CLIENT_SECRET)

Your credentials:
Client_ID:ZV2JW52JKPTQDUSXTD40M0YZY31SMGIUUMGUUYUUFTRXUINU
Client_Secret:PCSWW4RIXAQAK50MIEP5ELQO04MN4OI0ZH3WFBE24AB3JZIR


In [34]:
# We select only those Borughs with Toronto values since we want to analyse only neighborhood of Toronto city

PC_T=pc[pc['Borough'].str.contains("Toronto")]
PC_T=pd.DataFrame(PC_T)#create a new dataframe
PC_T

Unnamed: 0,Postal Code,Neighbourhood,Borough,Latitude,Longitude
37,M4E,The Beaches,East Toronto,43.676357,-79.293031
41,M4K,"Riverdale, The Danforth West",East Toronto,43.679557,-79.352188
42,M4L,"The Beaches West, India Bazaar",East Toronto,43.668999,-79.315572
43,M4M,Studio District,East Toronto,43.659526,-79.340923
44,M4N,Lawrence Park,Central Toronto,43.72802,-79.38879
45,M4P,Davisville North,Central Toronto,43.712751,-79.390197
46,M4R,North Toronto West,Central Toronto,43.715383,-79.405678
47,M4S,Davisville,Central Toronto,43.704324,-79.38879
48,M4T,"Moore Park, Summerhill East",Central Toronto,43.689574,-79.38316
49,M4V,"Forest Hill SE, Deer Park, South Hill, Rathnel...",Central Toronto,43.686412,-79.400049


In [41]:
#Defining the function to get venues from Foursqaure API

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    return(nearby_venues)


In [42]:
# Toronto neighborhood venues generated 
Toronto_venues=getNearbyVenues(names= PC_T['Neighbourhood'],
                              latitudes= PC_T['Latitude'],
                              longitudes= PC_T['Longitude'])

Toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Glen Stewart Park,43.675278,-79.294647,Park
4,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood


In [43]:
Toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,30,30,30,30,30,30
Business Reply Mail Processing Centre 969 Eastern,15,15,15,15,15,15
"Cabbagetown, St. James Town",30,30,30,30,30,30
Central Bay Street,30,30,30,30,30,30
Christie,16,16,16,16,16,16
Church and Wellesley,30,30,30,30,30,30
Davisville,30,30,30,30,30,30
Davisville North,9,9,9,9,9,9
"Design Exchange, Toronto Dominion Centre",30,30,30,30,30,30
"Dufferin, Dovercourt Village",15,15,15,15,15,15


In [44]:
# Analyze each neighborhood based on Vneue category 
Toronto_onehot=pd.get_dummies(Toronto_venues[['Venue Category']],prefix="",prefix_sep="")
Toronto_onehot['Neighbourhood']=Toronto_venues['Neighborhood']
fixed_columns=[Toronto_onehot.columns[-1]]+list(Toronto_onehot.columns[:-1])
Toronto_onehot=Toronto_onehot[fixed_columns]
Toronto_onehot.head()

Unnamed: 0,Neighbourhood,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Aquarium,Art Gallery,...,Theater,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [58]:
Toronto_venues


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Glen Stewart Park,43.675278,-79.294647,Park
4,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
...,...,...,...,...,...,...,...
831,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558,The Ashbridge Estate,43.664691,-79.321805,Garden
832,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558,TTC Russell Division,43.664908,-79.322560,Light Rail Station
833,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558,The Ten Spot,43.664815,-79.324213,Spa
834,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558,Toronto Yoga Mamas,43.664824,-79.324335,Yoga Studio


In [61]:
Toronto_venue_unique_count = Toronto_venues['Venue Category'].value_counts().to_frame(name='Count')
Toronto_venue_unique_count


Unnamed: 0,Count
Café,61
Coffee Shop,55
Restaurant,26
Park,25
Italian Restaurant,25
...,...
Flea Market,1
Aquarium,1
Eastern European Restaurant,1
Market,1


In [45]:
#Grouping Toronto venues and Neighborhood 
Toronto_grouped=Toronto_onehot.groupby('Neighbourhood').mean().reset_index()
Toronto_grouped

Unnamed: 0,Neighbourhood,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Aquarium,Art Gallery,...,Theater,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,...,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0
1,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667
2,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0
4,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Design Exchange, Toronto Dominion Centre",0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.033333,...,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0
9,"Dufferin, Dovercourt Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [46]:
# Top venues generated
num_top_venues=5

for hood in Toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp=Toronto_grouped[Toronto_grouped['Neighbourhood']==hood].T.reset_index()
    temp.columns=['venue','freq']
    temp=temp.iloc[1:]
    temp['freq']=temp.round({'freq':2})
    print(temp.sort_values('freq',ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
                           venue                           freq
0                    Yoga Studio                    Yoga Studio
1                       Wine Bar                       Wine Bar
2          Vietnamese Restaurant          Vietnamese Restaurant
3               Video Game Store               Video Game Store
4  Vegetarian / Vegan Restaurant  Vegetarian / Vegan Restaurant


----Business Reply Mail Processing Centre 969 Eastern----
                           venue                           freq
0                    Yoga Studio                    Yoga Studio
1                       Wine Bar                       Wine Bar
2          Vietnamese Restaurant          Vietnamese Restaurant
3               Video Game Store               Video Game Store
4  Vegetarian / Vegan Restaurant  Vegetarian / Vegan Restaurant


----Cabbagetown, St. James Town----
                           venue                           freq
0                    Yoga Studio                  

4  Vegetarian / Vegan Restaurant  Vegetarian / Vegan Restaurant


----St. James Town----
                           venue                           freq
0                    Yoga Studio                    Yoga Studio
1                       Wine Bar                       Wine Bar
2          Vietnamese Restaurant          Vietnamese Restaurant
3               Video Game Store               Video Game Store
4  Vegetarian / Vegan Restaurant  Vegetarian / Vegan Restaurant


----Stn A PO Boxes 25 The Esplanade----
                           venue                           freq
0                    Yoga Studio                    Yoga Studio
1                       Wine Bar                       Wine Bar
2          Vietnamese Restaurant          Vietnamese Restaurant
3               Video Game Store               Video Game Store
4  Vegetarian / Vegan Restaurant  Vegetarian / Vegan Restaurant


----Studio District----
                           venue                           freq
0          

In [47]:
# Pandas dataframe
def return_most_common_venues(row,num_top_venues):
    row_categories=row.iloc[1:]
    row_categories_sorted=row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]


In [48]:
num_top_venues=10

indicators=['st','nd','rd']

columns=['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venues'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))
        
neighbourhoods_venues_sorted=pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood']=Toronto_grouped['Neighbourhood']
for ind in np.arange(Toronto_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:]=return_most_common_venues(Toronto_grouped.iloc[ind,:],num_top_venues)
    
neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venues,2nd Most Common Venues,3rd Most Common Venues,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Farmers Market,Seafood Restaurant,Cocktail Bar,Beer Bar,Café,Museum,Fountain,Liquor Store,Breakfast Spot,Jazz Club
1,Business Reply Mail Processing Centre 969 Eastern,Yoga Studio,Auto Workshop,Comic Shop,Pizza Place,Restaurant,Burrito Place,Brewery,Skate Park,Smoke Shop,Light Rail Station
2,"Cabbagetown, St. James Town",Restaurant,Coffee Shop,Italian Restaurant,Café,Bakery,General Entertainment,Caribbean Restaurant,Liquor Store,Jewelry Store,Diner
3,Central Bay Street,Coffee Shop,Bubble Tea Shop,Spa,Italian Restaurant,Café,Miscellaneous Shop,Seafood Restaurant,Japanese Restaurant,Ramen Restaurant,Bar
4,Christie,Grocery Store,Café,Park,Athletics & Sports,Italian Restaurant,Diner,Convenience Store,Nightclub,Restaurant,Baby Store


In [49]:
neighbourhoods_venues_sorted.count()

Neighbourhood             38
1st Most Common Venues    38
2nd Most Common Venues    38
3rd Most Common Venues    38
4th Most Common Venue     38
5th Most Common Venue     38
6th Most Common Venue     38
7th Most Common Venue     38
8th Most Common Venue     38
9th Most Common Venue     38
10th Most Common Venue    38
dtype: int64

In [50]:
#Cluster KNN clustering neighborhoods 
from sklearn.cluster import KMeans
# set number of clusters
kclusters = 5
Toronto_grouped_clustering = Toronto_grouped.drop('Neighbourhood', 1)
# run k-means clustering
kmeans = KMeans(n_clusters = kclusters, random_state = 0).fit(Toronto_grouped_clustering)
# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [51]:
#Generate cluster labels
neighbourhoods_venues_sorted.insert(0,'Cluster Labels', kmeans.labels_)

Toronto_merged = PC_T
Toronto_merged = Toronto_merged.merge(neighbourhoods_venues_sorted, on = 'Neighbourhood')

Toronto_merged.head()

Unnamed: 0,Postal Code,Neighbourhood,Borough,Latitude,Longitude,Cluster Labels,1st Most Common Venues,2nd Most Common Venues,3rd Most Common Venues,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,The Beaches,East Toronto,43.676357,-79.293031,2,Park,Trail,Health Food Store,Neighborhood,Pub,Asian Restaurant,Dance Studio,Ethiopian Restaurant,Eastern European Restaurant,Dumpling Restaurant
1,M4K,"Riverdale, The Danforth West",East Toronto,43.679557,-79.352188,0,Greek Restaurant,Ice Cream Shop,Italian Restaurant,Yoga Studio,Coffee Shop,Cosmetics Shop,Pizza Place,Pub,Dessert Shop,Diner
2,M4L,"The Beaches West, India Bazaar",East Toronto,43.668999,-79.315572,0,Park,Sandwich Place,Pet Store,Ice Cream Shop,Movie Theater,Pub,Burrito Place,Burger Joint,Liquor Store,Brewery
3,M4M,Studio District,East Toronto,43.659526,-79.340923,0,Café,Coffee Shop,Italian Restaurant,Bakery,Latin American Restaurant,Stationery Store,Fish Market,Bookstore,Seafood Restaurant,Sandwich Place
4,M4N,Lawrence Park,Central Toronto,43.72802,-79.38879,1,Bus Line,Park,Swim School,Yoga Studio,Falafel Restaurant,Ethiopian Restaurant,Eastern European Restaurant,Dumpling Restaurant,Dog Run,Discount Store


In [52]:
# create Clustered Neighborhood  map
map_clusters = folium.Map(location = [43.653963,-79.387207], zoom_start = 11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i * x) ** 2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merged['Latitude'], Toronto_merged['Longitude'], Toronto_merged['Neighbourhood'], Toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius = 5,
        popup = label,
        color = rainbow[cluster - 1],
        fill = True,
        fill_color = rainbow[cluster - 1],
        fill_opacity = 0.7).add_to(map_clusters)
       
map_clusters


In [53]:
# Analysing each clusters and then interpretin them

Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 0, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venues,2nd Most Common Venues,3rd Most Common Venues,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,"Riverdale, The Danforth West",0,Greek Restaurant,Ice Cream Shop,Italian Restaurant,Yoga Studio,Coffee Shop,Cosmetics Shop,Pizza Place,Pub,Dessert Shop,Diner
2,"The Beaches West, India Bazaar",0,Park,Sandwich Place,Pet Store,Ice Cream Shop,Movie Theater,Pub,Burrito Place,Burger Joint,Liquor Store,Brewery
3,Studio District,0,Café,Coffee Shop,Italian Restaurant,Bakery,Latin American Restaurant,Stationery Store,Fish Market,Bookstore,Seafood Restaurant,Sandwich Place
5,Davisville North,0,Hotel,Gym,Park,Clothing Store,Breakfast Spot,Convenience Store,Food & Drink Shop,Sandwich Place,Dessert Shop,Ethiopian Restaurant
6,North Toronto West,0,Coffee Shop,Sporting Goods Shop,Yoga Studio,Italian Restaurant,Salon / Barbershop,Restaurant,Rental Car Location,Park,Mexican Restaurant,Diner
7,Davisville,0,Dessert Shop,Sushi Restaurant,Sandwich Place,Coffee Shop,Gym,Italian Restaurant,Café,Pizza Place,Seafood Restaurant,Diner
9,"Forest Hill SE, Deer Park, South Hill, Rathnel...",0,Coffee Shop,Pub,Bagel Shop,Vietnamese Restaurant,Light Rail Station,Liquor Store,Supermarket,Pizza Place,American Restaurant,Health & Beauty Service
11,"Cabbagetown, St. James Town",0,Restaurant,Coffee Shop,Italian Restaurant,Café,Bakery,General Entertainment,Caribbean Restaurant,Liquor Store,Jewelry Store,Diner
12,Church and Wellesley,0,Gay Bar,Burger Joint,Bookstore,Bubble Tea Shop,Salon / Barbershop,Restaurant,Ramen Restaurant,Pub,Pizza Place,Gastropub
13,"Harbourfront, Regent Park",0,Coffee Shop,Bakery,Park,Breakfast Spot,Mexican Restaurant,Yoga Studio,Greek Restaurant,Pub,Performing Arts Venue,Historic Site


In [54]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 1, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venues,2nd Most Common Venues,3rd Most Common Venues,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Lawrence Park,1,Bus Line,Park,Swim School,Yoga Studio,Falafel Restaurant,Ethiopian Restaurant,Eastern European Restaurant,Dumpling Restaurant,Dog Run,Discount Store


In [55]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 2, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venues,2nd Most Common Venues,3rd Most Common Venues,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,The Beaches,2,Park,Trail,Health Food Store,Neighborhood,Pub,Asian Restaurant,Dance Studio,Ethiopian Restaurant,Eastern European Restaurant,Dumpling Restaurant
10,Rosedale,2,Park,Playground,Trail,Building,Yoga Studio,Dance Studio,Ethiopian Restaurant,Eastern European Restaurant,Dumpling Restaurant,Dog Run


In [56]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 3, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venues,2nd Most Common Venues,3rd Most Common Venues,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,Roselawn,3,Music Venue,Garden,Dance Studio,Falafel Restaurant,Ethiopian Restaurant,Eastern European Restaurant,Dumpling Restaurant,Dog Run,Discount Store,Diner


In [57]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 4, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venues,2nd Most Common Venues,3rd Most Common Venues,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,"Moore Park, Summerhill East",4,Playground,Summer Camp,Tennis Court,Yoga Studio,Dance Studio,Falafel Restaurant,Ethiopian Restaurant,Eastern European Restaurant,Dumpling Restaurant,Dog Run
