In [1]:
import pandas as pd
import numpy as np

# Scraping Wikipedia Page to Dataframe

In [2]:
#read wikipedia into pandas dataframe
tables = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')

In [3]:
#extract target table
df=tables[0]
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


In [4]:
#rename columns
df.columns=['PostalCode', 'Borough', 'Neighborhood']

In [5]:
#drop "Not assigned" in Borough column
postalcode=df.replace('Not assigned', np.NaN).dropna(subset=['Borough'], axis=0).reset_index(drop=True)
postalcode

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [6]:
#There is no "Not assigned" neighborhood remaining
postalcode.loc[postalcode['Neighborhood'] == np.NaN]

Unnamed: 0,PostalCode,Borough,Neighborhood


In [7]:
# There is no duplicate in PostalCode
postalcode['PostalCode'].unique

<bound method Series.unique of 0      M3A
1      M4A
2      M5A
3      M6A
4      M7A
      ... 
98     M8X
99     M4Y
100    M7Y
101    M8Y
102    M8Z
Name: PostalCode, Length: 103, dtype: object>

In [8]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(postalcode['Borough'].unique()),
        postalcode.shape[0]
    )
)

The dataframe has 10 boroughs and 103 neighborhoods.


In [9]:
postalcode.shape

(103, 3)

# Getting Latitude and Longitude Coordinates of Each Neighborhood

In [10]:
# Install geocoder
#conda install -c conda-forge geocoder


In [11]:
#Create function get_ll to obtain Latitude and Longitude Coordinates with postalcode
import geocoder # import geocoder

def get_ll (postal_code):
    # initialize your variable to None
    lat_lng_coords = None

    # loop until you get the coordinates
    while(lat_lng_coords is None):
      g = geocoder.arcgis('{}, Toronto, Ontario'.format(postal_code))
      lat_lng_coords = g.latlng

    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]
    
    return [latitude, longitude]

In [12]:
# Adding two columns for coordinates
postalcode['Latitude'] = np.NaN
postalcode['Longitude'] = np.NaN

In [13]:
# Adding Latitude and Longitude Coordinates to each row of postalcode dataframe

for index in postalcode.index:
    ll=get_ll(postalcode.at[index, 'PostalCode'])
    postalcode.at[index, 'Latitude']=ll[0]
    postalcode.at[index, 'Longitude']=ll[1]


In [14]:
postalcode

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.75245,-79.32991
1,M4A,North York,Victoria Village,43.73057,-79.31306
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66253,-79.39188
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.65319,-79.51113
99,M4Y,Downtown Toronto,Church and Wellesley,43.66659,-79.38133
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.64869,-79.38544
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.63278,-79.48945


In [15]:
# Save to csv file so that we don't need to retrive coordinates everytime 
postalcode.to_csv('postalcode')

# Explore Neighborhoods in Toronto

In [16]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
# Install geopy
#!conda install -c conda-forge geopy --yes
# Install folium
#!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library
import json
import requests # library to handle requests

In [17]:
# Use geopy library to get the latitude and longitude values of Toronto.

address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [18]:
toronto_data=pd.read_csv('postalcode', index_col=0)
toronto_data

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.75245,-79.32991
1,M4A,North York,Victoria Village,43.73057,-79.31306
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66253,-79.39188
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.65319,-79.51113
99,M4Y,Downtown Toronto,Church and Wellesley,43.66659,-79.38133
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.64869,-79.38544
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.63278,-79.48945


In [19]:
# create map of New York using latitude and longitude values

map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Borough'], toronto_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [20]:
# Eneter API credentials and version
CLIENT_ID = 'F0DYMACZXFPLP0ZNQBTNK5DRDXT5AGAQZGH4RQVYS3VR1GZY' # your Foursquare ID
CLIENT_SECRET = 'FGCZMPQ5DUW0SCMYURPFWXXRV5ZHJB2HVV52S54M3GN2PCWS' # your Foursquare Secret
ACCESS_TOKEN = 'ZTYLOKOML1W1C301PXDLWK3OEWAS2T3AUHRKW3S33DP1E4DT' # your FourSquare Access Token
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: F0DYMACZXFPLP0ZNQBTNK5DRDXT5AGAQZGH4RQVYS3VR1GZY
CLIENT_SECRET:FGCZMPQ5DUW0SCMYURPFWXXRV5ZHJB2HVV52S54M3GN2PCWS


In [21]:
# create a function to repeat the same process to all the neighborhoods
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [22]:
toronto_venues = getNearbyVenues(names=toronto_data['Neighborhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                  )

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue, Humber Valley Village
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto, Broadview North (Old East York)
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmo

In [23]:
print(toronto_venues.shape)
toronto_venues.head()

(1356, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.75245,-79.32991,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.75245,-79.32991,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Victoria Village,43.73057,-79.31306,Wigmore Park,43.731023,-79.310771,Park
3,Victoria Village,43.73057,-79.31306,Memories of Africa,43.726602,-79.312427,Grocery Store
4,"Regent Park, Harbourfront",43.65512,-79.36264,Roselle Desserts,43.653447,-79.362017,Bakery


In [24]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,14,14,14,14,14,14
"Alderwood, Long Branch",4,4,4,4,4,4
Bayview Village,5,5,5,5,5,5
"Bedford Park, Lawrence Manor East",21,21,21,21,21,21
Berczy Park,30,30,30,30,30,30
...,...,...,...,...,...,...
"Willowdale, Willowdale West",5,5,5,5,5,5
Woburn,4,4,4,4,4,4
Woodbine Heights,16,16,16,16,16,16
York Mills West,4,4,4,4,4,4


In [25]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 223 uniques categories.


# Analyze Each Neighborhood

In [26]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 
toronto_onehot.head()

Unnamed: 0,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,Auto Garage,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio,Zoo Exhibit
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [27]:
# get a list of columns
cols = list(toronto_onehot)
# move the Neighborhood column to head of list using index, pop and insert
cols.insert(0, cols.pop(cols.index('Neighborhood')))

In [28]:
# move neighborhood column to the first column
toronto_onehot = toronto_onehot[cols]

toronto_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio,Zoo Exhibit
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [29]:
# Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio,Zoo Exhibit
0,Agincourt,0.0,0.0,0.0,0.000000,0.0,0.0000,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.000000,0.0,0.0000,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
2,Bayview Village,0.0,0.0,0.0,0.000000,0.0,0.0000,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
3,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.000000,0.0,0.0000,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
4,Berczy Park,0.0,0.0,0.0,0.033333,0.0,0.0000,0.0,0.0,0.0,...,0.0,0.033333,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,"Willowdale, Willowdale West",0.0,0.0,0.0,0.000000,0.0,0.0000,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
92,Woburn,0.0,0.0,0.0,0.000000,0.0,0.0000,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
93,Woodbine Heights,0.0,0.0,0.0,0.000000,0.0,0.0625,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
94,York Mills West,0.0,0.0,0.0,0.000000,0.0,0.0000,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0


In [30]:
# function to sort the venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [31]:
# create the new dataframe and display the top 10 venues for each neighborhood.

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Chinese Restaurant,Bubble Tea Shop,Hong Kong Restaurant,Skating Rink,Bakery,Badminton Court,Discount Store,Sushi Restaurant,Department Store,Newsagent
1,"Alderwood, Long Branch",Convenience Store,Performing Arts Venue,Gym,Pub,Dog Run,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
2,Bayview Village,Trail,Construction & Landscaping,Dog Run,Park,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
3,"Bedford Park, Lawrence Manor East",Sandwich Place,Coffee Shop,Italian Restaurant,Thai Restaurant,Breakfast Spot,Spa,Butcher,Café,Juice Bar,Sports Club
4,Berczy Park,Farmers Market,Bakery,Seafood Restaurant,Cheese Shop,Cocktail Bar,Coffee Shop,Liquor Store,Fish Market,Restaurant,Breakfast Spot
...,...,...,...,...,...,...,...,...,...,...,...
91,"Willowdale, Willowdale West",Pizza Place,Park,Grocery Store,Coffee Shop,Butcher,Zoo Exhibit,Donut Shop,Fast Food Restaurant,Farmers Market,Farm
92,Woburn,Park,Business Service,Coffee Shop,Korean BBQ Restaurant,Zoo Exhibit,Dry Cleaner,Field,Fast Food Restaurant,Farmers Market,Farm
93,Woodbine Heights,Pharmacy,Bus Line,Grocery Store,Middle Eastern Restaurant,Coffee Shop,Metro Station,Gas Station,Café,Breakfast Spot,Pizza Place
94,York Mills West,Convenience Store,Park,Coffee Shop,Speakeasy,Donut Shop,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm


# Cluster Neighborhoods

Run _k_-means to cluster the neighborhood into 5 clusters.

In [32]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

In [33]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([4, 4, 0, 4, 0, 0, 0, 0, 0, 0])

In [34]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_data



In [35]:
# merge manhattan_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.75245,-79.32991,2.0,Food & Drink Shop,Park,Zoo Exhibit,Donut Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
1,M4A,North York,Victoria Village,43.73057,-79.31306,2.0,Grocery Store,Park,Zoo Exhibit,Dog Run,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264,0.0,Coffee Shop,Breakfast Spot,Theater,Playground,Distribution Center,Pub,Restaurant,Electronics Store,Event Space,Spa
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042,4.0,Clothing Store,Women's Store,Food Court,Cosmetics Shop,American Restaurant,Men's Store,Chocolate Shop,Coffee Shop,Pharmacy,Movie Theater
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66253,-79.39188,0.0,Coffee Shop,Sandwich Place,Bank,Café,Fried Chicken Joint,Burrito Place,Theater,Gastropub,Portuguese Restaurant,Mediterranean Restaurant
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.65319,-79.51113,0.0,Lounge,Pool,Donut Shop,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
99,M4Y,Downtown Toronto,Church and Wellesley,43.66659,-79.38133,0.0,Coffee Shop,Men's Store,Gay Bar,Japanese Restaurant,Ice Cream Shop,Beer Bar,Martial Arts School,Pub,Steakhouse,Bubble Tea Shop
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.64869,-79.38544,0.0,Café,Restaurant,Theater,Steakhouse,Hotel,Coffee Shop,Concert Hall,Pizza Place,Mediterranean Restaurant,Tea Room
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.63278,-79.48945,4.0,Bank,Coffee Shop,Chinese Restaurant,Sushi Restaurant,Italian Restaurant,Flower Shop,Park,Fast Food Restaurant,Farmers Market,Farm


It seems Foursquare does not have data for 3 neighborhoods (see last three rows below with NaN in "Cluster Labels"). Therefore they need to be dropped

In [36]:
toronto_merged.sort_values(['Cluster Labels'])

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
50,M9L,North York,Humber Summit,43.75948,-79.55707,0.0,Construction & Landscaping,Home Service,Print Shop,Dog Run,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
30,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.64970,-79.38258,0.0,Café,American Restaurant,Coffee Shop,Gym,Restaurant,Seafood Restaurant,Tea Room,Greek Restaurant,Gluten-free Restaurant,Gastropub
31,M6H,West Toronto,"Dufferin, Dovercourt Village",43.66505,-79.43891,0.0,Park,Grocery Store,Bakery,Pizza Place,Pool,Middle Eastern Restaurant,Café,Bus Line,Smoke Shop,Bar
32,M1J,Scarborough,Scarborough Village,43.74446,-79.23117,0.0,Indian Restaurant,Park,Grocery Store,Spa,Restaurant,Dog Run,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
73,M4R,Central Toronto,"North Toronto West, Lawrence Park",43.71458,-79.40668,0.0,Photography Studio,Playground,Park,Gym Pool,Art Gallery,Dog Run,Field,Fast Food Restaurant,Farmers Market,Farm
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24,M5G,Downtown Toronto,Central Bay Street,43.65609,-79.38493,4.0,Coffee Shop,Plaza,Hotel,Poke Place,Bookstore,Bubble Tea Shop,Sushi Restaurant,Burger Joint,Café,Clothing Store
102,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,...",43.62513,-79.52681,4.0,Gym / Fitness Center,Thai Restaurant,Miscellaneous Shop,Eastern European Restaurant,Mattress Store,Burrito Place,Burger Joint,Sushi Restaurant,Bank,Pet Store
28,M3H,North York,"Bathurst Manor, Wilson Heights, Downsview North",43.75788,-79.44847,,,,,,,,,,,
62,M5N,Central Toronto,Roselawn,43.71208,-79.41848,,,,,,,,,,,


In [37]:
toronto_merged=toronto_merged.dropna(subset=['Cluster Labels'])
toronto_merged=toronto_merged.astype({'Cluster Labels':'int32'})

In [38]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [39]:
# visualize the resulting clusters

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examine Clusters

## Cluster 1

In [40]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,0,Coffee Shop,Breakfast Spot,Theater,Playground,Distribution Center,Pub,Restaurant,Electronics Store,Event Space,Spa
4,Downtown Toronto,0,Coffee Shop,Sandwich Place,Bank,Café,Fried Chicken Joint,Burrito Place,Theater,Gastropub,Portuguese Restaurant,Mediterranean Restaurant
7,North York,0,Coffee Shop,Burger Joint,Intersection,Gym,Grocery Store,Smoke Shop,Bubble Tea Shop,Soccer Field,Park,Supermarket
13,North York,0,Coffee Shop,Burger Joint,Intersection,Gym,Grocery Store,Smoke Shop,Bubble Tea Shop,Soccer Field,Park,Supermarket
15,Downtown Toronto,0,Café,Cosmetics Shop,Gastropub,Japanese Restaurant,Coffee Shop,Creperie,Restaurant,Cocktail Bar,Poke Place,Park
16,York,0,Field,Hockey Arena,Grocery Store,Park,Trail,Dog Run,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
17,Etobicoke,0,Carpet Store,Park,Fish & Chips Shop,Grocery Store,College Rec Center,Electronics Store,Shopping Mall,Zoo Exhibit,Ethiopian Restaurant,Eastern European Restaurant
18,Scarborough,0,Construction & Landscaping,Gym / Fitness Center,Park,Dry Cleaner,Tea Room,Donut Shop,Field,Fast Food Restaurant,Farmers Market,Farm
20,Downtown Toronto,0,Farmers Market,Bakery,Seafood Restaurant,Cheese Shop,Cocktail Bar,Coffee Shop,Liquor Store,Fish Market,Restaurant,Breakfast Spot
21,York,0,Gym,Park,Sporting Goods Shop,Bakery,Mexican Restaurant,Beer Store,Café,Zoo Exhibit,Ethiopian Restaurant,Electronics Store


## Cluster 2

In [41]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Scarborough,1,Bar,Zoo Exhibit,Flea Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space


## Cluster 3

In [42]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,2,Food & Drink Shop,Park,Zoo Exhibit,Donut Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
1,North York,2,Grocery Store,Park,Zoo Exhibit,Dog Run,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
27,North York,2,Park,Residential Building (Apartment / Condo),Zoo Exhibit,Dog Run,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
35,East York,2,Intersection,Park,Dog Run,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space,Ethiopian Restaurant
45,North York,2,Park,Zoo Exhibit,Dog Run,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space,Ethiopian Restaurant
68,Central Toronto,2,Park,Business Service,Zoo Exhibit,Donut Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
69,West Toronto,2,Park,Residential Building (Apartment / Condo),Zoo Exhibit,Dog Run,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space


## Cluster 4

In [43]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
26,Scarborough,3,Trail,Zoo Exhibit,Dog Run,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space,Ethiopian Restaurant


## Cluster 5

In [44]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,North York,4,Clothing Store,Women's Store,Food Court,Cosmetics Shop,American Restaurant,Men's Store,Chocolate Shop,Coffee Shop,Pharmacy,Movie Theater
5,Etobicoke,4,Pharmacy,Bank,Park,Grocery Store,Skating Rink,Shopping Mall,Café,Dog Run,Farm,Falafel Restaurant
6,Scarborough,4,Zoo Exhibit,Fast Food Restaurant,Donut Shop,Fish & Chips Shop,Field,Farmers Market,Farm,Falafel Restaurant,Event Space,Ethiopian Restaurant
8,East York,4,Pizza Place,Gym / Fitness Center,Athletics & Sports,Intersection,Fast Food Restaurant,Bank,Gastropub,Café,Breakfast Spot,Flea Market
9,Downtown Toronto,4,Café,Ramen Restaurant,Coffee Shop,Theater,Lake,Mexican Restaurant,Burrito Place,Shopping Mall,Burger Joint,Lounge
10,North York,4,Pizza Place,Grocery Store,Fast Food Restaurant,Japanese Restaurant,Bank,Bakery,Pub,Gas Station,Italian Restaurant,Latin American Restaurant
11,Etobicoke,4,Pizza Place,Sandwich Place,Chinese Restaurant,Tea Room,Zoo Exhibit,Dog Run,Farmers Market,Farm,Falafel Restaurant,Event Space
14,East York,4,Pharmacy,Bus Line,Grocery Store,Middle Eastern Restaurant,Coffee Shop,Metro Station,Gas Station,Café,Breakfast Spot,Pizza Place
19,East Toronto,4,Health Food Store,Pub,Trail,Dog Run,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
23,East York,4,Coffee Shop,Burger Joint,Furniture / Home Store,Sporting Goods Shop,Bank,Shopping Mall,Bike Shop,Sports Bar,Breakfast Spot,Smoothie Shop
