# WHICH NEIGHBORHOODS IN TORONTO LACK SHOPPING MALLS?

## COURSERA IBM DATA SCIENCE CAPSTONE ASSIGNMENT

## by Viatcheslav Liachenko


# # 1. Scrapping Wikipedia for Toronto's neighborhoods, adding coordinates and preprocessing data

In [1]:
# Importing libraries to scrap wikipedia table into a dataframe, preprocess and add coordinates
import pandas as pd
from bs4 import BeautifulSoup
import requests
import numpy as np
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors
print ("Libraries imported")

Libraries imported


In [2]:
source = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M").text
soup = BeautifulSoup(source, 'lxml')

table = soup.find("table")
table_rows = table.tbody.find_all("tr")

res = []
for tr in table_rows:
    td = tr.find_all("td")
    row = [tr.text for tr in td]
    
    # Processing the cells that have an assigned borough only.
    if row != [] and row[1] != "Not assigned":
        # If a cell has a borough but a "Not assigned" neighborhood, then the neighborhood will be the same as the borough.
        if "Not assigned" in row[2]: 
            row[2] = row[1]
        res.append(row)

df = pd.DataFrame(res, columns = ["PostalCode", "Borough", "Neighborhood"])
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A\n,Not assigned\n,Not assigned\n
1,M2A\n,Not assigned\n,Not assigned\n
2,M3A\n,North York\n,Parkwoods\n
3,M4A\n,North York\n,Victoria Village\n
4,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront\n"


In [3]:
# Preprocessing: Remove "\n" at the end of each string in the Neighborhood, Borough and PostalCode columns and group all neighborhoods with the same postal code
df["Neighborhood"] = df["Neighborhood"].str.replace("\n","")
df["PostalCode"] = df["PostalCode"].str.replace("\n","")
df["Borough"] = df["Borough"].str.replace("\n","")
df = df.groupby(["PostalCode", "Borough"])["Neighborhood"].apply(", ".join).reset_index()
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M1B,Scarborough,"Malvern, Rouge"
2,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
3,M1E,Scarborough,"Guildwood, Morningside, West Hill"
4,M1G,Scarborough,Woburn


In [4]:
print("Shape: ", df.shape)

Shape:  (180, 3)


In [5]:
df_coordinates = pd.read_csv("https://cocl.us/Geospatial_data")
df_coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [6]:
# Merging two dataframes: df and df_coordinates together
df_toronto = pd.merge(df, df_coordinates, how='left', left_on = 'PostalCode', right_on = 'Postal Code')
df_toronto.drop("Postal Code", axis=1, inplace=True)
df_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1A,Not assigned,Not assigned,,
1,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
2,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
3,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
4,M1G,Scarborough,Woburn,43.770992,-79.216917


In [7]:
df_toronto

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1A,Not assigned,Not assigned,,
1,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
2,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
3,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
4,M1G,Scarborough,Woburn,43.770992,-79.216917
...,...,...,...,...,...
175,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437
176,M9W,Etobicoke,"Northwest, West Humber - Clairville",43.706748,-79.594054
177,M9X,Not assigned,Not assigned,,
178,M9Y,Not assigned,Not assigned,,


In [8]:
print("Shape: ", df_toronto.shape)

Shape:  (180, 5)


In [9]:
df_toronto.dropna(subset = ["Latitude"], inplace=True)
df_toronto.dropna(subset = ["Longitude"], inplace=True)
df_toronto

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
1,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
2,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
3,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
4,M1G,Scarborough,Woburn,43.770992,-79.216917
5,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
170,M9N,York,Weston,43.706876,-79.518188
171,M9P,Etobicoke,Westmount,43.696319,-79.532242
172,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
175,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


In [10]:
df_toronto.drop('Borough', axis='columns', inplace=True)
df_toronto

Unnamed: 0,PostalCode,Neighborhood,Latitude,Longitude
1,M1B,"Malvern, Rouge",43.806686,-79.194353
2,M1C,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
3,M1E,"Guildwood, Morningside, West Hill",43.763573,-79.188711
4,M1G,Woburn,43.770992,-79.216917
5,M1H,Cedarbrae,43.773136,-79.239476
...,...,...,...,...
170,M9N,Weston,43.706876,-79.518188
171,M9P,Westmount,43.696319,-79.532242
172,M9R,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
175,M9V,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


In [11]:
print("Shape: ", df_toronto.shape)

Shape:  (103, 4)


In [12]:
# Saving the dataframe as CSV file
df_toronto.to_csv("df_toronto.csv", index=False)

# # 2. Mapping and adding markers

In [13]:
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation


!pip install geopy
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# Tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize


! pip install folium==0.5.0
import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Folium installed
Libraries imported.


In [14]:
from geopy.geocoders import Nominatim 
from pandas.io.json import json_normalize  
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors
print('Libraries imported.')

Libraries imported.


In [15]:
Toronto = "Toronto, ON"

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(Toronto)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto city are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto city are 43.6534817, -79.3839347.


In [16]:
# Creating map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)
map_toronto

In [17]:
# Adding markers of Toronto neighborhoods
for lat, lng, neighborhood in zip(
        df_toronto['Latitude'], 
        df_toronto['Longitude'], 
        df_toronto['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  

map_toronto

In [18]:
# Saving the map as HTML file
map_toronto.save('map_toronto.html')

# # 3. Using FOURSQUARE to get venues, OneHot encoding and getting shopping malls by neighborhood

In [19]:
# define Foursquare credentials and version
CLIENT_ID = 'S0BINXJCMH1EEOZ05AGFGXQKKVSXMH5FL4F2K2Z20INBLS5D'
CLIENT_SECRET = 'EY433PICFA1FTVWPP4ADYACYRNWW2XLMTZH543N0KYPGZXGO'
VERSION = '20201128'

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: S0BINXJCMH1EEOZ05AGFGXQKKVSXMH5FL4F2K2Z20INBLS5D
CLIENT_SECRET:EY433PICFA1FTVWPP4ADYACYRNWW2XLMTZH543N0KYPGZXGO


In [20]:
# Using Foursquare to get 100 venues within 3 km
radius = 3000
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Neighborhood']):
    
    # Creating the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # Making the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # Returning relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [21]:
# Converting the venues list into a new dataframe
venues_df = pd.DataFrame(venues)

# Defining column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(9761, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,"Malvern, Rouge",43.806686,-79.194353,African Rainforest Pavilion,43.817725,-79.183433,Zoo Exhibit
1,"Malvern, Rouge",43.806686,-79.194353,Toronto Pan Am Sports Centre,43.790623,-79.193869,Athletics & Sports
2,"Malvern, Rouge",43.806686,-79.194353,Toronto Zoo,43.820582,-79.181551,Zoo
3,"Malvern, Rouge",43.806686,-79.194353,Images Salon & Spa,43.802283,-79.198565,Spa
4,"Malvern, Rouge",43.806686,-79.194353,Polar Bear Exhibit,43.823372,-79.185145,Zoo


In [22]:
# Checking how many venues in each neighborhood
venues_df.groupby(["Neighborhood"]).count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,100,100,100,100,100,100
"Alderwood, Long Branch",100,100,100,100,100,100
"Bathurst Manor, Wilson Heights, Downsview North",100,100,100,100,100,100
Bayview Village,100,100,100,100,100,100
"Bedford Park, Lawrence Manor East",100,100,100,100,100,100
...,...,...,...,...,...,...
"Willowdale, Willowdale West",100,100,100,100,100,100
Woburn,100,100,100,100,100,100
Woodbine Heights,100,100,100,100,100,100
York Mills West,83,83,83,83,83,83


In [23]:
# Finding unique categories of venues
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))
venues_df['VenueCategory'].unique()[:50]

There are 297 uniques categories.


array(['Zoo Exhibit', 'Athletics & Sports', 'Zoo', 'Spa', 'Park',
       'Restaurant', 'Dessert Shop', 'Neighborhood', 'National Park',
       'Pharmacy', 'Bank', 'Caribbean Restaurant', 'Fried Chicken Joint',
       'Paper / Office Supplies Store', 'Liquor Store',
       'Fast Food Restaurant', 'Skating Rink', 'Pizza Place',
       'Gas Station', 'Coffee Shop', 'Supermarket', 'Sandwich Place',
       'Discount Store', 'Beer Store', 'Cosmetics Shop', 'Burger Joint',
       'Grocery Store', 'Other Great Outdoors', 'Trail',
       'Convenience Store', 'Intersection', 'Bakery', 'Baseball Field',
       'Italian Restaurant', 'Breakfast Spot', 'Campground',
       'Food & Drink Shop', 'Ice Cream Shop', 'Pub', 'Smoothie Shop',
       'Mexican Restaurant', 'Sports Bar', 'Diner', 'Greek Restaurant',
       'Pet Store', 'Gym / Fitness Center', 'Gymnastics Gym',
       'Yoga Studio', 'Bus Line', 'Train Station'], dtype=object)

In [24]:
# Checking if shopping malls are there
"Shopping Mall" in venues_df['VenueCategory'].unique()

True

In [25]:
# OneHot encoding for each neighborhood
toronto_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# Adding "Neighborhoods" column to dataframe
toronto_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# Moving "Neighborhoods" column to the left of the dataframe
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

print(toronto_onehot.shape)
toronto_onehot

(9761, 298)


Unnamed: 0,Neighborhoods,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Amphitheater,Antique Shop,Aquarium,Arcade,Art Gallery,...,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio,Zoo,Zoo Exhibit
0,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9756,"Northwest, West Humber - Clairville",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9757,"Northwest, West Humber - Clairville",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9758,"Northwest, West Humber - Clairville",0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9759,"Northwest, West Humber - Clairville",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [26]:
# Calculating mean of frequency of occurence for each category by neighborhood
toronto_grouped = toronto_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(toronto_grouped.shape)
toronto_grouped

(99, 298)


Unnamed: 0,Neighborhoods,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Amphitheater,Antique Shop,Aquarium,Arcade,Art Gallery,...,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio,Zoo,Zoo Exhibit
0,Agincourt,0.0,0.00,0.00,0.01,0.0,0.0,0.0,0.00,0.0,...,0.02,0.00,0.0,0.0,0.01,0.0,0.00,0.00,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.01,0.0,...,0.00,0.01,0.0,0.0,0.01,0.0,0.00,0.01,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.00,0.01,0.00,0.0,0.0,0.0,0.00,0.0,...,0.00,0.01,0.0,0.0,0.00,0.0,0.00,0.00,0.0,0.0
3,Bayview Village,0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.00,0.0,...,0.00,0.00,0.0,0.0,0.00,0.0,0.00,0.00,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.00,0.00,0.02,0.0,0.0,0.0,0.00,0.0,...,0.00,0.01,0.0,0.0,0.01,0.0,0.00,0.01,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,"Willowdale, Willowdale West",0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.00,0.0,...,0.01,0.00,0.0,0.0,0.01,0.0,0.00,0.00,0.0,0.0
95,Woburn,0.0,0.00,0.00,0.01,0.0,0.0,0.0,0.00,0.0,...,0.01,0.00,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0
96,Woodbine Heights,0.0,0.02,0.00,0.02,0.0,0.0,0.0,0.00,0.0,...,0.00,0.01,0.0,0.0,0.00,0.0,0.00,0.01,0.0,0.0
97,York Mills West,0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.00,0.0,...,0.00,0.00,0.0,0.0,0.00,0.0,0.00,0.00,0.0,0.0


In [27]:
# Finding shopping malls in neighborhoods
len(toronto_grouped[toronto_grouped["Shopping Mall"] > 0])

44

In [28]:
# Filtering shopping malls by neighborhood
toronto_malls = toronto_grouped[["Neighborhoods","Shopping Mall"]]
toronto_malls

Unnamed: 0,Neighborhoods,Shopping Mall
0,Agincourt,0.010000
1,"Alderwood, Long Branch",0.010000
2,"Bathurst Manor, Wilson Heights, Downsview North",0.010000
3,Bayview Village,0.020000
4,"Bedford Park, Lawrence Manor East",0.010000
...,...,...
94,"Willowdale, Willowdale West",0.000000
95,Woburn,0.000000
96,Woodbine Heights,0.000000
97,York Mills West,0.012048


In [29]:
no_shopping_malls = toronto_malls.sort_values('Shopping Mall', ascending=False)
no_shopping_malls.tail(55)

Unnamed: 0,Neighborhoods,Shopping Mall
81,"The Danforth West, Riverdale",0.0
17,"Cliffside, Cliffcrest, Scarborough Village West",0.0
16,"Clarks Corners, Tam O'Shanter, Sullivan",0.0
75,"Steeles West, L'Amoreaux West",0.0
72,"South Steeles, Silverstone, Humbergate, Jamest...",0.0
77,Studio District,0.0
71,Scarborough Village,0.0
10,Caledonia-Fairbanks,0.0
80,The Beaches,0.0
33,"Golden Mile, Clairlea, Oakridge",0.0


In [30]:
no_shopping_mall_list = no_shopping_malls['Neighborhoods'].tolist()
  
# show the list 
print(no_shopping_mall_list)


['Willowdale, Willowdale East', 'St. James Town, Cabbagetown', 'Bayview Village', 'Islington Avenue, Humber Valley Village', 'University of Toronto, Harbord', "Queen's Park, Ontario Provincial Government", 'Parkwoods', 'Church and Wellesley', 'Central Bay Street', 'Don Mills', 'Canada Post Gateway Processing Centre', 'York Mills West', 'Westmount', 'Moore Park, Summerhill East', 'Lawrence Manor, Lawrence Heights', 'Alderwood, Long Branch', 'Richmond, Adelaide, King', 'Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens', 'Kensington Market, Chinatown, Grange Park', 'Regent Park, Harbourfront', 'Agincourt', 'Rosedale', 'Runnymede, The Junction North', 'St. James Town', 'Glencairn', 'Stn A PO Boxes', 'Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park', 'The Annex, North Midtown, Yorkville', 'Toronto Dominion Centre, Design Exchange', 'Victoria Village', 'Willowdale, Newtonbrook', 'High Park, The Junction South', 'York Mills, Silver Hills', 'Commerce Co

# # 4. Clustering and examination of results

In [31]:
# Running K means to create 3 clusters in Toronto
kclusters = 3
toronto_clustering = toronto_malls.drop(["Neighborhoods"], 1)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_clustering)

# Checking cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 1, 1, 2, 1, 1, 0, 0, 0, 0], dtype=int32)

In [32]:
# Showing top 10 venues by cluster with labels
toronto_merged = toronto_malls.copy()
toronto_merged["Cluster Labels"] = kmeans.labels_

In [33]:
toronto_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
toronto_merged

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels
0,Agincourt,0.010000,1
1,"Alderwood, Long Branch",0.010000,1
2,"Bathurst Manor, Wilson Heights, Downsview North",0.010000,1
3,Bayview Village,0.020000,2
4,"Bedford Park, Lawrence Manor East",0.010000,1
...,...,...,...
94,"Willowdale, Willowdale West",0.000000,0
95,Woburn,0.000000,0
96,Woodbine Heights,0.000000,0
97,York Mills West,0.012048,1


In [34]:
# Adding latitude and longitude
toronto_merged = toronto_merged.join(df_toronto.set_index("Neighborhood"), on="Neighborhood")

print(toronto_merged.shape)
toronto_merged

(103, 6)


Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,PostalCode,Latitude,Longitude
0,Agincourt,0.010000,1,M1S,43.794200,-79.262029
1,"Alderwood, Long Branch",0.010000,1,M8W,43.602414,-79.543484
2,"Bathurst Manor, Wilson Heights, Downsview North",0.010000,1,M3H,43.754328,-79.442259
3,Bayview Village,0.020000,2,M2K,43.786947,-79.385975
4,"Bedford Park, Lawrence Manor East",0.010000,1,M5M,43.733283,-79.419750
...,...,...,...,...,...,...
94,"Willowdale, Willowdale West",0.000000,0,M2R,43.782736,-79.442259
95,Woburn,0.000000,0,M1G,43.770992,-79.216917
96,Woodbine Heights,0.000000,0,M4C,43.695344,-79.318389
97,York Mills West,0.012048,1,M2P,43.752758,-79.400049


In [35]:
# Sorting by clusters
print(toronto_merged.shape)
toronto_merged.sort_values(["Cluster Labels"], inplace=True)
toronto_merged


(103, 6)


Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,PostalCode,Latitude,Longitude
47,Lawrence Park,0.00,0,M4N,43.728020,-79.388790
30,"Forest Hill North & West, Forest Hill Road Park",0.00,0,M5P,43.696948,-79.411307
72,"South Steeles, Silverstone, Humbergate, Jamest...",0.00,0,M9V,43.739416,-79.588437
33,"Golden Mile, Clairlea, Oakridge",0.00,0,M1L,43.711112,-79.284577
34,"Guildwood, Morningside, West Hill",0.00,0,M1E,43.763573,-79.188711
...,...,...,...,...,...,...
85,"University of Toronto, Harbord",0.02,2,M5S,43.662696,-79.400049
74,"St. James Town, Cabbagetown",0.02,2,M4X,43.667967,-79.367675
63,"Queen's Park, Ontario Provincial Government",0.02,2,M7A,43.662301,-79.389494
42,"Islington Avenue, Humber Valley Village",0.02,2,M9A,43.667856,-79.532242


In [36]:
# Mapping clusters
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**3 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, 2, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=3,
        popup=label,
        color=rainbow[cluster-2],
        fill=True,
        fill_color=rainbow[cluster-2],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [37]:
# Saving the map as HTML file
map_clusters.save('map_clusters.html')

In [38]:
# Examining cluster 0
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,PostalCode,Latitude,Longitude
47,Lawrence Park,0.0,0,M4N,43.72802,-79.38879
30,"Forest Hill North & West, Forest Hill Road Park",0.0,0,M5P,43.696948,-79.411307
72,"South Steeles, Silverstone, Humbergate, Jamest...",0.0,0,M9V,43.739416,-79.588437
33,"Golden Mile, Clairlea, Oakridge",0.0,0,M1L,43.711112,-79.284577
34,"Guildwood, Morningside, West Hill",0.0,0,M1E,43.763573,-79.188711
35,"Harbourfront East, Union Station, Toronto Islands",0.0,0,M5J,43.640816,-79.381752
71,Scarborough Village,0.0,0,M1J,43.744734,-79.239476
37,Hillcrest Village,0.0,0,M2H,43.803762,-79.363452
38,Humber Summit,0.0,0,M9L,43.756303,-79.565963
39,"Humberlea, Emery",0.0,0,M9M,43.724766,-79.532242


In [39]:
# Examining cluster 1
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,PostalCode,Latitude,Longitude
76,Stn A PO Boxes,0.01,1,M5W,43.646435,-79.374846
78,"Summerhill West, Rathnelly, South Hill, Forest...",0.01,1,M4V,43.686412,-79.400049
92,"Willowdale, Newtonbrook",0.01,1,M2M,43.789053,-79.408493
73,St. James Town,0.01,1,M5C,43.651494,-79.375418
70,"Runnymede, The Junction North",0.01,1,M6N,43.673185,-79.487262
65,"Richmond, Adelaide, King",0.01,1,M5H,43.650571,-79.384568
89,Westmount,0.012048,1,M9P,43.696319,-79.532242
66,Rosedale,0.01,1,M4W,43.679563,-79.377529
87,Victoria Village,0.01,1,M4A,43.725882,-79.315572
84,"Toronto Dominion Centre, Design Exchange",0.01,1,M5K,43.647177,-79.381576


In [40]:
# Examining cluster 2
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,PostalCode,Latitude,Longitude
3,Bayview Village,0.02,2,M2K,43.786947,-79.385975
93,"Willowdale, Willowdale East",0.022222,2,M2N,43.77012,-79.408493
13,Central Bay Street,0.02,2,M5G,43.657952,-79.387383
15,Church and Wellesley,0.02,2,M4Y,43.66586,-79.38316
85,"University of Toronto, Harbord",0.02,2,M5S,43.662696,-79.400049
74,"St. James Town, Cabbagetown",0.02,2,M4X,43.667967,-79.367675
63,"Queen's Park, Ontario Provincial Government",0.02,2,M7A,43.662301,-79.389494
42,"Islington Avenue, Humber Valley Village",0.02,2,M9A,43.667856,-79.532242
62,Parkwoods,0.02,2,M3A,43.753259,-79.329656


# # END OF NOTEBOOK