# Web Scraping With Pandas

In [1]:
import pandas as pd

url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

df_list = pd.read_html(url,na_values=["Not assigned"])
df = df_list[0]

# Data Cleaning

In [2]:
df = df[df['Borough'].notna()]
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


# Data Shape

In [3]:
df.reset_index(drop=True,inplace=True)
df.shape

(103, 3)

# Getting Coordinates

In [9]:
!pip install pgeocode
import pgeocode
nomi = pgeocode.Nominatim('ca')
postal_code = df["Postal Code"].tolist()
for _ in (df["Postal Code"]):
    location = nomi.query_postal_code(postal_code)
    lat,long = (location.latitude, location.longitude)



In [10]:
lat.reset_index(drop=True,inplace=True)
df["Lat"] = lat
df['Long'] = long

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


In [11]:
df['Lat'][76] = 43.6369656

df['Long'][76]= -79.615819

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


In [12]:
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Lat,Long
0,M3A,North York,Parkwoods,43.7545,-79.33
1,M4A,North York,Victoria Village,43.7276,-79.3148
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6555,-79.3626
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.7223,-79.4504
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6641,-79.3889


# Analyzing Toronto Neighbourhoods

In [13]:
!pip install folium
import folium # map rendering library



In [16]:
# create map of New York using latitude and longitude values
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode('Toronto, CA')
latitude = location.latitude
longitude = location.longitude

latitude = location.latitude
longitude = location.longitude
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, postalcode in zip(df['Lat'], df['Long'], df['Borough'], df['Postal Code']):
    label = '{}, {}'.format(postalcode, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Reducing the data by using only borough with "Toronto" in the name

In [20]:
toronto_data = df[df['Borough'].str.contains('Toronto', regex=False)].reset_index(drop=True)
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Lat,Long
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6555,-79.3626
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6641,-79.3889
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.6572,-79.3783
3,M5C,Downtown Toronto,St. James Town,43.6513,-79.3756
4,M4E,East Toronto,The Beaches,43.6784,-79.2941


### Map from only "Toronto" borough

In [21]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, postalcode, borough in zip(toronto_data['Lat'], toronto_data['Long'], toronto_data['Postal Code'], toronto_data['Borough']):
    label = '{}, {}'.format(postalcode, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Define the Foursquare Credentials and version you required

In [22]:
CLIENT_ID = 'B2F2YEFOBWBVYG0MSN1VSWGOL2CELJ0VY02OLVLBTSTWXWAO' # your Foursquare ID
CLIENT_SECRET = 'MUH2VRLB40QRF0OCMZAB4HDQZ5XELC2YWQYWG2ZFH32QUJDP' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: B2F2YEFOBWBVYG0MSN1VSWGOL2CELJ0VY02OLVLBTSTWXWAO
CLIENT_SECRET:MUH2VRLB40QRF0OCMZAB4HDQZ5XELC2YWQYWG2ZFH32QUJDP


### Explore the Neighborhoods selected in the dataframe

In [23]:
toronto_data.loc[1, 'Neighbourhood']

"Queen's Park, Ontario Provincial Government"

In [24]:
borough_latitude = toronto_data.loc[0, 'Lat'] # neighborhood latitude value
borough_longitude = toronto_data.loc[0, 'Long'] # neighborhood longitude value

borough_name = toronto_data.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(borough_name, 
                                                               borough_latitude, 
                                                               borough_longitude))

Latitude and longitude values of Regent Park, Harbourfront are 43.6555, -79.3626.


### Getting venues from Foursquare API

In [25]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    borough_latitude, 
    borough_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=B2F2YEFOBWBVYG0MSN1VSWGOL2CELJ0VY02OLVLBTSTWXWAO&client_secret=MUH2VRLB40QRF0OCMZAB4HDQZ5XELC2YWQYWG2ZFH32QUJDP&v=20180605&ll=43.6555,-79.3626&radius=500&limit=100'

In [26]:
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

results = requests.get(url).json()

In [27]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [28]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Tandem Coffee,Coffee Shop,43.653559,-79.361809
1,Roselle Desserts,Bakery,43.653447,-79.362017
2,Figs Breakfast & Lunch,Breakfast Spot,43.655675,-79.364503
3,The Yoga Lounge,Yoga Studio,43.655515,-79.364955
4,Sumach Espresso,Coffee Shop,43.658135,-79.359515


In [29]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

23 venues were returned by Foursquare.


## Exploring the Neighborhoods

In [None]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [31]:
toronto_venues = getNearbyVenues(names=toronto_data['Neighbourhood'],
                                   latitudes=toronto_data['Lat'],
                                   longitudes=toronto_data['Long']
                                  )

Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
The Danforth West, Riverdale
Toronto Dominion Centre, Design Exchange
Brockton, Parkdale Village, Exhibition Place
India Bazaar, The Beaches West
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West, Forest Hill Road Park
High Park, The Junction South
North Toronto West, Lawrence Park
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
University of Toronto, Harbord
Runnymede, Swansea
Moore Park, Summerhill East
Kensington Market, Chinatown, Grange Park
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
R

In [34]:
print(toronto_venues.shape)
toronto_venues

(1540, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.6555,-79.3626,Tandem Coffee,43.653559,-79.361809,Coffee Shop
1,"Regent Park, Harbourfront",43.6555,-79.3626,Roselle Desserts,43.653447,-79.362017,Bakery
2,"Regent Park, Harbourfront",43.6555,-79.3626,Figs Breakfast & Lunch,43.655675,-79.364503,Breakfast Spot
3,"Regent Park, Harbourfront",43.6555,-79.3626,The Yoga Lounge,43.655515,-79.364955,Yoga Studio
4,"Regent Park, Harbourfront",43.6555,-79.3626,Sumach Espresso,43.658135,-79.359515,Coffee Shop
5,"Regent Park, Harbourfront",43.6555,-79.3626,Body Blitz Spa East,43.654735,-79.359874,Spa
6,"Regent Park, Harbourfront",43.6555,-79.3626,Sukhothai,43.658444,-79.365681,Thai Restaurant
7,"Regent Park, Harbourfront",43.6555,-79.3626,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
8,"Regent Park, Harbourfront",43.6555,-79.3626,Rooster Coffee,43.651900,-79.365609,Coffee Shop
9,"Regent Park, Harbourfront",43.6555,-79.3626,Berkeley Church,43.655123,-79.365873,Event Space


In [35]:
toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,90,90,90,90,90,90
"Brockton, Parkdale Village, Exhibition Place",39,39,39,39,39,39
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",15,15,15,15,15,15
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",58,58,58,58,58,58
Central Bay Street,72,72,72,72,72,72
Christie,12,12,12,12,12,12
Church and Wellesley,72,72,72,72,72,72
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
Davisville,23,23,23,23,23,23
Davisville North,6,6,6,6,6,6


In [36]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 212 uniques categories.


### Analyzing each neighbourhood

In [37]:
# one hot encoding, creating dummy values 
toronto_onehotencoding = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehotencoding['Neighbourhood'] = toronto_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehotencoding.columns[-1]] + list(toronto_onehotencoding.columns[:-1])
toronto_onehotencoding = toronto_onehotencoding[fixed_columns]

toronto_onehotencoding.head()

Unnamed: 0,Neighbourhood,Accessories Store,Afghan Restaurant,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [38]:
toronto_onehotencoding.shape

(1540, 213)

### Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [39]:
toronto_grouped = toronto_onehotencoding.groupby('Neighbourhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighbourhood,Accessories Store,Afghan Restaurant,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.011111,0.022222,0.0,0.0,0.0,0.0,0.011111,...,0.0,0.011111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011111
1,"Brockton, Parkdale Village, Exhibition Place",0.025641,0.0,0.0,0.025641,0.0,0.025641,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017241,0.0,0.017241
4,Central Bay Street,0.0,0.0,0.0,0.0,0.013889,0.0,0.0,0.0,0.0,...,0.0,0.0,0.013889,0.013889,0.0,0.013889,0.0,0.0,0.0,0.0
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.0,0.013889,0.013889,0.0,0.0,0.013889,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778
7,"Commerce Court, Victoria Hotel",0.0,0.0,0.03,0.01,0.0,0.0,0.03,0.0,0.0,...,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0
8,Davisville,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [40]:
toronto_grouped.shape

(39, 213)

### Getting each neighborhood along with the top 5 most common venues

In [41]:
num_top_venues = 5
for neigh in toronto_grouped['Neighbourhood']:
    temp = toronto_grouped[toronto_grouped['Neighbourhood'] == neigh].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})

### Sorting the venues in descending order

In [42]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_top_venues]

### Create the new dataframe and display the top 10 venues for each neighborhood.


In [43]:
import numpy as np
num_top_venues = 10
indicators = ['st', 'nd', 'rd']

columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.shape

(39, 11)

### Run k-means to cluster the neighborhood into 5 clusters.


In [44]:
from sklearn.cluster import KMeans
import sklearn.cluster.k_means_
km = KMeans(n_clusters=3, init='k-means++', max_iter=100, n_init=1, 
  verbose=True)

In [45]:
kclusters = 5
toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=1).fit(toronto_grouped_clustering)
print(kmeans.labels_[0:10])
print(len(kmeans.labels_))

[0 0 0 0 0 0 0 0 0 0]
39


### Create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.


In [46]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [47]:
toronto_merged = toronto_data
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')
toronto_merged

Unnamed: 0,Postal Code,Borough,Neighbourhood,Lat,Long,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6555,-79.3626,0,Coffee Shop,Breakfast Spot,Theater,Distribution Center,Pub,Restaurant,Electronics Store,Event Space,Food Truck,Spa
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6641,-79.3889,0,Coffee Shop,Dance Studio,Distribution Center,Mexican Restaurant,Café,Bubble Tea Shop,Ramen Restaurant,College Cafeteria,Portuguese Restaurant,College Theater
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.6572,-79.3783,0,Coffee Shop,Clothing Store,Café,Cosmetics Shop,Japanese Restaurant,Bubble Tea Shop,Ramen Restaurant,Plaza,Pizza Place,Hotel
3,M5C,Downtown Toronto,St. James Town,43.6513,-79.3756,0,Café,Coffee Shop,Seafood Restaurant,Clothing Store,American Restaurant,Restaurant,Italian Restaurant,Cocktail Bar,Cosmetics Shop,Park
4,M4E,East Toronto,The Beaches,43.6784,-79.2941,0,Pub,Cheese Shop,Neighborhood,Bakery,Trail,Gastropub,Health Food Store,Event Space,Ethiopian Restaurant,Falafel Restaurant
5,M5E,Downtown Toronto,Berczy Park,43.6456,-79.3754,0,Coffee Shop,Café,Hotel,Bakery,Seafood Restaurant,Beer Bar,Restaurant,Japanese Restaurant,Pub,Cheese Shop
6,M5G,Downtown Toronto,Central Bay Street,43.6564,-79.386,0,Coffee Shop,Italian Restaurant,Café,Sushi Restaurant,Breakfast Spot,Clothing Store,Restaurant,Bubble Tea Shop,Sandwich Place,Middle Eastern Restaurant
7,M6G,Downtown Toronto,Christie,43.6683,-79.4205,0,Café,Grocery Store,Athletics & Sports,Candy Store,Baby Store,Coffee Shop,Playground,Park,Fish & Chips Shop,Eastern European Restaurant
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.6496,-79.3833,0,Café,Coffee Shop,Gym,Hotel,Asian Restaurant,Sushi Restaurant,Japanese Restaurant,Salad Place,Steakhouse,Restaurant
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.6655,-79.4378,0,Grocery Store,Park,Furniture / Home Store,Bakery,Athletics & Sports,Brazilian Restaurant,Pet Store,Wine Shop,Gym / Fitness Center,Gym


### Finally, let's visualize the resulting clusters by Postal Code

In [51]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib import colors
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Lat'], toronto_merged['Long'], toronto_merged['Postal Code'],kmeans.labels_):
    label = folium.Popup(str(poi) + '\n(Cluster: ' + str(cluster)+')', parse_html=True)
    folium.CircleMarker([lat, lon], radius=5, popup=label, color=rainbow[cluster-1], fill=True, fill_color=rainbow[cluster-1], fill_opacity=0.7).add_to(map_clusters)
map_clusters

### CLUSTER 1

In [48]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[0] + list(range(5, toronto_merged.shape[1]))]].head()

Unnamed: 0,Postal Code,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,0,Coffee Shop,Breakfast Spot,Theater,Distribution Center,Pub,Restaurant,Electronics Store,Event Space,Food Truck,Spa
1,M7A,0,Coffee Shop,Dance Studio,Distribution Center,Mexican Restaurant,Café,Bubble Tea Shop,Ramen Restaurant,College Cafeteria,Portuguese Restaurant,College Theater
2,M5B,0,Coffee Shop,Clothing Store,Café,Cosmetics Shop,Japanese Restaurant,Bubble Tea Shop,Ramen Restaurant,Plaza,Pizza Place,Hotel
3,M5C,0,Café,Coffee Shop,Seafood Restaurant,Clothing Store,American Restaurant,Restaurant,Italian Restaurant,Cocktail Bar,Cosmetics Shop,Park
4,M4E,0,Pub,Cheese Shop,Neighborhood,Bakery,Trail,Gastropub,Health Food Store,Event Space,Ethiopian Restaurant,Falafel Restaurant
