### Previous code

In [1]:
# Creating dataframe

from bs4 import BeautifulSoup
import requests

url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
page = requests.get(url)
soup = BeautifulSoup(page.content, 'html.parser')
tb = soup.find('table', class_='wikitable')

headings = []
headers = tb.find_all('th')
for i in range(len(headers)):
    headings.append(headers[i].get_text().replace('\n',''))
    
main = []
sub = []
values = tb.find_all('td')

for i in range(len(values)):
    val = values[i].get_text().replace('\n','')
    if i == 0:
        sub.append(val)
    if i%3 != 0:
        sub.append(val)
    if i%3 == 0 and i != 0:
        main.append(sub)
        sub = []
        sub.append(val)
    if i == range(len(values))[-1]:
        main.append(sub)

import pandas as pd
df = pd.DataFrame(columns=headings, data=main)
df_clean = df[df['Borough'] != 'Not assigned']

In [2]:
# Question 2 Answer

coord = pd.read_csv('Geospatial_Coordinates.csv')
coord.columns = ['Postcode', 'Latitude','Longitude']
df_q2 = pd.merge(df_clean, coord, on='Postcode')

### Question 3: EDA

_Explore and cluster the neighborhoods in Toronto. _

In [3]:
df_q2['Borough'].value_counts()

Etobicoke           45
North York          38
Scarborough         37
Downtown Toronto    37
Central Toronto     17
West Toronto        13
York                 9
East Toronto         7
East York            6
Queen's Park         1
Mississauga          1
Name: Borough, dtype: int64

Toronoto is split into four areas: Downtown Toronto, Central Toronto, West Toronto and East Toronto

In [4]:
toronto = df_q2[df_q2['Borough'].isin(['Downtown Toronto', 'Central Toronto', 'West Toronto' and 'East Toronto'])]
toronto.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
13,M5B,Downtown Toronto,Ryerson,43.657162,-79.378937
14,M5B,Downtown Toronto,Garden District,43.657162,-79.378937
27,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418


In [5]:
print('Toronto has', len(toronto['Postcode'].unique()), 'different postcodes')

Toronto has 32 different postcodes


In [6]:
neighbourhoods = list(toronto['Neighbourhood'])
print('Toronto has', len(set(neighbourhoods)), 'different neighbourhoods')

Toronto has 60 different neighbourhoods


In [7]:
toronto[0:5]

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
13,M5B,Downtown Toronto,Ryerson,43.657162,-79.378937
14,M5B,Downtown Toronto,Garden District,43.657162,-79.378937
27,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418


In [8]:
from sklearn.cluster import KMeans
clustering = toronto.drop(['Postcode', 'Borough', 'Neighbourhood'], 1)
kmeans = KMeans(n_clusters=5, random_state=0).fit(clustering)
labels = kmeans.predict(clustering)

In [9]:
toronto = toronto.assign(Cluster = labels) 
centroids = kmeans.cluster_centers_

In [10]:
import geopy
from geopy.geocoders import Nominatim
address = 'Toronto'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [13]:
import folium
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as colors

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

#set color scheme for the clusters
x = np.arange(5)
ys = [i + x + (i*x)**2 for i in range(5)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

#add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto['Latitude'], toronto['Longitude'], toronto['Neighbourhood'], 
                                  toronto['Cluster']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters