# TORONTO NEIGHBORHOODS PROJECT

In [1]:
#Webscraping
!pip install bs4
import pandas as pd
import requests
from bs4 import BeautifulSoup




In [2]:
url= "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
html_data=requests.get(url).text
soup=BeautifulSoup(html_data,"html5lib")
soup

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en"><head>
<meta charset="utf-8"/>
<title>List of postal codes of Canada: M - Wikipedia</title>
<script>document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"be0a040d-f355-438a-a732-b8fa8a6a88a2","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":1019189119,"wgRevisionId":1019189119,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Short description is different from Wikidata","Wikipedia semi-protected pag

In [3]:
#Get dataframe
toronto_neighborhood_data = pd.DataFrame(columns=["Postal_code", "Borough", "Neighborhood","Latitude","Longitude"])

for item in soup.find("tbody").find_all("p"):
    #Exclude those Not assigned
    if len(item.findChildren("a"))<2:
        continue
    Postal_code = item.findChildren("b")[0].text
    Borough = item.findChildren("a")[0].text
    Neighborhood = ', '.join([str(x.text) for x in item.findChildren("a")[1:]])
    toronto_neighborhood_data = toronto_neighborhood_data.append({"Postal_code":Postal_code, "Borough":Borough, "Neighborhood":Neighborhood}, ignore_index=True)

#remove duplicates
toronto_neighborhood_data.drop_duplicates(subset ="Neighborhood",
                     keep = False, inplace = True)
toronto_neighborhood_data.reset_index(drop=True, inplace=True)
toronto_neighborhood_data

Unnamed: 0,Postal_code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,,
1,M4A,North York,Victoria Village,,
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",,
3,M6A,North York,"Lawrence Manor, Lawrence Heights",,
4,M9A,Etobicoke,Islington Avenue,,
...,...,...,...,...,...
75,M5X,Downtown Toronto,"First Canadian Place, Underground city",,
76,M8X,Etobicoke,"The Kingsway, Old Mill",,
77,M4Y,Downtown Toronto,Church and Wellesley,,
78,M8Y,Etobicoke,"Old Mill, Sunnylea, Humber Bay, Mimico, The Qu...",,


In [4]:
#Get latitude and longitude
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="foursquare_agent")
for items in toronto_neighborhood_data.Neighborhood:
    location = geolocator.geocode(items + ", Toronto")
    if location==None:
        continue
    Latitude = location.latitude
    Longitude = location.longitude
    index=toronto_neighborhood_data[toronto_neighborhood_data['Neighborhood']==items].index.item()
    toronto_neighborhood_data.Longitude[index] = Longitude
    toronto_neighborhood_data.Latitude[index] = Latitude
toronto_neighborhood_data.dropna(inplace=True)

In [5]:
!conda install -c conda-forge folium=0.5.0 --yes
import folium

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.



In [6]:

# create map of Toronto using latitude and longitude values
location = geolocator.geocode('Toronto')
Latitude_toronto = location.latitude
Longitude_toronto = location.longitude
map_toronto = folium.Map(location=[Latitude_toronto, Longitude_toronto], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_neighborhood_data['Latitude'], toronto_neighborhood_data['Longitude'], toronto_neighborhood_data['Borough'], toronto_neighborhood_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [9]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

In [12]:

X = toronto_neighborhood_data[['Longitude','Latitude']]
print(X)
cluster_dataset = StandardScaler().fit_transform(X)
print(cluster_dataset)

k_means = KMeans(init="k-means++", n_clusters=5, n_init=12)
k_means.fit(cluster_dataset)
toronto_neighborhood_data['Cluster Labels']=k_means.labels_
toronto_neighborhood_data.loc[30:80]

    Longitude   Latitude
0  -79.320197    43.7588
1  -79.311189  43.732658
2  -79.385263  43.642981
3  -79.442566  43.716391
4  -79.542802  43.688307
5  -79.221701  43.809196
6  -79.345944  43.775347
7  -79.324842  43.703867
11 -79.339889  43.724451
12 -79.319279   43.69992
13 -79.372704  43.669403
14  -79.42808  43.688322
15 -79.585434  43.631239
16 -79.193699  43.790793
18 -79.225291  43.759824
19  -79.36809  43.704798
20 -79.385393  43.659756
21 -79.226692  43.756467
22 -79.425712  43.681695
24 -79.345407  43.704553
25 -79.391373  43.648587
26 -79.432636  43.665307
27 -79.211632  43.743742
28 -79.354296  43.769509
30 -79.300355  43.686433
32 -79.395255  43.665309
34 -79.376662  43.769197
36 -79.352594   43.66547
40 -79.296712  43.671024
42 -79.493416  43.715824
43  -79.57176  43.760078
44 -79.230463  43.726252
45  -79.41309  43.795986
46 -79.430197  43.721648
50 -79.480327   43.66107
51 -79.516247  43.700161
53 -79.470352  43.665478
55 -79.384594  43.670933
56 -79.451038  43.651443


Unnamed: 0,Postal_code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels
30,M4J,East York,The Danforth,43.686433,-79.300355,3
32,M6J,Little Portugal,Trinity,43.665309,-79.395255,3
34,M2K,North York,Bayview Village,43.769197,-79.376662,2
36,M4K,The Danforth,Riverdale,43.66547,-79.352594,3
40,M4L,India Bazaar,The Beaches,43.671024,-79.296712,3
42,M6L,North York,Maple Leaf Park,43.715824,-79.493416,0
43,M9L,North York,Humber Summit,43.760078,-79.57176,0
44,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village",43.726252,-79.230463,1
45,M2M,North York,Newtonbrook,43.795986,-79.41309,2
46,M5M,North York,"Bedford Park, Lawrence Manor",43.721648,-79.430197,0
