In [1]:
import numpy as np
import pandas as pd 
import requests
import requests
from bs4 import BeautifulSoup

In [2]:
# Download the Wikipedia page listing Canadian postal codes that start with "M"
# and locate the postal-code table on it.
website_url = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,  # do not let a stalled connection hang the notebook forever
).text
soup = BeautifulSoup(website_url, 'lxml')
My_table = soup.find('table', {'class': 'wikitable sortable'})
if My_table is None:
    # soup.find returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the next line.
    raise ValueError("No 'wikitable sortable' table found on the page")
Links = My_table.findAll('tr')

# Build one entry per table row; Links[0] is skipped as the header row.
PostalCode = []
Borough = []
Neighborhood = []
for link in Links[1:]:
    # Look the cells up once per row and strip surrounding whitespace, so a
    # trailing newline in the markup can neither defeat the 'Not assigned'
    # comparison nor leak into the stored values.
    cells = [cell.text.strip() for cell in link.findAll('td')]
    if len(cells) < 3:
        # Not a (postal code, borough, neighbourhood) data row.
        continue
    postal_code, borough, neighborhood = cells[:3]
    if borough == 'Not assigned':
        continue
    PostalCode.append(postal_code)
    Borough.append(borough)
    # A row without a neighbourhood falls back to its borough name.
    Neighborhood.append(borough if neighborhood == 'Not assigned' else neighborhood)

df = pd.DataFrame(
    {'PostalCode': PostalCode, 'Borough': Borough, 'Neighborhood': Neighborhood},
    columns=['PostalCode', 'Borough', 'Neighborhood'],
)
def f(a):
    """Fold the items of ``a`` into a single ", "-separated string.

    Items are visited in iteration order. While the accumulated result is
    still the empty string, the next item replaces it; afterwards each item
    is appended after a ", " separator. An empty iterable yields "".
    """
    joined = ""
    for piece in a:
        joined = piece if joined == "" else joined + ", " + piece
    return joined

# Collapse rows that share a postal code and borough into one row, combining
# the remaining column's values with f.
rows_by_code = df.groupby(by=['PostalCode', 'Borough'], as_index=False)
df = rows_by_code.agg(f)

In [3]:
# Read the coordinates CSV and rename its "Postal Code" column to "PostalCode"
# so it shares a key name with the scraped table.
data = (
    pd.read_csv("Geospatial_Coordinates.csv")
    .rename(columns={"Postal Code": "PostalCode"})
)

In [4]:
# Inner-join the postal-code table with the coordinates on the shared key.
combined = df.merge(data, on="PostalCode")
combined.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [5]:
# Keep only the rows whose borough is exactly "Downtown Toronto".
downtown_data = combined.loc[combined['Borough'].eq("Downtown Toronto")]
downtown_data

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
50,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529
51,M4X,Downtown Toronto,"Cabbagetown, St. James Town",43.667967,-79.367675
52,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316
53,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
54,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
55,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
56,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
57,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
58,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.650571,-79.384568
59,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752


In [6]:
# One-hot encode the Neighborhood column; the empty prefix and separator make
# each indicator column carry the bare Neighborhood value as its name.
downtown_data_analysis = pd.get_dummies(
    data=downtown_data.loc[:, ['Neighborhood']],
    prefix="",
    prefix_sep="",
)

In [7]:
from sklearn.cluster import KMeans
# Number of clusters to fit; also used later to size the map's color palette.
kclusters = 5

# Fit k-means on the one-hot neighbourhood matrix. random_state fixes the
# initialisation seed. n_init is pinned explicitly because the library default
# changed between scikit-learn releases, which would otherwise change the
# labels depending on the installed version.
# NOTE(review): the features are one-hot encodings of the Neighborhood string
# only; confirm that this is the intended clustering input.
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(downtown_data_analysis)

# Peek at the labels assigned to the first ten rows.
kmeans.labels_[0:10]

array([2, 0, 0, 0, 0, 0, 0, 4, 0, 0], dtype=int32)

In [8]:
# Attach the k-means label of each row as the first column.
# DataFrame.insert raises ValueError if the column already exists, so any
# 'Cluster Labels' column left by a previous run of this cell is dropped first.
# drop() returns a new frame, so the frame displayed by the earlier cell is not
# mutated and this cell can be re-run safely.
downtown_data = downtown_data.drop(columns=['Cluster Labels'], errors='ignore')
downtown_data.insert(0, 'Cluster Labels', kmeans.labels_)
downtown_data

Unnamed: 0,Cluster Labels,PostalCode,Borough,Neighborhood,Latitude,Longitude
50,2,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529
51,0,M4X,Downtown Toronto,"Cabbagetown, St. James Town",43.667967,-79.367675
52,0,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316
53,0,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
54,0,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
55,0,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
56,0,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
57,4,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
58,0,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.650571,-79.384568
59,0,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752


In [9]:
import folium
# Shell escape: runs `conda install` for geopy from the conda-forge channel;
# --yes answers the confirmation prompt automatically.
# NOTE(review): `!conda` uses whichever conda is on the shell PATH, which may
# not be the kernel's environment — confirm, and consider installing geopy
# outside the notebook instead.
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

Solving environment: done


  current version: 4.5.11
  latest version: 4.7.12

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.



In [10]:
# Resolve the free-text address to coordinates with the Nominatim geocoder;
# the resulting latitude/longitude are used to centre the map.
address = 'Toronto, Ontario'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() yields None when the service finds no match; raise a clear
    # error here instead of an AttributeError on the attribute access below.
    raise ValueError("Geocoder returned no result for address: " + address)
latitude = location.latitude
longitude = location.longitude

In [11]:
import matplotlib.cm as cm
import matplotlib.colors as colors

In [13]:
# Base map centred on the geocoded coordinates.
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow color per
# cluster, converted to hex strings for folium
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(
    downtown_data['Latitude'],
    downtown_data['Longitude'],
    downtown_data['Neighborhood'],
    downtown_data['Cluster Labels'],
):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Labels run from 0 to kclusters - 1, so index the palette with the label
    # itself rather than label - 1, which only worked for label 0 through
    # negative-index wraparound.
    marker_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7,
    ).add_to(map_clusters)

map_clusters