### import relevant packages

In [1]:
import pandas as pd
import numpy as np
import requests
import io

### Get text from wikipage

In [2]:
wikipedia_link = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
raw_wikipedia_link = requests.get(wikipedia_link)

In [3]:
page = raw_wikipedia_link.text
print(page)

<!DOCTYPE html>
<html class="client-nojs" lang="en" dir="ltr">
<head>
<meta charset="UTF-8"/>
<title>List of postal codes of Canada: M - Wikipedia</title>
<script>document.documentElement.className = document.documentElement.className.replace( /(^|\s)client-nojs(\s|$)/, "$1client-js$2" );</script>
<script>(window.RLQ=window.RLQ||[]).push(function(){mw.config.set({"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":876823784,"wgRevisionId":876823784,"wgArticleId":539066,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Communications in Ontario","Postal codes in Canada","Toronto","Ontario-related lists"],"wgBreakFrames":false,"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","w

### read html-page in pandas dataframe, reset index and rename columns

In [4]:
data = pd.read_html(page)
print(data)

[            0                 1  \
0    Postcode           Borough   
1         M1A      Not assigned   
2         M2A      Not assigned   
3         M3A        North York   
4         M4A        North York   
5         M5A  Downtown Toronto   
6         M5A  Downtown Toronto   
7         M6A        North York   
8         M6A        North York   
9         M7A      Queen's Park   
10        M8A      Not assigned   
11        M9A         Etobicoke   
12        M1B       Scarborough   
13        M1B       Scarborough   
14        M2B      Not assigned   
15        M3B        North York   
16        M4B         East York   
17        M4B         East York   
18        M5B  Downtown Toronto   
19        M5B  Downtown Toronto   
20        M6B        North York   
21        M7B      Not assigned   
22        M8B      Not assigned   
23        M9B         Etobicoke   
24        M9B         Etobicoke   
25        M9B         Etobicoke   
26        M9B         Etobicoke   
27        M9B      

In [5]:
data1 = pd.DataFrame(data[0])
data1.columns = ['PostalCode','Borough','Neighbourhood']
data1.drop([0],axis=0,inplace=True)
data1 = data1.reset_index()

### Drop rows where Borough is not assigned

In [7]:
data1.drop(data1[data1['Borough']=="Not assigned"].index,axis=0, inplace=True)
print(data1)

     index PostalCode           Borough  \
2        3        M3A        North York   
3        4        M4A        North York   
4        5        M5A  Downtown Toronto   
5        6        M5A  Downtown Toronto   
6        7        M6A        North York   
7        8        M6A        North York   
8        9        M7A      Queen's Park   
10      11        M9A         Etobicoke   
11      12        M1B       Scarborough   
12      13        M1B       Scarborough   
14      15        M3B        North York   
15      16        M4B         East York   
16      17        M4B         East York   
17      18        M5B  Downtown Toronto   
18      19        M5B  Downtown Toronto   
19      20        M6B        North York   
22      23        M9B         Etobicoke   
23      24        M9B         Etobicoke   
24      25        M9B         Etobicoke   
25      26        M9B         Etobicoke   
26      27        M9B         Etobicoke   
27      28        M1C       Scarborough   
28      29 

More than one neighborhood can exist in one postal code area. These rows will be combined into one row with the neighborhoods separated with a comma:

In [8]:
data2 = data1.groupby('PostalCode').agg(lambda x:','.join(set(x)))

if neighbourhood is not assigned assign borough

In [9]:
data2.loc[data2['Neighbourhood']=="Not assigned",'Neighbourhood']=data2.loc[data2['Neighbourhood']=="Not assigned",'Borough']

print shape of dataframe

In [10]:
data2.shape

(103, 2)

add geocordinates to dataframe

In [12]:
geocor = pd.read_csv('http://cocl.us/Geospatial_data')

In [13]:
data2['Latitude'] = geocor.Latitude.values
data2['Longitude'] = geocor.Longitude.values
print(data2.head())

                Borough                         Neighbourhood   Latitude  \
PostalCode                                                                 
M1B         Scarborough                         Rouge,Malvern  43.806686   
M1C         Scarborough  Highland Creek,Rouge Hill,Port Union  43.784535   
M1E         Scarborough       West Hill,Guildwood,Morningside  43.763573   
M1G         Scarborough                                Woburn  43.770992   
M1H         Scarborough                             Cedarbrae  43.773136   

            Longitude  
PostalCode             
M1B        -79.194353  
M1C        -79.160497  
M1E        -79.188711  
M1G        -79.216917  
M1H        -79.239476  


Cluster and visualize the boroughs with folium

In [14]:
!conda install -c conda-forge folium=0.5.0

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    altair:  2.2.2-py35_1 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.5.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-forge

altair-2.2.2-p 100% |################################| Time: 0:00:00  38.22 MB/s
branca-0.3.1-p 100% |################################| Time: 0:00:00  34.45 MB/s
vincent-0.4.4- 100% |################################| Time: 0:00:00  40.56 MB/s
folium-0.5.0-p 100% |################################| Time: 0:00:00  48.33 MB/s


In [15]:
import folium
center_lat=data2.Latitude.mean()
center_long=data2.Longitude.mean()

In [20]:
map1 = folium.Map(location=[center_lat, center_long], zoom_start=13)
for lat, lng, label in zip(data2.Latitude, data2.Longitude, data2.Borough):
    folium.features.CircleMarker(
        [lat, lng],
        radius=2,
        color='blue',
        popup=label,
        fill = True,
        fill_color = 'red',
        fill_opacity = 0.6,
).add_to(map1)

In [19]:
map1