In [19]:
import numpy as np 
import pandas as pd 

In [20]:
import requests
from pandas import DataFrame
from tabulate import tabulate
from bs4 import BeautifulSoup

# Load the postal-code table for codes starting with "M" from Wikipedia.
# read_html returns one DataFrame per HTML table found on the page; the
# first table is the one used here.
df = pd.read_html(
    io='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
)[0]
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [21]:
# Replace the column labels positionally with the short names used from here
# on. The list must have exactly one entry per existing column.
df.columns = pd.Index(['Postcode', 'Borough', 'Neighbourhood'])
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [22]:
# Remove the row whose label is the first entry of the index (label 0 here).
# NOTE(review): in the recorded run that row is M1A with borough
# 'Not assigned', which the borough filter that follows would exclude as
# well - confirm this step is still needed.
df1 = df.drop(index=[df.index[0]])
df1.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"


In [23]:
# Keep only the postal codes that have been assigned to a borough.
# .copy() makes df2 an independent frame instead of a selection of df1, so
# values written into df2 afterwards are unambiguous and do not depend on
# pandas' view-versus-copy behaviour.
df2 = df1[df1['Borough'] != 'Not assigned'].copy()
df2.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [24]:
# If a postal code has a borough but no assigned neighbourhood, use the
# borough name as the neighbourhood.
#
# Series.mask swaps in the Borough value wherever Neighbourhood equals
# 'Not assigned', and assign returns a new frame carrying the result.
# Chained assignment (df2.col[mask] = ...) is deliberately avoided: it writes
# through an intermediate object, only "works" with SettingWithCopyWarning
# silenced, and does not update the frame under pandas Copy-on-Write.
df2 = df2.assign(
    Neighbourhood=df2['Neighbourhood'].mask(
        df2['Neighbourhood'] == 'Not assigned', df2['Borough']
    )
)

df2.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"
11,M3B,North York,Don Mills
12,M4B,East York,"Parkview Hill, Woodbine Gardens"
13,M5B,Downtown Toronto,"Garden District, Ryerson"


In [25]:
# Collapse df2 to one row per (Postcode, Borough) pair, gathering that
# pair's Neighbourhood values into a tuple.
df3 = (
    df2.groupby(['Postcode', 'Borough'])['Neighbourhood']
    .agg(lambda names: tuple(names))
    .reset_index()
)
df3.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"(Malvern, Rouge,)"
1,M1C,Scarborough,"(Rouge Hill, Port Union, Highland Creek,)"
2,M1E,Scarborough,"(Guildwood, Morningside, West Hill,)"
3,M1G,Scarborough,"(Woburn,)"
4,M1H,Scarborough,"(Cedarbrae,)"


In [26]:
# Same aggregation as the preceding cell (one row per (Postcode, Borough)
# pair, Neighbourhood values gathered into a tuple), shown with a longer
# preview.
df3 = (
    df2.groupby(['Postcode', 'Borough'])['Neighbourhood']
    .agg(lambda names: tuple(names))
    .reset_index()
)
df3.head(11)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"(Malvern, Rouge,)"
1,M1C,Scarborough,"(Rouge Hill, Port Union, Highland Creek,)"
2,M1E,Scarborough,"(Guildwood, Morningside, West Hill,)"
3,M1G,Scarborough,"(Woburn,)"
4,M1H,Scarborough,"(Cedarbrae,)"
5,M1J,Scarborough,"(Scarborough Village,)"
6,M1K,Scarborough,"(Kennedy Park, Ionview, East Birchmount Park,)"
7,M1L,Scarborough,"(Golden Mile, Clairlea, Oakridge,)"
8,M1M,Scarborough,"(Cliffside, Cliffcrest, Scarborough Village We..."
9,M1N,Scarborough,"(Birch Cliff, Cliffside West,)"


In [27]:
# Read the postal-code coordinate table and rename its key column so it
# matches the 'Postcode' column used by the Toronto frames.
Geospatial_data = pd.read_csv('http://cocl.us/Geospatial_data').rename(
    columns={'Postal Code': 'Postcode'}
)
Geospatial_data.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [28]:
# (rows, columns) of the grouped frame df3, checked before coordinates are
# merged in.
df3.shape

(103, 3)

In [29]:
# Attach Latitude/Longitude to every grouped postal code. The inner join
# keeps only postcodes that appear in both frames.
Toronto_df_with_coordinates = df3.merge(
    Geospatial_data, on='Postcode', how='inner'
)
Toronto_df_with_coordinates.head(11)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"(Malvern, Rouge,)",43.806686,-79.194353
1,M1C,Scarborough,"(Rouge Hill, Port Union, Highland Creek,)",43.784535,-79.160497
2,M1E,Scarborough,"(Guildwood, Morningside, West Hill,)",43.763573,-79.188711
3,M1G,Scarborough,"(Woburn,)",43.770992,-79.216917
4,M1H,Scarborough,"(Cedarbrae,)",43.773136,-79.239476
5,M1J,Scarborough,"(Scarborough Village,)",43.744734,-79.239476
6,M1K,Scarborough,"(Kennedy Park, Ionview, East Birchmount Park,)",43.727929,-79.262029
7,M1L,Scarborough,"(Golden Mile, Clairlea, Oakridge,)",43.711112,-79.284577
8,M1M,Scarborough,"(Cliffside, Cliffcrest, Scarborough Village We...",43.716316,-79.239476
9,M1N,Scarborough,"(Birch Cliff, Cliffside West,)",43.692657,-79.264848
