Web Scrape

In [11]:
import pandas as pd
 
# Wikipedia page listing Canadian postal codes that start with "M".
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# read_html parses the HTML tables found at the URL (BeautifulSoup backend);
# header=0 takes each table's first row as its column names.
data = pd.read_html(
    url,
    flavor='bs4',
    header=0,
    encoding='UTF8',
)

Create Table

In [12]:
# Stack every scraped table into a single frame. Columns missing from a given
# table are filled with NaN; sort=False leaves the column order unsorted.
df = pd.concat(objs=data, sort=False)
df.head(n=5)

Unnamed: 0.1,Postcode,Borough,Neighbourhood,Unnamed: 0,Canadian postal codes,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,...,PE,NB,QC,ON,MB,SK,AB,BC,NU/NT,YT
0,M1A,Not assigned,Not assigned,,,,,,,,...,,,,,,,,,,
1,M2A,Not assigned,Not assigned,,,,,,,,...,,,,,,,,,,
2,M3A,North York,Parkwoods,,,,,,,,...,,,,,,,,,,
3,M4A,North York,Victoria Village,,,,,,,,...,,,,,,,,,,
4,M5A,Downtown Toronto,Harbourfront,,,,,,,,...,,,,,,,,,,


Limit Output

In [13]:
# Keep only the three columns that describe a postal code; every other column
# in the concatenated frame is dropped.
df = df[["Postcode", "Borough", "Neighbourhood"]]

# Preview the first rows instead of rendering the whole frame.
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


Convert all values to strings

In [14]:
# Cast every cell to str so later comparisons work on text only; a missing
# value (NaN) becomes the literal text 'nan'.
# Each column is converted with Series.map rather than DataFrame.applymap,
# which is deprecated in recent pandas releases; the element-wise result is
# the same.
df = df.apply(lambda column: column.map(str))

# Preview the first rows instead of rendering the whole frame.
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


Remove Boroughs with no value and reset index

In [15]:
# Discard rows whose Borough is the placeholder 'Not assigned' or the text
# 'nan', then renumber the surviving rows from 0 (the old index is dropped).
df = (
    df.loc[~df['Borough'].isin(['Not assigned', 'nan'])]
      .reset_index(drop=True)
)

Check data

In [16]:
# (rows, columns) of the frame after removing rows without an assigned borough.
df.shape

(212, 3)

Combine Neighbourhoods that share a Postcode

In [17]:
# Collapse all rows that share a (Postcode, Borough) pair into one row whose
# Neighbourhood is the comma-separated list of that pair's neighbourhoods.
# reset_index turns the group keys back into ordinary columns.
df = (
    df.groupby(by=['Postcode', 'Borough'])['Neighbourhood']
      .apply(', '.join)
      .reset_index()
)

Check Data

In [18]:
# (rows, columns) of the frame after grouping neighbourhoods per postcode/borough.
df.shape

(103, 3)

In [19]:
# Preview the first rows of the grouped frame instead of rendering all of it.
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


Replace 'Not assigned' Neighbourhoods with the Borough name

In [20]:
# Where a row's Neighbourhood is the placeholder 'Not assigned', use that same
# row's Borough instead; all other rows keep their Neighbourhood.
#
# This is written as an explicit column assignment rather than
# `df.Neighbourhood.replace(..., inplace=True)` for two reasons:
#   * an in-place call on a column obtained by attribute access is chained
#     assignment, which is not guaranteed to write back into `df`;
#   * Series.where aligns the replacement values with the rows by index,
#     making the per-row substitution explicit instead of relying on how
#     `replace` treats a Series passed as the replacement value.
df['Neighbourhood'] = df['Neighbourhood'].where(
    df['Neighbourhood'] != 'Not assigned',
    df['Borough'],
)

Check Data

In [21]:
# Preview the first rows after the replacement instead of rendering the whole frame.
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [22]:
# (rows, columns) of the frame after the 'Not assigned' replacement step.
df.shape

(103, 3)

Woo Hoo!!

In [23]:
import numpy as np  # useful for many scientific computing in Python
import pandas as pd # primary data structure library

In [24]:
# Download the CSV of coordinates and load it into a frame; the preview cell
# that follows shows its columns.
dflatlong = pd.read_csv(
    filepath_or_buffer='https://cocl.us/Geospatial_data',
)
print('Data downloaded and read into a dataframe!')

    

Data downloaded and read into a dataframe!


In [25]:
# Show the first five rows of the coordinate table as loaded.
dflatlong.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [26]:
# Make sure every column label is a plain str.
dflatlong.columns = [str(label) for label in dflatlong.columns]

In [27]:
# Rename 'Postal Code' to 'Postcode' (in place) so the key column has the same
# name as the one in `df`.
dflatlong.rename({'Postal Code': 'Postcode'}, axis='columns', inplace=True)

In [28]:
# Show the first five rows again to confirm the renamed column.
dflatlong.head(n=5)

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [29]:
# Attach Latitude/Longitude to each postal-code row by joining the two frames
# on Postcode. pd.merge defaults to an inner join, so only postcodes present
# in both frames are kept.
# The result is bound to a name so it can be reused, rather than existing only
# as this cell's displayed output.
df_merged = pd.merge(df, dflatlong, on="Postcode")

# Preview the first rows instead of rendering the whole merged frame.
df_merged.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848
