### Import Libraries

In [1]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs

### Web Scraping

In [2]:
# Download the Wikipedia page listing Canadian postal codes that start with "M"
# and locate the table that holds them.
# A timeout is set because `requests` otherwise waits indefinitely on a stalled
# connection, and the status is checked so an HTTP error page is never parsed
# as if it were the real article.
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
response.raise_for_status()
source = response.text
soup = bs(source, 'lxml')
toronto_table = soup.find('table', class_='wikitable sortable')

# `find` returns None when nothing matches; fail here with a clear message
# instead of with an AttributeError in a later cell.
if toronto_table is None:
    raise ValueError("Could not find a 'wikitable sortable' table on the page; "
                     "the page layout may have changed.")

### Convert to Dataframe

In [3]:
# Walk the rows of the scraped table and collect the three columns into
# parallel lists, then assemble them into a DataFrame in one step.
postal_code_list = []
borough_list = []
neighborhood_list = []

for row in toronto_table.find_all('tr'):
    cells = row.find_all('td')
    # Only rows with exactly three <td> cells are data rows; anything else
    # (presumably the header row) is skipped.
    if len(cells) != 3:
        continue
    # get_text() gathers *all* text inside the cell, so values wrapped in links
    # or other nested tags are not truncated to their first text node, and an
    # empty cell yields '' instead of raising on None.
    postal_code, borough, neighborhood = (cell.get_text().strip() for cell in cells)
    postal_code_list.append(postal_code)
    borough_list.append(borough)
    neighborhood_list.append(neighborhood)

df = pd.DataFrame({
    'Postal code': postal_code_list,
    'Borough': borough_list,
    'Neighborhood': neighborhood_list,
})
df

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
...,...,...,...
175,M5Z,Not assigned,
176,M6Z,Not assigned,
177,M7Z,Not assigned,
178,M8Z,Etobicoke,Mimico NW / The Queensway West / South of Bloo...


### Data Cleaning

#### Ignore cells with a borough that is Not assigned.

In [4]:
# Remove every row whose borough is the placeholder 'Not assigned', then
# renumber the remaining rows from 0. `df` is modified in place.
is_unassigned = df['Borough'] == 'Not assigned'
index = df.loc[is_unassigned].index
df.drop(labels=index, axis=0, inplace=True)
df.reset_index(inplace=True, drop=True)
df

Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government
...,...,...,...
98,M8X,Etobicoke,The Kingsway / Montgomery Road / Old Mill North
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,Business reply mail Processing CentrE
101,M8Y,Etobicoke,Old Mill South / King's Mill Park / Sunnylea /...


#### If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.

In [5]:
# List the distinct neighborhood values to eyeball them for placeholders.
df.loc[:, 'Neighborhood'].unique()

array(['Parkwoods', 'Victoria Village', 'Regent Park / Harbourfront',
       'Lawrence Manor / Lawrence Heights',
       "Queen's Park / Ontario Provincial Government", 'Islington Avenue',
       'Malvern / Rouge', 'Don Mills', 'Parkview Hill / Woodbine Gardens',
       'Garden District, Ryerson', 'Glencairn',
       'West Deane Park / Princess Gardens / Martin Grove / Islington / Cloverdale',
       'Rouge Hill / Port Union / Highland Creek', 'Woodbine Heights',
       'St. James Town', 'Humewood-Cedarvale',
       'Eringate / Bloordale Gardens / Old Burnhamthorpe / Markland Wood',
       'Guildwood / Morningside / West Hill', 'The Beaches',
       'Berczy Park', 'Caledonia-Fairbanks', 'Woburn', 'Leaside',
       'Central Bay Street', 'Christie', 'Cedarbrae', 'Hillcrest Village',
       'Bathurst Manor / Wilson Heights / Downsview North',
       'Thorncliffe Park', 'Richmond / Adelaide / King',
       'Dufferin / Dovercourt Village', 'Scarborough Village',
       'Fairview / Henry Far

In [6]:
# A neighborhood counts as missing when it is the literal 'Not assigned' or an
# empty string (the scraped table leaves the cell blank for unassigned codes).
MISSING_NEIGHBORHOOD_VALUES = ['Not assigned', '']

# Apply the rule: a row that has a borough but no neighborhood takes the
# borough name as its neighborhood. This is a no-op when nothing is missing.
missing_neighborhood = df['Neighborhood'].isin(MISSING_NEIGHBORHOOD_VALUES)
df.loc[missing_neighborhood, 'Neighborhood'] = df.loc[missing_neighborhood, 'Borough']

# Verification: this frame should be empty once the rule has been applied.
df2 = df[df['Neighborhood'].isin(MISSING_NEIGHBORHOOD_VALUES)]
df2

Unnamed: 0,Postal code,Borough,Neighborhood


#### As shown above, no rows are left with a 'Not assigned' neighborhood.

#### Show the number of rows in the DataFrame

In [7]:
# (number of rows, number of columns) of the cleaned frame.
len(df.index), len(df.columns)

(103, 3)