# Packages Needed
---

In [101]:
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
import numpy as np

# Retrieving Data
---

### Request the URL's HTML

In [102]:
# Wikipedia page listing the Canadian postal codes that start with "M";
# its table is the data source for the rest of the notebook.
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Bound the wait so a stalled connection cannot hang the kernel, and fail
# here -- not later during parsing -- if the server returns an error status.
page = requests.get(url, timeout=30)
page.raise_for_status()

### Create soup

In [103]:
# Parse the downloaded HTML with the lxml parser so the document can be
# searched. Call soup.prettify() to inspect the parsed markup when debugging.
soup = bs(markup=page.text, features="lxml")

### Locating the neighborhood table

In [104]:
# Locate the first <table> whose class attribute is exactly
# "wikitable sortable"; this is expected to be the postal-code table.
neigh_table = soup.find('table', class_="wikitable sortable")

# find() returns None when nothing matches; stop here with a clear message
# instead of letting the next cell fail with an AttributeError on None.
if neigh_table is None:
    raise ValueError(
        'No <table class="wikitable sortable"> found on the page; '
        'the page layout may have changed.'
    )

### Retrieving data from the table

In [105]:
# Column values collected from the table, one entry per data row. The three
# lists stay index-aligned because they are always appended to together.
postal = []
borough = []
neighborhood = []

for row in neigh_table.find_all('tr'):
    cells = row.find_all('td')
    # Only rows with exactly three <td> cells are treated as data rows;
    # anything else (e.g. a header row -- presumably built from <th>) is skipped.
    if len(cells) != 3:
        continue
    # get_text() returns the cell's complete text as a plain str. Unlike
    # find(text=True), it does not stop at the first text node (which truncates
    # cells that mix text and markup), never yields None for an empty cell, and
    # does not rely on the deprecated `text` argument. Trailing newlines are
    # deliberately kept here; the cleaning step removes them.
    code_text, borough_text, neighborhood_text = (cell.get_text() for cell in cells)
    postal.append(code_text)
    borough.append(borough_text)
    neighborhood.append(neighborhood_text)

# Creating Dataframe
---

In [107]:
# Assemble the three parallel lists into one table, one column per list.
df = pd.DataFrame({
    'Postal': postal,
    'Borough': borough,
    'Neighborhood': neighborhood,
})
df

Unnamed: 0,Postal,Borough,Neighborhood
0,M1A\n,Not assigned\n,Not assigned\n
1,M2A\n,Not assigned\n,Not assigned\n
2,M3A\n,North York\n,Parkwoods\n
3,M4A\n,North York\n,Victoria Village\n
4,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront\n"
...,...,...,...
175,M5Z\n,Not assigned\n,Not assigned\n
176,M6Z\n,Not assigned\n,Not assigned\n
177,M7Z\n,Not assigned\n,Not assigned\n
178,M8Z\n,Etobicoke\n,"Mimico NW, The Queensway West, South of Bloor,..."


# Cleaning Data
---

### Removing newline characters from the dataframe

In [108]:
# Delete every newline character found inside any string cell of the frame
# (regex=True makes this a substring replacement, not a whole-value match).
df = df.replace(to_replace='\n', value='', regex=True)
df

Unnamed: 0,Postal,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


### Removing rows with an unassigned borough

In [109]:
# Keep only rows whose Borough is present and is not the 'Not assigned'
# placeholder. The filter looks at Borough alone, so a row with an assigned
# borough is never dropped because of a missing value in another column.
has_borough = df['Borough'].notna() & df['Borough'].ne('Not assigned')

# Build a new frame (no in-place mutation) so re-running the cell is safe;
# .copy() lets later cells add or modify columns without pandas warnings.
# The original row labels are kept, so the index is no longer contiguous.
df = df.loc[has_borough].copy()
df.head()

Unnamed: 0,Postal,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### Checking for unassigned neighborhoods with an assigned borough

In [110]:
# Select the rows that still carry the 'Not assigned' placeholder as their
# neighborhood; an empty result means no such rows remain.
df_query = df[df['Neighborhood'] == "Not assigned"]
df_query

Unnamed: 0,Postal,Borough,Neighborhood


# Dataframe Shape

In [111]:
# (number of rows, number of columns) of the dataframe after cleaning
df.shape

(103, 3)