#### Import required libraries

In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests

#### Fetch the Wikipedia page and parse its HTML into a BeautifulSoup object

In [2]:
# Download the Wikipedia page listing Canadian postal codes that start with "M".
# The timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() surfaces an HTTP error here instead of letting an
# error page be parsed as if it were the article.
source = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,
)
source.raise_for_status()
# Parse the returned HTML with the lxml parser so the table can be searched.
soup = BeautifulSoup(source.text, 'lxml')

#### Walk the rows of the first `wikitable` with BeautifulSoup, collecting the header row into `columns` and the remaining rows into `data`

In [3]:
# Locate the first element on the page that carries the "wikitable" class.
table = soup.find(class_='wikitable')

# One list of cell texts per <tr>. Header (<th>) and data (<td>) cells are both
# collected, and trailing whitespace is removed from each cell's text.
rows = [
    [cell.text.rstrip() for cell in row.find_all(['th', 'td'])]
    for row in table.find_all('tr')
]

# The first row supplies the column names; every later row is a data record.
columns = rows[0] if rows else []
data = rows[1:]

#### Create a new DataFrame called `canada` from the scraped Wikipedia table

In [4]:
# Assemble the scraped rows into a DataFrame, labelling columns with the header row.
canada = pd.DataFrame(data, columns=columns)
# Preview the first rows.
canada.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


#### Remove Boroughs that are 'Not assigned'

In [5]:
# Keep only the rows whose Borough has a real value. The explicit .copy() makes
# the result an independent frame, so assigning to its columns in a later cell
# does not trigger pandas' SettingWithCopyWarning.
canada = canada[canada['Borough'] != 'Not assigned'].copy()
canada.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


#### Combine multiple neighborhoods that share the same postal code into one row, with the neighborhoods separated by commas

In [6]:
# Join the neighborhoods of every postal code into one comma-separated string.
# transform() returns a value for every original row, so on its own it leaves
# one row per original record; drop_duplicates() then keeps only the first row
# of each postal code so that each code really ends up on a single row.
# (If a code were listed under several boroughs, the first borough is kept.)
# assign() builds a new frame instead of writing into a filtered slice, and the
# original index labels of the surviving rows are preserved.
canada = (
    canada
    .assign(
        Neighborhood=canada.groupby("Postal Code")["Neighborhood"].transform(", ".join)
    )
    .drop_duplicates(subset="Postal Code")
)
canada.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


#### Shape of the canada dataframe

In [7]:
# Dimensions of the frame as a (rows, columns) tuple.
canada.shape

(103, 3)

In [9]:
# Save the frame to canada.csv with a header row; the index is written as the
# first column because to_csv includes it by default.
canada.to_csv("canada.csv", header=True)