# Notebook to scrape the Wikipedia list of Canadian postal codes beginning with M

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests

### Parse the Wikipedia page

In [2]:
# Download the Wikipedia article and parse it into a BeautifulSoup tree.
# A timeout keeps the kernel from hanging on a stalled connection, and
# raise_for_status() stops the notebook on an HTTP error instead of letting
# later cells parse an error page as if it were the article.
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, 'lxml')

### Find the first table on the Wikipedia page and iterate through tags for required information

In [3]:
# Locate the first <table> on the page and read its <td> cells as a flat
# sequence of (postcode, borough, neighborhood) triples.
table_can_zipinfo = soup.find('table')
if table_can_zipinfo is None:
    # soup.find returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the next line.
    raise ValueError("No <table> element found on the page; the layout may have changed.")

colvals = table_can_zipinfo.find_all('td')

elem_cnt = len(colvals)
if elem_cnt % 3 != 0:
    # The triple-based extraction below would misalign columns otherwise.
    raise ValueError(
        f"Expected the table cell count to be a multiple of 3, got {elem_cnt}; "
        "the table layout may have changed."
    )

# Every third cell, starting at offsets 0, 1 and 2, forms one column.
cell_texts = [cell.text.strip() for cell in colvals]
postcode = cell_texts[0::3]
borough = cell_texts[1::3]
neighborhood = cell_texts[2::3]

### Build the dataframe from the list of values

In [5]:
# Build the frame column-by-column from the three parallel lists.
# A column dict names the columns directly and raises if the lists ever
# differ in length, instead of silently padding the shorter ones.
df_can_postcode = pd.DataFrame({
    'Postcode': postcode,
    'Borough': borough,
    'Neighborhood': neighborhood,
})

### Cleanse the data: drop rows whose borough is 'Not assigned', and fill 'Not assigned' neighborhoods with the borough name

In [6]:
# Remove every row whose borough is the 'Not assigned' placeholder.
unassigned_borough_rows = df_can_postcode.index[df_can_postcode['Borough'] == 'Not assigned']
df_can_postcode = df_can_postcode.drop(index=unassigned_borough_rows)

# Where the neighborhood is the placeholder, reuse the borough name from the
# same row (the assignment aligns on the index).
neighborhood_missing = df_can_postcode['Neighborhood'] == 'Not assigned'
df_can_postcode.loc[neighborhood_missing, 'Neighborhood'] = df_can_postcode['Borough']

### Group the data by Postcode & Borough

In [7]:
# Collapse rows sharing a (Postcode, Borough) pair into one row whose
# Neighborhood is the comma-separated list of the group's neighborhoods.
group_keys = ['Postcode', 'Borough']
neighborhoods_by_area = df_can_postcode.groupby(group_keys)['Neighborhood']
df_grp_can = neighborhoods_by_area.apply(', '.join).reset_index()
df_grp_can.columns = [*group_keys, 'Neighborhood']

### Show the shape (rows, columns) of the grouped dataframe

In [8]:
# Last expression of the cell: displays the grouped frame's (rows, columns) tuple.
df_grp_can.shape

(103, 3)