In [91]:
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

### Declare dataframe

In [92]:
# Start from an empty frame that only declares the expected column labels.
df = pd.DataFrame(
    columns=['PostalCode', 'Borough', 'Neighbourhood'],
)

### Use BeautifulSoup to scrape the table from the website

In [93]:
# Download the Wikipedia page listing Canadian postal codes that start with "M".
# The timeout keeps the cell from hanging indefinitely, and raise_for_status()
# stops the notebook on an HTTP error instead of parsing an error page as data.
res = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,
)
res.raise_for_status()
soup = BeautifulSoup(res.content, 'lxml')

# Take the first <table> element on the page (assumed to be the postal-code table).
table = soup.find_all('table')[0]

# read_html returns a list of frames, one per table in the markup. Wrapping the
# markup in StringIO avoids passing literal HTML, which recent pandas deprecates.
df = pd.read_html(StringIO(str(table)))[0]

### Preview the scraped dataframe

In [94]:
# Show the frame exactly as parsed from the page, before any cleaning.
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
...,...,...,...
282,M8Z,Etobicoke,Mimico NW
283,M8Z,Etobicoke,The Queensway West
284,M8Z,Etobicoke,Royal York South West
285,M8Z,Etobicoke,South of Bloor


### Remove the rows where Borough is 'Not assigned'

In [95]:
# Keep a second reference to the unfiltered frame before narrowing `df`.
df_bak = df
# Drop every row whose Borough is the 'Not assigned' placeholder.
df = df.loc[df['Borough'].ne('Not assigned')]
df

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
...,...,...,...
281,M8Z,Etobicoke,Kingsway Park South West
282,M8Z,Etobicoke,Mimico NW
283,M8Z,Etobicoke,The Queensway West
284,M8Z,Etobicoke,Royal York South West


### Group by Postcode (and Borough), combining the Neighbourhood values of each postcode into a single row

In [96]:
# Collapse rows that share a (Postcode, Borough) pair; every remaining column
# is gathered into a Python list per group.
df = (
    df
    .groupby(by=['Postcode', 'Borough'])
    .aggregate(list)
)

In [97]:
# Show the grouped frame; Postcode and Borough are now index levels.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighbourhood
Postcode,Borough,Unnamed: 2_level_1
M1B,Scarborough,"[Rouge, Malvern]"
M1C,Scarborough,"[Highland Creek, Rouge Hill, Port Union]"
M1E,Scarborough,"[Guildwood, Morningside, West Hill]"
M1G,Scarborough,[Woburn]
M1H,Scarborough,[Cedarbrae]
...,...,...
M9N,York,[Weston]
M9P,Etobicoke,[Westmount]
M9R,Etobicoke,"[Kingsview Village, Martin Grove Gardens, Rich..."
M9V,Etobicoke,"[Albion Gardens, Beaumond Heights, Humbergate,..."


### After the previous step, the 'Neighbourhood' column holds lists; convert each list to a comma-separated string

In [98]:
# Collapse each group's list of neighbourhoods into one comma-separated string.
# Values that are already strings are left untouched, so re-running this cell
# does not split a previously joined string into individual characters.
df['Neighbourhood'] = df['Neighbourhood'].map(
    lambda names: names if isinstance(names, str) else ', '.join(names)
)
df.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighbourhood
Postcode,Borough,Unnamed: 2_level_1
M1B,Scarborough,"Rouge, Malvern"
M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
M1E,Scarborough,"Guildwood, Morningside, West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae
M1J,Scarborough,Scarborough Village
M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
M1N,Scarborough,"Birch Cliff, Cliffside West"


### Print the shape of the dataframe

In [99]:
# (rows, columns) of the grouped frame; the group keys live in the index,
# so they are not counted as columns.
df.shape

(103, 1)