# Neighborhoods in Toronto (data on Wikipedia was last updated on 15 July 2019)

## Importing Required Libraries

In [0]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

## Scraping the Wikipedia page and getting the table content

In [0]:
# Download the Wikipedia page named in the URL below.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops on an HTTP error instead of parsing an error page.
wiki = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
                    timeout=30)
wiki.raise_for_status()
wiki_content = wiki.content

# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed (and warns), so results could differ between machines.
soup = BeautifulSoup(wiki_content, 'html.parser')

# Take the first <table class="wikitable"> on the page and fail with a clear
# message if the page layout no longer contains one.
table_content = soup.find('table', attrs = {'class': 'wikitable'})
if table_content is None:
    raise ValueError("No table with class 'wikitable' found on the page")
table_rows = table_content.find_all('tr')


## Transforming the table content into a pandas DataFrame and removing rows that contain 'Not assigned'

In [0]:
# Collect the stripped text of every <td> cell, one list per table row.
res = []
for tr in table_rows:
    # A distinct comprehension variable avoids reusing the name of the loop variable.
    cells = [td.text.strip() for td in tr.find_all('td')]
    # Keep empty cells in place: dropping them would shift the remaining values
    # into the wrong columns. Rows with no <td> text at all (presumably the
    # header row) are still skipped.
    if any(cells):
        res.append(cells)

df = pd.DataFrame(res, columns=["Postcode", "Borough", "Neighborhood"])

# Drop every row whose borough or neighborhood is the placeholder 'Not assigned'.
not_assigned = (df.Borough == 'Not assigned') | (df.Neighborhood == 'Not assigned')
df = df[~not_assigned]

In [0]:
## Combining rows on the basis of borough (first postcode kept, neighborhoods joined with ', ')

In [26]:
# How each remaining column is collapsed when rows are merged per borough:
# the first postcode seen is kept, and neighborhood names are joined with ', '.
borough_aggregations = {
    'Postcode': 'first',
    'Neighborhood': ', '.join,
}
rows_by_borough = df.groupby('Borough')
df = rows_by_borough.agg(borough_aggregations).reset_index()
df

Unnamed: 0,Borough,Postcode,Neighborhood
0,Central Toronto,M4N,"Lawrence Park, Roselawn, Davisville North, For..."
1,Downtown Toronto,M5A,"Harbourfront, Regent Park, Ryerson, Garden Dis..."
2,East Toronto,M4E,"The Beaches, The Danforth West, Riverdale, The..."
3,East York,M4B,"Woodbine Gardens, Parkview Hill, Woodbine Heig..."
4,Etobicoke,M9A,"Islington Avenue, Cloverdale, Islington, Marti..."
5,Mississauga,M7R,Canada Post Gateway Processing Centre
6,North York,M3A,"Parkwoods, Victoria Village, Lawrence Heights,..."
7,Scarborough,M1B,"Rouge, Malvern, Highland Creek, Rouge Hill, Po..."
8,West Toronto,M6H,"Dovercourt Village, Dufferin, Little Portugal,..."
9,York,M6C,"Humewood-Cedarvale, Caledonia-Fairbanks, Del R..."


## Using the `shape` attribute to show the number of rows and columns in the dataframe

In [27]:
# Bare last expression, so the notebook displays the (rows, columns) tuple.
df.shape

(10, 3)