### Importing Libs

In [1]:
#!conda install beautifulsoup4 --yes 

from bs4 import BeautifulSoup
import requests
import pandas as pd
import csv

### Get Toronto's Wikipedia page as text

In [2]:
# Download the raw HTML of the Wikipedia postal-code list.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of handing an error
# page to the parser further down.
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
response.raise_for_status()
wiki_toronto = response.text

### Initialize BeautifulSoup using html5lib

In [3]:
# Parse the downloaded page text with the html5lib parser.
soup = BeautifulSoup(markup=wiki_toronto, features='html5lib')

### Get Toronto's postal code table to work on

In [4]:
# Pick the first <table> whose class attribute is 'wikitable sortable'.
table = soup.find(name='table', attrs={'class': 'wikitable sortable'})

### The real scraping:
 - Split the table into rows
 - Exclude rows whose borough is "Not assigned"
 - Add the results to lists

In [5]:
# Walk every row of the table and collect one (postal code, borough,
# neighbourhood) triple per row whose borough is assigned.
lines = table.find_all('tr')

PostalCode = []
Borough = []
Neighborhood = []

for line in lines:
    # Strip every cell so stray whitespace/newlines around the text can neither
    # defeat the 'Not assigned' comparison nor leak into the stored values.
    cells = [cell.get_text().strip() for cell in line.find_all('td')]

    # Rows without <td> cells (e.g. a header row) or with fewer than the three
    # expected cells are skipped instead of raising IndexError.
    if len(cells) < 3:
        continue

    postal_code, borough, neighborhood = cells[:3]
    if borough == 'Not assigned':
        continue

    PostalCode.append(postal_code)
    Borough.append(borough)
    # A neighbourhood that is not assigned falls back to the borough name.
    Neighborhood.append(neighborhood if neighborhood != 'Not assigned' else borough)


### Create a DataFrame and put the information together

In [6]:
# Assemble the three parallel lists into one frame, one column per list,
# in the order PostalCode, Borough, Neighborhood.
df = pd.DataFrame({
    'PostalCode': PostalCode,
    'Borough': Borough,
    'Neighborhood': Neighborhood,
})

### Group Neighborhoods by PostalCode
* `agg` lets us apply the join function to each group
* `reset_index` turns the group keys left in the index by the group-by back into regular columns

In [7]:
# Collapse rows sharing a postal code and borough into a single row whose
# Neighborhood is the ', '-joined list of that group's neighbourhoods.
final_df = (
    df.groupby(['PostalCode', 'Borough'])['Neighborhood']
    .agg(', '.join)
    .reset_index()
)
final_df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv..."
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ..."


### That's the shape

In [8]:
# Show the grouped frame's dimensions as a (rows, columns) tuple.
print((len(final_df.index), len(final_df.columns)))

(103, 3)


### Write the result to a CSV file for use in the next exercise

In [9]:
# Persist the grouped table for the next exercise. `index` is documented as a
# bool, so pass False explicitly rather than relying on None being falsy; the
# row index is left out of the file and the column header row is kept.
final_df.to_csv('toronto_neig.csv', index=False, header=True)