# Create a new notebook and import all the libraries

In [49]:
import urllib.request
import bs4 as bs
import pandas as pd

# Use urllib to get the page and use BeautifulSoup to scrape the page

In [50]:
# Download the Wikipedia page. The context manager closes the HTTP response
# even if reading it fails.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
with urllib.request.urlopen(url) as response:
    source = response.read()

# Name the parser explicitly so the page is parsed the same way in every
# environment instead of depending on whichever parser bs4 finds installed.
soup = bs.BeautifulSoup(source, 'html.parser')
table = soup.find('table', attrs={'class': 'wikitable sortable'})
if table is None:
    raise ValueError("No 'wikitable sortable' table found on the page")

# Each kept row is [postal code, borough, neighbourhood].
rows = []
for tr in table.find_all('tr'):
    # Rows with no <td> cells yield an empty list here.
    row = [cell.text.strip() for cell in tr.find_all('td')]
    # Skip rows that lack the three cells indexed below.
    if len(row) < 3:
        continue
    # Don't import rows whose borough is 'Not assigned'
    if row[1] == 'Not assigned':
        continue
    # Use the borough name as the neighbourhood when none is assigned
    if row[2] == 'Not assigned':
        row[2] = row[1]
    rows.append(row)
        

# Create the pandas dataframe

In [51]:

# Assemble the scraped rows into a three-column frame and preview the top.
column_names = ['PostalCode', 'Borough', 'NeighborHood']
df = pd.DataFrame(data=rows, columns=column_names)
df.head()

Unnamed: 0,PostalCode,Borough,NeighborHood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


# Concatenate the neighborhoods that share the same postal code and borough

In [52]:
def _join_names(names):
    """Return the given neighbourhood names as one ', '-separated string."""
    return ', '.join(names)


# Collapse to one row per (PostalCode, Borough) pair, joining the
# neighbourhood names of each group into a single string.
neighbourhoods_by_code = df.groupby(['PostalCode', 'Borough'])['NeighborHood']
joined = neighbourhoods_by_code.apply(_join_names)
df = joined.to_frame().reset_index()
df.head()

Unnamed: 0,PostalCode,Borough,NeighborHood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


# Print the shape

In [53]:
# Report the frame's dimensions as a (rows, columns) tuple.
dimensions = df.shape
print(dimensions)

(103, 3)


# Read the latitude and longitude of each postal code

In [60]:
# Load the postal-code coordinate table from the hosted CSV and preview it.
geo_url = 'https://cocl.us/Geospatial_data'
ll = pd.read_csv(geo_url)
ll.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


# Add the latitude and longitude to each postal code

In [63]:
# Attach coordinates to each postal code by matching df['PostalCode'] against
# ll['Postal Code']; the duplicate key column from ll is dropped afterwards.
#
# Any Latitude/Longitude columns left on df by an earlier run of this cell are
# removed first, so re-running the cell gives the same frame instead of
# suffixed duplicates (Latitude_x, Latitude_y, ...).
df = (
    df.drop(columns=['Latitude', 'Longitude'], errors='ignore')
      .merge(ll, left_on='PostalCode', right_on='Postal Code')
      .drop('Postal Code', axis=1)
)
df

Unnamed: 0,PostalCode,Borough,NeighborHood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848
