# Canada postal code data - Part 2

In [30]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [31]:
# Download a pinned revision (the `oldid` query parameter) of the Wikipedia page.
WIKI_URL = 'https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=942655364'
# A timeout keeps the cell from hanging forever if the server does not answer.
response = requests.get(WIKI_URL, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
response.raise_for_status()
website_text = response.text
# The response is an HTML document, so it is parsed with an HTML parser
# (the XML parser is intended for XML documents and needs lxml installed).
soup = BeautifulSoup(website_text, 'html.parser')

In [32]:
# Take the first <table> element of the parsed page and collect its rows.
table = soup.find('table')
if table is None:
    # Without this check the next line would fail with an unhelpful AttributeError.
    raise ValueError("No <table> element found in the downloaded page")
table_rows = table.find_all('tr')

columns = ['PostalCode', 'Borough', 'Neighbourhood']

data = []
for row in table_rows:
    cells = [t.text.strip() for t in row.find_all('td')]
    # Keep only rows that have exactly one <td> per expected column.
    # Rows with no <td> cells (presumably the header row, which would use <th>)
    # previously became all-None rows that had to be filtered out afterwards,
    # and a row with extra cells would have made the DataFrame constructor raise.
    if len(cells) == len(columns):
        data.append(cells)

# Built from complete rows only, so there are no null postal codes to filter
# and the index is already 0..n-1.
post_df = pd.DataFrame(data, columns=columns)


### Check the records

In [33]:
# Preview the first rows to confirm the scrape produced the three expected columns.
post_df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### Remove records with Borough = "Not assigned"

In [34]:
# Flag the rows whose Borough holds the "Not assigned" placeholder ...
unassigned_borough = post_df["Borough"] == "Not assigned"
# ... and remove them from post_df in place, using their index labels.
post_df.drop(index=post_df.loc[unassigned_borough].index, inplace=True)
# Number of rows that remain.
len(post_df)

210

### Merge Neighbourhood

In [35]:
# Group the Neighbourhood values by PostalCode; a postal code can appear on several rows.
post_group = post_df.groupby("PostalCode")["Neighbourhood"]

In [36]:
from itertools import chain  # NOTE(review): unused in this cell; kept in case another cell relies on it.

# For every row, build the comma-separated list of all neighbourhoods that share
# its postal code. The grouping is computed here from the current post_df, so the
# frame is never modified while a groupby over it is being iterated, and the cell
# gives the same result when re-run (single-row groups join to themselves).
# Assumes PostalCode has no nulls -- groupby skips null keys by default.
merged_names = post_df.groupby("PostalCode")["Neighbourhood"].transform(
    lambda names: ', '.join(str(name) for name in names)
)
post_df["Neighbourhood"] = merged_names

# Keep the first row of each postal code (it now carries the merged list, and its
# Borough and index label are preserved) and drop the remaining duplicates in place.
post_df.drop_duplicates(subset="PostalCode", keep="first", inplace=True)

post_df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,"Lawrence Heights, Lawrence Manor"
7,M7A,Downtown Toronto,Queen's Park


### Reset index

In [37]:
# Renumber the rows from 0; the earlier drops left gaps in the index labels.
# drop=True discards the old labels instead of keeping them as a column.
post_df.reset_index(drop=True, inplace=True)
post_df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park


If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.

In [38]:
# Show any rows whose Neighbourhood is still the "Not assigned" placeholder.
unassigned_neighbourhood = post_df["Neighbourhood"].eq("Not assigned")
post_df.loc[unassigned_neighbourhood]

Unnamed: 0,PostalCode,Borough,Neighbourhood


No row has a Neighbourhood of "Not assigned", so no further processing is needed.

In [39]:
# (rows, columns) of the cleaned frame.
post_df.shape

(103, 3)

### Find out Location co-ordinates

I tried using geocoder, but it was not working, so I will use the CSV file to fill in the coordinates for the records instead.

In [53]:
# Create the two coordinate columns up front, filled with None as a placeholder.
for coordinate_column in ("Latitude", "Longitude"):
    post_df[coordinate_column] = None
post_df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,,
1,M4A,North York,Victoria Village,,
2,M5A,Downtown Toronto,Harbourfront,,
3,M6A,North York,"Lawrence Heights, Lawrence Manor",,
4,M7A,Downtown Toronto,Queen's Park,,


In [40]:
# Load the coordinates table from a CSV file; the path is relative to the working directory.
# The lookup cell further down reads its "Postal Code", "Latitude" and "Longitude" columns.
geo_df = pd.read_csv("Geospatial_Coordinates.csv")
geo_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [64]:
# Attach coordinates to every postal code by looking it up in geo_df.
#
# The previous row-by-row loop tested `geo_index != None`, which compares an Index
# with None element-wise and then takes the truth value of the resulting array.
# That only behaves as intended when there is exactly one match, and it raises
# when a postal code matches several geo_df rows.
#
# Build a lookup indexed by postal code. If a code occurs more than once, the first
# occurrence wins, which matches the old loop's use of geo_index[0].
geo_lookup = geo_df.drop_duplicates(subset="Postal Code", keep="first").set_index("Postal Code")

# Postal codes missing from geo_df get NaN in both columns.
post_df["Latitude"] = post_df["PostalCode"].map(geo_lookup["Latitude"])
post_df["Longitude"] = post_df["PostalCode"].map(geo_lookup["Longitude"])

In [66]:
# Display the frame with the Latitude and Longitude columns filled in.
post_df

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7533,-79.3297
1,M4A,North York,Victoria Village,43.7259,-79.3156
2,M5A,Downtown Toronto,Harbourfront,43.6543,-79.3606
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.7185,-79.4648
4,M7A,Downtown Toronto,Queen's Park,43.6623,-79.3895
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.6537,-79.5069
99,M4Y,Downtown Toronto,Church and Wellesley,43.6659,-79.3832
100,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.6627,-79.3216
101,M8Y,Etobicoke,"Humber Bay, King's Mill Park, Kingsway Park So...",43.6363,-79.4985
