# PART 1
Making a DataFrame.

Import the libraries.

In [1]:
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup as soup
import requests

Request the Wikipedia page and parse its content so that it can be used with BeautifulSoup.

In [18]:
link = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Fetch the page. The timeout keeps the cell from hanging indefinitely on a
# stalled connection, and raise_for_status() surfaces HTTP errors (4xx/5xx)
# here instead of letting an error page be parsed as if it were the article.
response = requests.get(link, timeout=30)
response.raise_for_status()

# Parse the raw HTML; `page` is the BeautifulSoup tree that the table
# extraction works on.
page = soup(response.content, 'html.parser')

Find the table on the web page and locate its rows (`tr`).
The first row contains the column names.
The remaining rows contain the Postal Code, Borough and Neighbourhood.
Append the remaining rows to a list. Ignore rows whose borough is "Not assigned"; if the neighbourhood is "Not assigned", use the borough name as the neighbourhood.

In [89]:
# Locate the first table with class "wikitable" and collect its rows (<tr>).
wikitable = page.find("table", {"class": "wikitable"})
if wikitable is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise ValueError("No table with class 'wikitable' was found on the page.")
table = wikitable.find_all("tr")

# The first row holds the header cells (<th>) with the column names.
columns = [header.text.replace("\n", "") for header in table[0].find_all("th")]

# Each remaining row is expected to hold three data cells (<td>):
# postal code, borough, neighbourhood.
infos = []
for tr in table[1:]:
    cells = [cell.text.replace("\n", "") for cell in tr.find_all("td")]
    if len(cells) < 3:
        # Row without the three expected cells: nothing usable to extract.
        continue
    postal_code, borough, neighbourhood = cells[:3]
    if borough == "Not assigned":
        # Postal codes without a borough are dropped entirely.
        continue
    if neighbourhood == "Not assigned":
        # Unknown neighbourhood: fall back to the borough name.
        neighbourhood = borough
    infos.append([postal_code, borough, neighbourhood])

Finally, build the DataFrame from the collected rows.

In [93]:
# Build the frame from the scraped rows, using the scraped header cells as
# column names, then preview the first five rows.
df = pd.DataFrame(data=infos, columns=columns)
df.head(5)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [94]:
# Dimensions of the scraped table as (number of rows, number of columns).
df.shape

(103, 3)

# PART 2
Extend the DataFrame with latitudes and longitudes.

Unfortunately, the geocoding code below takes too long and throws an error, so it's better to use the ready-made CSV file instead.

In [104]:
import geocoder

# Upper bound on lookups per postal code. Without a bound, a service that keeps
# failing would make this cell loop forever.
MAX_GEOCODE_ATTEMPTS = 5

latitudes, longitudes = [], []

# The postal code is the first column of df; .iloc selects it by position
# without relying on the deprecated integer fallback of row[0].
for postal_code in df.iloc[:, 0]:
    lat_lng_coords = None
    for _ in range(MAX_GEOCODE_ATTEMPTS):
        g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
        lat_lng_coords = g.latlng
        # NOTE(review): a failed lookup is assumed to yield None or an empty
        # value for `latlng` - confirm against the installed geocoder version.
        if lat_lng_coords:
            break
    if not lat_lng_coords:
        # Every attempt failed: store placeholders so both lists stay aligned
        # with the rows of df.
        lat_lng_coords = (None, None)
    latitudes.append(lat_lng_coords[0])
    longitudes.append(lat_lng_coords[1])

Load the CSV file into a DataFrame.

In [106]:
# Read the prepared coordinates file (path is relative to the notebook's
# working directory) and display the resulting frame.
coordinates_csv = "csv/Geospatial_Coordinates.csv"
lon_lan = pd.read_csv(coordinates_csv)
lon_lan

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


Merge these two DataFrames into one on the `Postal Code` column and name the result `canada_df`.

In [130]:
# Join the scraped table with the coordinates on their shared "Postal Code"
# column. An inner join (the pandas default) keeps only postal codes that are
# present in both frames.
canada_df = df.merge(lon_lan, how="inner", on="Postal Code")
canada_df.head(5)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


# PART 3
Analyze and visualize.

Make a map centred on Toronto, Canada.

In [135]:
import folium

# Point the map opens on, given as [latitude, longitude].
map_centre = [43.715383, -79.405678]

# Base map that the markers are added to; rendered as the cell output.
map_canada = folium.Map(location=map_centre, zoom_start=11)
map_canada

In case you cannot see the map:

<img src="https://raw.githubusercontent.com/weronikazak/ML-Projects/master/Coursera/graphs/map1.PNG">

Add a pin for each postal code, labelled with its neighbourhood name(s).

In [136]:
# Draw one circle marker per row of canada_df on the existing map.
for _, row in canada_df.iterrows():
    # Use explicit positional access (.iloc): plain row[-2] / row[2] on a
    # Series with string labels relies on a deprecated integer fallback.
    lat = row.iloc[-2]  # second-to-last column (Latitude)
    lng = row.iloc[-1]  # last column (Longitude)
    # Third column (Neighbourhood) is the popup text; parse_html=True escapes
    # it so characters such as apostrophes render correctly.
    label = folium.Popup(row.iloc[2], parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        # NOTE(review): parse_html does not look like a documented CircleMarker
        # option; kept as in the original - confirm it can be dropped.
        parse_html=False).add_to(map_canada)
map_canada

In case you cannot see the map:

<img src="https://raw.githubusercontent.com/weronikazak/ML-Projects/master/Coursera/graphs/map2.PNG">