# Import Packages

In [37]:
import os
import re

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from sklearn.cluster import KMeans

# Request Data From the Website and Append It to Separate Lists

In [38]:
# Download the Wikipedia page that lists the "M" postal codes.
# The timeout stops the cell from hanging indefinitely on a stalled
# connection, and raise_for_status() turns an HTTP error page into an
# exception instead of letting it be parsed as if it were the table.
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, 'lxml')

# The first <tbody> on the page is taken to be the postal-code table.
body = soup.find('tbody')
if body is None:
    raise ValueError('No <tbody> element found; the page layout may have changed.')
tags = body('td')

postal_code = list()
borough = list()
neigh = list()

# Cells are dealt out round-robin: 1st -> postal code, 2nd -> borough,
# 3rd -> neighbourhood, then repeat.
# NOTE(review): this assumes exactly three <td> cells per table row -- confirm
# against the current page layout.
columns = (postal_code, borough, neigh)
for position, tag in enumerate(tags):
    # Read the cell's text through the parser rather than a regex over the
    # serialized HTML, so nested markup, entities and <td> attributes do not
    # corrupt or silently drop the value.
    text = tag.get_text(' ', strip=True)
    # Each entry stays a list (empty when the cell has no text) because the
    # following cell joins every entry into a single string.
    columns[position % 3].append([text] if text else [])

# Convert Each List of Lists to a List of Strings

In [39]:
# Collapse every per-cell list of fragments into one plain string
# (an empty list becomes the empty string).
postal_code = list(map(''.join, postal_code))
borough = list(map(''.join, borough))
neigh = list(map(''.join, neigh))

# Build A DataFrame

In [40]:
# Assemble the three parallel lists into one table, one row per postal code.
df = pd.DataFrame({
    'Postal Code': postal_code,
    'Borough': borough,
    'Neighborhood': neigh,
})
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


# Remove Rows With Unassigned Boroughs

In [41]:
# Index labels of the rows whose borough is the placeholder 'Not assigned';
# those rows are removed from the table in place.
empty = df.index[df['Borough'] == 'Not assigned']
df.drop(index=empty, inplace=True)

In [42]:
# Renumber the remaining rows from 0 (discarding the old labels left behind
# by the drop above), then preview the result.
df.reset_index(drop=True, inplace=True)
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [43]:
# Display the table's dimensions as (rows, columns).
df.shape

(103, 3)

In [44]:
# Location of the postal-code -> latitude/longitude lookup CSV.
# The original absolute path only exists on one machine, so it is kept as the
# default but can be overridden by setting the GEOSPATIAL_COORDINATES_CSV
# environment variable before starting the kernel.
COORD_CSV_PATH = os.environ.get(
    'GEOSPATIAL_COORDINATES_CSV',
    'C:\\Users\\vv123\\Desktop\\projects\\Coursera_Capstone\\Geospatial_Coordinates.csv',
)
coord = pd.read_csv(COORD_CSV_PATH)

In [45]:
# Preview the first rows of the coordinate lookup table.
coord.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [46]:
# Attach Latitude/Longitude to each row by matching on 'Postal Code'.
# This is an inner join (the pandas default), so rows without a match in
# `coord` are dropped.
df = df.merge(coord, on='Postal Code')

In [47]:
# Preview the table after the coordinate columns have been merged in.
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [48]:
# Rows belonging to the 'Downtown Toronto' borough, renumbered from 0.
downtownt_df = df.loc[df['Borough'].eq('Downtown Toronto')].reset_index(drop=True)

In [49]:
# Rows belonging to the 'Central Toronto' borough, renumbered from 0.
centralt_df = df.loc[df['Borough'].eq('Central Toronto')].reset_index(drop=True)

In [50]:
# Rows belonging to the 'East Toronto' borough, renumbered from 0.
eastt_df = df.loc[df['Borough'].eq('East Toronto')].reset_index(drop=True)

In [51]:
# Rows belonging to the 'West Toronto' borough, renumbered from 0.
westt_df = df.loc[df['Borough'].eq('West Toronto')].reset_index(drop=True)

In [52]:
# Stack the four borough tables (west, east, central, downtown -- in that
# order) into one table with a fresh 0..n-1 index.
# pd.concat replaces the chained DataFrame.append calls, which no longer
# exist in pandas 2.0 and later.
Toronto_dataframe = pd.concat(
    [westt_df, eastt_df, centralt_df, downtownt_df],
    ignore_index=True,
)

In [53]:
# Preview the first rows of the combined four-borough table.
Toronto_dataframe.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259
1,M6J,West Toronto,"Little Portugal, Trinity",43.647927,-79.41975
2,M6K,West Toronto,"Brockton, Parkdale Village, Exhibition Place",43.636847,-79.428191
3,M6P,West Toronto,"High Park, The Junction South",43.661608,-79.464763
4,M6R,West Toronto,"Parkdale, Roncesvalles",43.64896,-79.456325


In [54]:
# Look up the coordinates used to centre the map.
address = 'Toronto'

# The user_agent string identifies this application to the Nominatim service.
geolocator = Nominatim(user_agent="Toronto_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear
# message rather than an AttributeError on the attribute reads below.
if location is None:
    raise ValueError('Geocoder returned no result for address {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [55]:
# Base map centred on the geocoded city coordinates.
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per row of the combined borough table.
# The loop variables carry a `row_` prefix so they do not overwrite the
# module-level `borough` list built in the scraping cell; clobbering it would
# break a later re-run of the DataFrame-construction cell.
marker_rows = zip(
    Toronto_dataframe['Latitude'],
    Toronto_dataframe['Longitude'],
    Toronto_dataframe['Borough'],
    Toronto_dataframe['Neighborhood'],
)
for row_lat, row_lng, row_borough, row_neighborhood in marker_rows:
    label = '{}, {}'.format(row_neighborhood, row_borough)
    # parse_html=True makes folium treat the label as plain text, so
    # characters such as apostrophes in neighbourhood names render safely.
    label = folium.Popup(label, parse_html=True)
    # `parse_html` is a Popup option, not a CircleMarker one, so it is no
    # longer passed to the marker itself.
    folium.CircleMarker(
        [row_lat, row_lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_Toronto)

# Last expression of the cell: renders the interactive map inline.
map_Toronto