# Week 03 Assignment

#### Author: Viet Nguyen

In [38]:
# import libraries
import pandas as pd
import numpy as np

# convert an address into latitude and longitude values
#!pip install geopy
from geopy.geocoders import Nominatim 

# map rendering library
#!pip install folium
import folium

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# library to handle requests
import requests

# Scrape the postal-code table from Wikipedia.
# read_html() returns a list of all tables matching attrs; we keep the first
# "wikitable" and treat its first row as the column header.
wiki_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
wiki_tables = pd.read_html(wiki_url, attrs={"class": "wikitable"}, header=0)
df = wiki_tables[0]

In [39]:
# Where a Borough is assigned but the Neighbourhood is 'Not assigned',
# use the Borough name as the Neighbourhood.
# This is done as a single masked .loc assignment so the write lands in df itself:
# the chained form df.iloc[i]['Neighbourhood'] = ... assigns into a temporary row
# object, which pandas does not guarantee to propagate back to df.
needs_borough_name = (df['Borough'] != 'Not assigned') & (df['Neighbourhood'] == 'Not assigned')
df.loc[needs_borough_name, 'Neighbourhood'] = df.loc[needs_borough_name, 'Borough']

# Replace 'Not assigned' values of the Borough column with NaN so those rows can be
# removed with dropna(). The result is assigned back to the column rather than using
# inplace=True on a column selection, which may operate on a copy.
df['Borough'] = df['Borough'].replace('Not assigned', np.nan)

In [40]:
# Discard every row that contains a missing value in any column
# (this removes the rows whose Borough was set to NaN earlier).
df = df.dropna(axis='index', how='any')

In [41]:
# Collapse rows that share the same Postcode and Borough into a single row,
# joining the remaining column's values into one comma-separated string.
# sort=False keeps the groups in order of first appearance; as_index=False keeps
# Postcode and Borough as regular columns.
df = df.groupby(by=['Postcode', 'Borough'], sort=False, as_index=False).agg(', '.join)

In [42]:
# Preview the first 10 rows of the processed dataframe
df.head(n=10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Queen's Park
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Rouge, Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens, Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson, Garden District"


In [43]:
# Verify the size of the aggregated dataframe as (number of rows, number of columns)
df.shape

(103, 3)

In [44]:
# Download the location data CSV and save it as 'Geospatial_data.csv'
# (-q: suppress wget's output, -O: name of the file to write).
# NOTE(review): this needs a `wget` binary on PATH, and with -q a failed download
# is not reported here — confirm the file exists and is non-empty before reading it.
!wget -q -O 'Geospatial_data.csv' https://cocl.us/Geospatial_data

In [45]:
# Load the downloaded CSV into its own dataframe
geo_csv_path = 'Geospatial_data.csv'
locations = pd.read_csv(geo_csv_path)

# Preview the first rows to confirm the file parsed as expected
locations.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [46]:
# Attach coordinates to each postcode row with an inner join:
# df's 'Postcode' is matched against locations' 'Postal Code'.
df = pd.merge(df, locations, how='inner', left_on='Postcode', right_on='Postal Code')

In [47]:
# 'Postal Code' was the right-hand join key, so it now duplicates 'Postcode'; remove it
df = df.drop(columns='Postal Code')

In [48]:
# Check if the dataframe was processed correctly (first 10 rows, now with coordinates)
df.head(n=10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


In [49]:
# Keep only the rows whose Borough name contains "Toronto".
# A vectorized boolean mask replaces the row-by-row drop loop: it does not depend on
# the index labels being exactly 0..n-1, avoids rebuilding the frame once per dropped
# row, and always yields a new frame so df itself is never modified.
# regex=False makes this a plain, case-sensitive substring test; na=False treats a
# missing Borough as "not Toronto" instead of raising.
is_toronto = df['Borough'].str.contains('Toronto', regex=False, na=False)

# Select the matching rows and renumber them from 0
df_toronto = df.loc[is_toronto].reset_index(drop=True)

In [50]:
# Check if the dataframe was processed correctly
# It should display only the rows whose Borough contains "Toronto"
df_toronto

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
1,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
3,M4E,East Toronto,The Beaches,43.676357,-79.293031
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
5,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
6,M6G,Downtown Toronto,Christie,43.669542,-79.422564
7,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.650571,-79.384568
8,M6H,West Toronto,"Dovercourt Village, Dufferin",43.669005,-79.442259
9,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752


In [51]:
# Look up the latitude/longitude of Toronto to use as the map centre
address = 'Toronto, ON'

# Nominatim's usage policy asks every client to identify itself; geopy warns about a
# missing user_agent (NOTE(review): newer geopy releases reject it outright — confirm
# against the installed version), so one is always passed explicitly.
geolocator = Nominatim(user_agent='toronto_neighbourhoods_notebook')
location = geolocator.geocode(address)

# geocode() returns None when the service finds no match; fail with a clear message
# instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude

print('The geographical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

  app.launch_new_instance()


The geograpical coordinate of Manhattan are 43.653963, -79.387207.


In [52]:
# Create a map of Toronto centred on the geocoded latitude and longitude
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# Add one circle marker per row of df_toronto, with the row's neighbourhood
# name(s) shown in the popup
marker_rows = df_toronto[['Latitude', 'Longitude', 'Neighbourhood']].itertuples(index=False, name=None)
for lat, lng, hood in marker_rows:
    marker = folium.CircleMarker(
        location=[lat, lng],
        radius=5,
        popup=folium.Popup(hood, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    )
    marker.add_to(map_toronto)

# Last expression of the cell: renders the map inline
map_toronto