In [1]:
import pandas as pd
import numpy as np
import folium
import geocoder
from geopy.geocoders import Nominatim
import requests
import matplotlib.pyplot as plt
import matplotlib.colors as colors

# Part 1

Read the table on the Wikipedia page into a pandas DataFrame

In [2]:
# Wikipedia list of Canadian postal codes beginning with "M". read_html returns
# every table it finds on the page; only the first one is kept.
wiki_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wiki_tables = pd.read_html(wiki_url)
neighbour_df = wiki_tables[0]

Filter out the boroughs that are _"Not assigned"_ and print some details about the data

In [3]:
# Keep only the rows whose Borough is something other than 'Not assigned'.
has_borough = neighbour_df['Borough'].ne('Not assigned')
neighbour_df = neighbour_df[has_borough]

# Report how many distinct boroughs and postal codes remain after filtering.
borough_count = neighbour_df["Borough"].nunique()
postal_code_count = neighbour_df["Postal Code"].nunique()
print(f'number of unique Boroughs : {borough_count}')
print(f'number of unique Postal codes : {postal_code_count}')

number of unique Boroughs : 10
number of unique Postal codes : 103


Find out if there are any Neighborhoods that are _"Not assigned"_

In [4]:
# Count, per column, the rows whose Neighborhood is still 'Not assigned'.
unassigned_mask = neighbour_df['Neighborhood'].eq('Not assigned')
neighbour_df.loc[unassigned_mask].count()

Postal Code     0
Borough         0
Neighborhood    0
dtype: int64

Consolidate the rows with identical Postal code and Borough such that their Neighborhoods are in one row separated by ','

In [5]:
# Group by postal code and borough, then collapse each group's neighborhoods
# into a single ','-separated string; reset_index turns the group keys back
# into ordinary columns.
neighborhoods_by_code = neighbour_df.groupby(['Postal Code', 'Borough'])['Neighborhood']
neighbour_df = neighborhoods_by_code.apply(lambda names: ','.join(names)).reset_index()

In [6]:
# Report the number of rows in the consolidated frame.
row_count = len(neighbour_df)
print(f'number of rows : {row_count}')

number of rows : 103


# Part 2

In [7]:
# Preview the first five rows of the consolidated frame.
neighbour_df.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


##### The geocoder library takes forever to return, and the Nominatim geocoder in the geopy library returns None for M1 Scarborough. Hence I will be using the lat-long information in the provided CSV file

In [28]:
# Build a sample geocoding query from the row labelled 2, in the form
# "<Borough>, <Postal Code>, Toronto", and display it.
address = ', '.join([
    neighbour_df.loc[2, 'Borough'],
    neighbour_df.loc[2, 'Postal Code'],
    'Toronto',
])
address

'Scarborough, M1E, Toronto'

The code below (left commented out) raises <b>AttributeError: 'NoneType' object has no attribute 'latitude'</b> because `geolocator.geocode(address)` returns `None`

In [29]:
# geolocator = Nominatim(user_agent="toronto_explorer")
# location = geolocator.geocode(address)
# lat = location.latitude
# lon = location.longitude

Read the lat-long details csv file

In [30]:
# Load the per-postal-code coordinates from the local CSV and preview the
# first five rows.
coordinates_csv = 'Geospatial_Coordinates.csv'
location_df = pd.read_csv(coordinates_csv)
location_df.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Fetch the lat, long data from the csv and append it to the neighborhood data frame

In [42]:
# Attach Latitude/Longitude to every neighbourhood row by looking the row's
# 'Postal Code' up in location_df.
#
# The lookup is keyed on the postal code itself rather than on row position:
# assigning a filtered Series through .loc aligns on index labels, so a
# row-by-row copy only works when both frames happen to share the same order
# and index, and silently yields NaN otherwise.
#
# drop_duplicates keeps the first entry per postal code so the lookup index is
# unique (Series.map requires that). Postal codes missing from location_df
# end up as NaN in both new columns.
coords_by_code = (
    location_df
    .drop_duplicates(subset='Postal Code')
    .set_index('Postal Code')
)
for coord_column in ('Latitude', 'Longitude'):
    neighbour_df[coord_column] = neighbour_df['Postal Code'].map(coords_by_code[coord_column])

In [44]:
# Preview the first five rows, now including the Latitude and Longitude columns.
neighbour_df.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# Part 3