### Importing required libraries

In [38]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Visualization
import matplotlib.pyplot
import seaborn as sns

import json # library to handle JSON files

import base64 # Base64 encoding/decoding (an encoding only: it does not encrypt or protect a secret)

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests

# Transform nested JSON into a flat pandas dataframe.
# json_normalize is exposed at the pandas top level since pandas 1.0; the
# pandas.io.json location is the legacy one and is kept only as a fallback
# so the notebook still imports on older pandas installations.
try:
    from pandas import json_normalize
except ImportError:
    from pandas.io.json import json_normalize

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

print('Libraries imported.')

Libraries imported.


### To see full dataframe

In [20]:
# Remove pandas' display limits so frames are rendered without column/row
# truncation and without wrapping at a fixed character width.
for display_option in ('display.max_columns', 'display.max_rows', 'display.width'):
    pd.set_option(display_option, None)

### Scraping the page to extract the data table

In [34]:
# Wikipedia page holding the table of postal codes that start with "M"
path = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# pd.read_html returns a list with one DataFrame per HTML table found on the page
df_wiki = pd.read_html(path)

# The postal-code table is taken from position 0 of that list. Normalise its
# headers so the dataframe consists of three columns: PostalCode, Borough, Neighborhood.
neighborhood = df_wiki[0].rename(
    columns={'Postal Code': 'PostalCode', 'Neighbourhood': 'Neighborhood'}
)

# Fail early with a readable message if the page layout (and therefore the
# parsed column names) is not what the rest of this cell relies on.
expected_columns = ['PostalCode', 'Borough', 'Neighborhood']
missing_columns = [name for name in expected_columns if name not in neighborhood.columns]
if missing_columns:
    raise ValueError(
        'Unexpected table layout at {}: missing column(s) {}; found {}'.format(
            path, missing_columns, list(neighborhood.columns)
        )
    )

# Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.
neighborhood = neighborhood[neighborhood['Borough'] != 'Not assigned']

# Merge rows that share the same PostalCode/Borough pair: index by the pair,
# then join the remaining text values of each group with commas.
# sort=False keeps the groups in the order they appear in the table.
neighborhood = neighborhood.set_index(['PostalCode', 'Borough'])
combined_neighborhood = (
    neighborhood
    .groupby(level=['PostalCode', 'Borough'], sort=False)
    .agg(','.join)
    .reset_index()
)

# If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.
combined_neighborhood['Neighborhood'] = combined_neighborhood['Neighborhood'].mask(
    combined_neighborhood['Neighborhood'] == 'Not assigned',
    combined_neighborhood['Borough'],
)

# Save the cleaned table for later use. The row index is written as well, so
# reading the file back yields an extra leading 'Unnamed: 0' column.
combined_neighborhood.to_csv('toronto_postal_codes.csv')

# Show the first rows of the cleaned table
df = pd.DataFrame(combined_neighborhood)
df.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


### Getting the `shape` of the dataframe

In [35]:
# (number of rows, number of columns) of the cleaned postal-code dataframe
df.shape

(103, 3)

### Read CSV with geographical coordinates data

In [36]:
# Location of the CSV that lists a latitude/longitude pair per postal code
csv_url = 'http://cocl.us/Geospatial_data'

# Load it and rename the key column so it matches the 'PostalCode' column
# used by the postal-code dataframe.
df_geo = pd.read_csv(csv_url).rename(columns={'Postal Code': 'PostalCode'})

df_geo.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Merge geographical coordinates to dataframe

In [37]:
# Reload the cleaned postal-code table from disk. If the file was written with
# its row index, that index comes back as an 'Unnamed: 0' column; drop it when
# present (errors='ignore' keeps this cell working if the file has no such column).
postal_codes = (
    pd.read_csv('toronto_postal_codes.csv')
    .drop(columns=['Unnamed: 0'], errors='ignore')
)

# An inner merge silently discards postal codes that have no coordinates,
# so report any such codes before merging instead of losing them unnoticed.
unmatched_codes = postal_codes.loc[
    ~postal_codes['PostalCode'].isin(df_geo['PostalCode']), 'PostalCode'
].tolist()
if unmatched_codes:
    print('Postal codes without coordinates (excluded by the inner merge):', unmatched_codes)

# Attach Latitude/Longitude to each postal code
df = postal_codes.merge(df_geo, on='PostalCode', how='inner')
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
