# Segmenting and Clustering Neighborhoods in Toronto

In [90]:
# !conda install -c conda-forge geopy --yes
# !conda install -c conda-forge geocoder --yes

import pandas as pd
import numpy as np

url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# read_html parses the HTML tables on the page; the first one parsed is used here.
df_toronto_postal = pd.read_html(url)[0]

# Preprocessing
# Steps 1-2 as one pipeline:
#   1. keep only rows whose Borough is assigned
#   2. collapse to one row per (Postal Code, Borough), joining the
#      neighbourhood names of each group with ', '
has_borough = df_toronto_postal['Borough'].ne('Not assigned')
df_toronto_postal_filter = (
    df_toronto_postal.loc[has_borough]
    .groupby(['Postal Code', 'Borough'])['Neighbourhood']
    .apply(', '.join)
    .reset_index()
)
#   3. where the Neighbourhood is still 'Not assigned', fall back to the Borough name
mask = df_toronto_postal_filter['Neighbourhood'].eq('Not assigned')
df_toronto_postal_filter.loc[mask, 'Neighbourhood'] = df_toronto_postal_filter.loc[mask, 'Borough']

row_count = df_toronto_postal_filter.shape[0]
print(f'The numbers of rows in the dataset are: {row_count}')

The numbers of rows in the dataset are: 103


# Geocoding

The Geocoder package can be very unreliable. If you are not able to get the geographical coordinates of the neighborhoods with it, here is a link to a CSV file that has the geographical coordinates of each postal code: http://cocl.us/Geospatial_data

In [17]:
# Read Data
# Load the CSV of coordinates keyed by postal code, then display the frame.
geo_csv_url = 'http://cocl.us/Geospatial_data'
geo = pd.read_csv(geo_csv_url)
geo

In [89]:
# Merge Data
# Attach the coordinate columns from `geo` to each postal-code row via an
# inner join on 'Postal Code'.
#
# This cell overwrites its own input (df_toronto_postal_filter). Without a
# guard, running it a second time would join `geo` onto the already-merged
# frame and duplicate the Latitude/Longitude columns. Dropping any coordinate
# columns left by an earlier run first makes the cell safe to re-execute.
coord_cols = ['Latitude', 'Longitude']

# 1. Create temp index with Postal Code (minus coordinates from a previous run;
#    errors='ignore' makes the drop a no-op on the first run)
toronto_df_temp = (
    df_toronto_postal_filter
    .drop(columns=coord_cols, errors='ignore')
    .set_index('Postal Code')
)
# 2. Set geo index using geo data
geo_temp = geo.set_index('Postal Code')
# 3. Inner join on the shared index, restore the index name, and turn the
#    index back into a regular column (no in-place mutation)
df_toronto_postal_filter = (
    pd.concat([toronto_df_temp, geo_temp], axis=1, join='inner')
    .rename_axis('Postal Code')
    .reset_index()
)

print(df_toronto_postal_filter.shape)
df_toronto_postal_filter.head()

(103, 5)


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
