# Segmenting and Clustering Neighbourhoods of Toronto - Part 1

#### Importing Libraries

In [22]:
import requests # library to handle HTTP requests
import pandas as pd # library for data analysis
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values
import numpy as np

import folium # map plotting library
from bs4 import BeautifulSoup
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

print('Libraries imported')

Libraries imported


In [2]:
# Load the postal-code table (borough, neighborhood and coordinates) from the local CSV file.
df = pd.read_csv(filepath_or_buffer='TorrontoGeoData.csv')
df

Unnamed: 0.1,Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,0,M3A,North York,Parkwoods,43.753259,-79.329656
1,1,M4A,North York,Victoria Village,43.725882,-79.315572
2,2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.654260,-79.360636
3,3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,4,M7A,Queen's Park,North York,43.662301,-79.389494
...,...,...,...,...,...,...
98,98,M8X,Etobicoke,"The Kingsway, Old Mill North, Montgomery Road",43.653654,-79.506944
99,99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,100,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558
101,101,M8Y,Etobicoke,"Mimico NE, Kingsway Park South East, Royal Yor...",43.636258,-79.498509


In [3]:
# 'Unnamed: 0' appears to be a saved copy of the row index (its values duplicate
# the index in the output above), so it is removed.
# Reassigning instead of using inplace=True, together with errors='ignore',
# makes this cell safe to re-run: a second run no longer raises KeyError
# because the column is already gone.
df = df.drop(columns='Unnamed: 0', errors='ignore')
df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.654260,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,North York,43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Old Mill North, Montgomery Road",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558
101,M8Y,Etobicoke,"Mimico NE, Kingsway Park South East, Royal Yor...",43.636258,-79.498509


In [7]:
# Preview the first five rows of the cleaned table.
df.head(n=5)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,North York,43.662301,-79.389494


#### Keeping only the boroughs whose name contains "Toronto"

In [15]:
# Keep only the rows whose borough name contains the literal text 'Toronto'.
# regex=False: the pattern is a plain substring, not a regular expression.
# na=False: a missing Borough counts as "no match" instead of producing a NaN
# in the mask, which would make the boolean indexing below fail.
data = df[df['Borough'].str.contains('Toronto', regex=False, na=False)]
# Renumber the surviving rows from 0 and discard the old index.
df_data = data.reset_index(drop=True)
df_data

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
1,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
3,M4E,East Toronto,The Beaches,43.676357,-79.293031
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
5,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
6,M6G,Downtown Toronto,Christie,43.669542,-79.422564
7,M5H,Downtown Toronto,"Richmond, King, Adelaide",43.650571,-79.384568
8,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259
9,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752


In [16]:
# (rows, columns) of the Toronto-only frame.
df_data.shape

(38, 5)

#### Number of postal codes in each Toronto borough

In [17]:
# Count how many rows fall in each borough, most frequent first.
df_data['Borough'].value_counts(sort=True, ascending=False)

Downtown Toronto    18
Central Toronto      9
West Toronto         6
East Toronto         5
Name: Borough, dtype: int64

#### Labeling the data according to borough

In [18]:
# Encode each borough as an integer label.
# An explicit dict keeps every borough next to its code, and `map` yields an
# integer column directly. The previous list-based `replace` on a string column
# relied on pandas silently downcasting the result to integers, which is
# deprecated in recent pandas releases.
# Note: a borough missing from this dict would become NaN in 'label'.
borough_labels = {
    'Downtown Toronto': 1,
    'Central Toronto': 2,
    'East Toronto': 3,
    'West Toronto': 4,
}
df_data['label'] = df_data['Borough'].map(borough_labels)
df_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,label
0,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,1
1,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,1
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,1
3,M4E,East Toronto,The Beaches,43.676357,-79.293031,3
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,1


#### Getting the coordinates of Toronto to center the map

In [19]:
# Look up the coordinates of Toronto with the Nominatim geocoder.
address = 'Toronto'
geolocator = Nominatim(user_agent='toronto_explorer')
location = geolocator.geocode(address)
# geocode() returns None when nothing matches; fail with a clear message
# instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError(f'Nominatim returned no result for address {address!r}')
latitude = location.latitude
longitude = location.longitude
print(f'The geograpical coordinate of Toronto are {latitude}, {longitude}.')

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


#### Map of Toronto with a marker for every postal code

In [38]:
# Base map centered on Toronto.
# folium expects the center as [latitude, longitude]; the two values were
# previously passed in the opposite order, which centered the map far from Toronto.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# Add one circle marker per row, with a popup describing the postal code area.
for lat, long, postalcode, borough, neighborhood in zip(
    df_data['Latitude'],
    df_data['Longitude'],
    df_data['PostalCode'],
    df_data['Borough'],
    df_data['Neighborhood'],
):
    label = 'Postal Code:{},Neighborhood:{},Borough:{}'.format(postalcode, neighborhood, borough)
    # parse_html=True makes folium escape the text, so characters such as
    # apostrophes in neighborhood names cannot break the generated HTML.
    label = folium.Popup(label, parse_html=True)
    # parse_html is an option of Popup, not of CircleMarker, so it is only passed above.
    folium.CircleMarker(
        [lat, long],
        radius=5,
        color='blue',
        popup=label,
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

In [39]:
# Display the map as the cell output.
map_toronto