# Jupyter Notebook for analysing Data about Toronto Neighbourhoods

## Author: Utkrist Singh

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim 

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [2]:
file_path='Toronto_wiki.csv'
df=pd.read_csv(file_path)
df=df[df['Borough']!='Not assigned']
df.shape


(103, 3)

##### The first part of the notebook is over

##### Now using geopy to get co-ordinates from postal address 

In [3]:
df.head(40)

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"
11,M3B,North York,Don Mills
12,M4B,East York,"Parkview Hill, Woodbine Gardens"
13,M5B,Downtown Toronto,"Garden District, Ryerson"


import time
locator=Nominatim(user_agent='my_ge')
l1=[]
l2=[]
df=df[locator.geocode(df['Postal Code'])!='None']
for va in df['Postal Code']:
    print(locator.geocode(va),end='//')
    time.sleep(2)
    print(va,end=' ')
##### Geopy wasn't working so I used a different dataset for co-ordinates



In [7]:
#import time
#locator=Nominatim(user_agent='my_geo')
#lat=[]
#lon=[]
#for value in df['Postal Code']:
    #location=locator.geocode(value)
   # lat.append(location.latitude)
  #  lon.append(location.longitude)
 #   time.sleep(2)
#print(lat)

In [16]:
file2='Geospatial_Coordinates.csv'
df2=pd.read_csv(file2)
df2.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [17]:
arr=np.array(df['Postal Code'])
arr=list(arr)
lat=[]
long=[]
for values in df['Postal Code']:
    ind=arr.index(values)
    lat.append(df2['Latitude'][ind])
    long.append(df2['Longitude'][ind])
df['Latitude']=lat
df['Longitude']=long
df.head(50)
        


Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
2,M3A,North York,Parkwoods,43.806686,-79.194353
3,M4A,North York,Victoria Village,43.784535,-79.160497
4,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.763573,-79.188711
5,M6A,North York,"Lawrence Manor, Lawrence Heights",43.770992,-79.216917
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.773136,-79.239476
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.744734,-79.239476
9,M1B,Scarborough,"Malvern, Rouge",43.727929,-79.262029
11,M3B,North York,Don Mills,43.711112,-79.284577
12,M4B,East York,"Parkview Hill, Woodbine Gardens",43.716316,-79.239476
13,M5B,Downtown Toronto,"Garden District, Ryerson",43.692657,-79.264848


##### The second part of the notebook is over

##### Now segmenting and clustering based on boroughs

In [24]:
#Visualising neighbourhoods
map_toronto = folium.Map(location=[43.651070,-79.347015],zoom_start=10)
for lat,long,neigh,bour in zip(df['Latitude'],df['Longitude'],df['Neighbourhood'],df['Borough']):
    label= '{}, {}'.format(neigh, bour)
    label=folium.Popup(label,parse_html=True)
    loc_data=folium.CircleMarker([lat,long],color='blue',radius=5,popup=label,fill=True,fill_color='#3186cc')
    loc_data.add_to(map_toronto)
map_toronto

In [29]:
from sklearn.cluster import KMeans
k=5#Pre-defining number of clusters
df3=df.drop(['Neighbourhood','Borough','Postal Code'],axis=1)
clus=KMeans(n_clusters=k,random_state=0).fit(df3)
print(clus.labels_)
df['Clusters']=clus.labels_
df.head()

[2 2 2 2 2 2 2 1 2 1 2 1 2 2 2 2 2 4 1 4 4 4 4 4 4 1 1 1 4 4 4 0 0 0 1 1 1
 1 1 1 1 1 1 1 4 4 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 3 3 3 3 3 3 3 4 4 3
 3 3 3 3 3 0 0 0 3 3 0 3 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Clusters
2,M3A,North York,Parkwoods,43.806686,-79.194353,2
3,M4A,North York,Victoria Village,43.784535,-79.160497,2
4,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.763573,-79.188711,2
5,M6A,North York,"Lawrence Manor, Lawrence Heights",43.770992,-79.216917,2
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.773136,-79.239476,2


In [36]:
#Creating map using Folium
toronto_map=folium.Map(location=[43.651070,-79.347015],zoom_start=10)
#Creating colors for clusters
rainbow=['blue','green','yellow','orange','red']

for lat,lon,neigh,clust in zip(df['Latitude'],df['Longitude'],df['Neighbourhood'],df['Clusters']):
    labels=folium.Popup("Cluster label= "+str(clust))
    loc_data1=folium.CircleMarker([lat,lon],popup=labels,radius=5,color=rainbow[clust-1])
    loc_data1.add_to(toronto_map)
    
    
toronto_map