# Segmenting and Clustering Neighborhoods in Toronto

<h3>Introduction</h3>
<p>
This is an assignment for the Introduction to Artificial Intelligence course (SOFE 3720U). In it we will explore how to segment and cluster the neighborhoods in Toronto.
</p>

<h3>Import Statements</h3>

In [149]:
import json
from io import StringIO

import folium
import geojson
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from dotenv import dotenv_values
from dotenv import load_dotenv
from pandas import json_normalize
from sklearn import preprocessing
from sklearn.cluster import KMeans

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

print('Libraries Imported')

Libraries Imported


<h3>Week 1 - Foursquare API</h3>
<p>
Within this section we will be using the Foursquare API to find latitude, longitude, and venues within the Toronto area.
</p>

<h4>
Setting up Foursquare API
</h4>

In [150]:
# Read the key/value pairs stored in the .env file (the API key lives there)
config = dotenv_values(".env")
# Endpoint targeted by the initial request below
url = "https://api.foursquare.com/v3/places/nearby"
# Headers shared by every Foursquare call: ask for JSON and pass the API key
headers = {
    "Accept": "application/json",
    "Authorization": config["API_KEY"],
}
# Issue a GET against the endpoint using the headers above
response = requests.get(url, headers=headers)

#Initialize and define findNearbyVenues function
def findNearbyVenues(location, categories, limit):
    """Search Foursquare places near a named location.

    Parameters
    ----------
    location : sequence
        Two items joined with a comma to form the ``near`` query value,
        e.g. ``["Toronto", "ON"]``.
    categories : str
        Value sent as the ``categories`` query parameter.
    limit : str or int
        Maximum number of results; converted with ``str`` so both types work.

    Returns
    -------
    dict or bool
        The decoded JSON body when the API answers with HTTP 200,
        otherwise ``False``.

    Raises
    ------
    requests.exceptions.RequestException
        On network failure or if the server does not answer within the timeout.
    """
    #Let requests build the query string so every value is URL-encoded
    params = {
        "categories": categories,
        "near": str(location[0]) + "," + str(location[1]),
        "limit": str(limit),
    }
    #A timeout keeps the notebook from hanging forever on a stalled connection
    response = requests.get(
        "https://api.foursquare.com/v3/places/search",
        headers=headers,
        params=params,
        timeout=30,
    )
    #Return the result if the code was successful
    if response.status_code == 200:
        return response.json()
    #Return false if the code didn't work
    return False

print('API initialize and custom function created')

API initialize and custom function created


<h4>
Creating dataframe using the function created to use Foursquare API
</h4>

In [151]:
#Assign lat and long for Toronto
latitude = 43.6532
longitude = -79.3832
#Query the Foursquare search endpoint through the custom function
results = findNearbyVenues(location = ["Toronto", "ON"], categories="17000", limit="50")
#findNearbyVenues returns False on any non-200 answer; stop with a clear message
#instead of failing later with "'bool' object is not subscriptable"
if results is False:
    raise RuntimeError("Foursquare venue search failed; check API_KEY in .env and your network connection")
#Flatten the nested JSON records into a dataframe so they can be manipulated
venuesdf = json_normalize(results['results'], max_level=3)
#Drop unnecessary columns
#NOTE(review): columns are dropped by position, which depends on the exact shape
#of the API response - confirm the indices if the response format changes
venuesdf = venuesdf.drop(venuesdf.columns[[0,1,2,3,5,8,9,10,11,12,13,17,18,19,20,22]], axis=1)
#Display first five rows
venuesdf.head()

Unnamed: 0,link,timezone,geocodes.main.latitude,location.formatted_address,location.locality,location.neighborhood,related_places.parent.name,location.po_box
0,/v3/places/4adcfd7cf964a5203e6321e3,America/Toronto,43.668877,"200 Christie St (at Essex St), Toronto ON M6G 3B6",Toronto,[Christie Pitts],,
1,/v3/places/5810fe9138fabe486b7d632c,America/Toronto,43.726102,"3401 Dufferin St, Toronto ON M6A 2T9",Toronto,,Yorkdale,
2,/v3/places/4ad4c062f964a52002f820e3,America/Toronto,43.725902,"3401 Dufferin St, Toronto ON M6A 2T9",Toronto,[Downsview],,
3,/v3/places/580a6d5fd67cd1299d4aa14f,America/Toronto,43.726446,"3401 Dufferin St (Yorkdale Rd), Toronto ON M6A...",Toronto,"[Lawrence Heights, Toronto, ON]",Yorkdale,
4,/v3/places/5a6f54bbf193c029b1ecd567,America/Toronto,43.661965,"60 Carlton St, Toronto ON M5B 1J2",Toronto,,,


In [152]:
#Build the base map, centred on the Toronto coordinates assigned earlier
mapWithMarkers = folium.Map(
    zoom_start=10,
    location=[latitude, longitude],
)
#Leaving the map as the last expression renders it in the notebook
mapWithMarkers

<h3>Week 2 - Prepare your data</h3>
<p>
Within this section we will use the provided sources to build a single large dataframe containing the information necessary for the chosen correlations.
</p>

In [153]:
# Pinned revision of the Wikipedia page listing postal codes starting with "M"
url = 'https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=945633050'
# Time out rather than hang, and fail loudly on an HTTP error instead of
# trying to parse an error page
temp = requests.get(url, timeout=30)
temp.raise_for_status()
data = temp.text
soup = bs(data,'html.parser')
# The first <table> on the page is the one parsed below
wiki = soup.find('table')
if wiki is None:
    raise ValueError('No <table> element found in the page returned by ' + url)
# Wrap the markup in StringIO: passing literal HTML to read_html is deprecated
df = pd.read_html(StringIO(str(wiki)))[0]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [154]:
# Index labels of the rows whose borough is the 'Not assigned' placeholder
unassigned_rows = df.index[df['Borough'].eq('Not assigned')]
# Remove those rows in place, then renumber the remaining rows from 0
df.drop(index=unassigned_rows, inplace=True)
df.reset_index(drop=True, inplace=True)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor


In [155]:
# Load the coordinates file and align its key column name with the one in df
dfPostalCodes = (
    pd.read_csv('Geospatial_Coordinates.csv')
    .rename(columns={'Postal Code':'Postcode'})
)
# Attach Latitude/Longitude to every row that shares a Postcode
dfMerge = df.merge(dfPostalCodes, on='Postcode')
dfMerge.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,Lawrence Heights,43.718518,-79.464763
4,M6A,North York,Lawrence Manor,43.718518,-79.464763


In [156]:
# GeoJSON is UTF-8 by specification; be explicit so the platform default
# encoding cannot corrupt neighbourhood names
with open('Crime_Rates.geojson', encoding='utf-8') as f:
    data = geojson.load(f)

# Keep only the neighbourhood name, population and the average crime figures.
# Selecting by name (instead of repeated positional drops) fails with a clear
# KeyError if the file layout changes, rather than silently keeping wrong columns.
crime_columns = [
    'properties.Neighbourhood',
    'properties.Population',
    'properties.Assault_AVG',
    'properties.AutoTheft_AVG',
    'properties.BreakandEnter_AVG',
    'properties.Homicide_AVG',
    'properties.Robbery_AVG',
    'properties.TheftOver_AVG',
]
dfCrime = (
    pd.json_normalize(data["features"])[crime_columns]
    # Match the key column name used by the postal-code dataframe for merging
    .rename(columns={'properties.Neighbourhood':'Neighbourhood'})
)
dfCrime.head()

Unnamed: 0,Neighbourhood,properties.Population,properties.Assault_AVG,properties.AutoTheft_AVG,properties.BreakandEnter_AVG,properties.Homicide_AVG,properties.Robbery_AVG,properties.TheftOver_AVG
0,Yonge-St.Clair,12528,31.0,4.3,23.3,0.0,5.7,4.3
1,York University Heights,27593,333.2,106.3,113.2,0.8,75.8,36.3
2,Lansing-Westgate,16164,70.7,23.7,38.8,1.7,14.7,7.0
3,Yorkdale-Glen Park,14804,160.2,55.5,63.3,1.2,31.5,22.5
4,Stonegate-Queensway,25051,83.2,28.7,52.8,0.0,20.7,6.0


In [157]:
# Inner join: keep only neighbourhoods present in both the postal-code data
# and the crime data, matched on the exact Neighbourhood name
dfAll = dfMerge.merge(dfCrime, how='inner', on='Neighbourhood')
dfAll.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,properties.Population,properties.Assault_AVG,properties.AutoTheft_AVG,properties.BreakandEnter_AVG,properties.Homicide_AVG,properties.Robbery_AVG,properties.TheftOver_AVG
0,M4A,North York,Victoria Village,43.725882,-79.315572,17510,119.3,16.5,39.0,0.7,13.2,5.0
1,M1B,Scarborough,Rouge,43.806686,-79.194353,46496,173.3,50.5,72.3,0.8,32.3,11.8
2,M1B,Scarborough,Malvern,43.806686,-79.194353,43794,278.2,47.2,52.3,1.7,50.7,9.0
3,M1C,Scarborough,Highland Creek,43.784535,-79.160497,12494,54.3,17.5,31.8,0.3,13.3,2.3
4,M3C,North York,Flemingdon Park,43.7259,-79.340923,21933,141.2,7.8,21.0,0.2,20.0,3.8


In [158]:
# Columns consumed by each marker, in the order they are unpacked below
marker_columns = [
    'Neighbourhood',
    'Latitude',
    'Longitude',
    'properties.Population',
    'properties.Assault_AVG',
    'properties.AutoTheft_AVG',
    'properties.BreakandEnter_AVG',
    'properties.Homicide_AVG',
    'properties.Robbery_AVG',
    'properties.TheftOver_AVG',
]

# The loop variables are named lat/lon on purpose: reusing `latitude` and
# `longitude` would overwrite the Toronto centre coordinates that map cells
# rely on, leaving them set to the last neighbourhood's position.
for name, lat, lon, pop, r1, r2, r3, r4, r5, r6 in zip(*(dfAll[col] for col in marker_columns)):
    label = '{}, Population = {}, Assault AVG = {}, Auto Theft AVG = {}, Break and Enter AVG = {}, Homicide AVG = {}, Robbery AVG = {}, Theft Over AVG = {}'.format(name, pop, r1, r2, r3, r4, r5, r6)
    label = folium.Popup(label, parse_html=True, max_width=300, min_width=300)
    # One circle per neighbourhood; clicking it shows the crime summary popup
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(mapWithMarkers)

mapWithMarkers

In [159]:
# Number of clusters requested from k-means
k = 8
# Make the cell safe to re-run: insert() raises ValueError if the column
# already exists, so remove labels left over from a previous execution
if 'Cluster Labels' in dfAll.columns:
    dfAll.drop(columns='Cluster Labels', inplace=True)
# Note: this is an alias of dfAll, not a copy
toronto_clustering = dfAll
# Cluster on the one-hot encoded Borough column; a fixed random_state makes the
# label assignment reproducible between runs
kmeans = KMeans(n_clusters=k, random_state=0).fit(pd.get_dummies(toronto_clustering[["Borough"]]))
# Put the labels in the first column so they are visible next to each row
dfAll.insert(0, 'Cluster Labels', kmeans.labels_)
dfAll.head()

Unnamed: 0,Cluster Labels,Postcode,Borough,Neighbourhood,Latitude,Longitude,properties.Population,properties.Assault_AVG,properties.AutoTheft_AVG,properties.BreakandEnter_AVG,properties.Homicide_AVG,properties.Robbery_AVG,properties.TheftOver_AVG
0,1,M4A,North York,Victoria Village,43.725882,-79.315572,17510,119.3,16.5,39.0,0.7,13.2,5.0
1,2,M1B,Scarborough,Rouge,43.806686,-79.194353,46496,173.3,50.5,72.3,0.8,32.3,11.8
2,2,M1B,Scarborough,Malvern,43.806686,-79.194353,43794,278.2,47.2,52.3,1.7,50.7,9.0
3,2,M1C,Scarborough,Highland Creek,43.784535,-79.160497,12494,54.3,17.5,31.8,0.3,13.3,2.3
4,1,M3C,North York,Flemingdon Park,43.7259,-79.340923,21933,141.2,7.8,21.0,0.2,20.0,3.8


In [160]:
# Base map for the clustered view
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# One evenly spaced rainbow colour per cluster, converted to hex strings
colors_array = cm.rainbow(np.linspace(0, 1, k))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# KMeans labels run from 0 to k-1, so each label indexes its colour directly
# (indexing with cluster-1 only worked through negative-index wraparound)
for lat, lon, cluster in zip(dfAll['Latitude'], dfAll['Longitude'], dfAll['Cluster Labels']):
    label = folium.Popup(' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters