In [1]:
import json
import os
import warnings

import numpy as np
import pandas as pd
import requests
from pandas.io.json import json_normalize

### Load the data from previously saved .csv file

In [2]:
# Postal codes with borough, neighbourhood and coordinates, read from the
# previously saved CSV. The first CSV column is used as the row index.
postal_csv_path = 'postal_list_coords.csv'
toronto_data = pd.read_csv(postal_csv_path, index_col=0)
toronto_data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### Set the Foursquare API credentials and request parameters

In [3]:
# Foursquare credentials are read from the environment so that they are never
# stored in the notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET
# before starting the kernel.
# NOTE(review): any key pair that was previously committed in this notebook
# should be treated as exposed and regenerated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
if not (CLIENT_ID and CLIENT_SECRET):
    warnings.warn(
        'FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
        'Foursquare API requests will not be authenticated.'
    )

VERSION = '20200224'  # sent as the `v` query parameter (looks like a YYYYMMDD date)
LIMIT = 100           # sent as the `limit` query parameter of each request

### Function to get the categories from results

In [4]:
def get_category_type(row):
    """Return the name of the first category attached to a venue record.

    Parameters
    ----------
    row : mapping-like (e.g. a dict or a pandas Series)
        Must provide either a ``'categories'`` entry or, failing that, a
        ``'venue.categories'`` entry holding a list of category dicts, each
        with a ``'name'`` key.

    Returns
    -------
    str or None
        The ``'name'`` of the first category, or ``None`` when the category
        list is empty or ``None``.

    Raises
    ------
    KeyError
        If neither ``'categories'`` nor ``'venue.categories'`` is present.
    """
    # Only a missing key should trigger the fallback; any other error is a real
    # problem and must not be hidden by a bare `except`.
    try:
        categories_list = row['categories']
    except KeyError:
        categories_list = row['venue.categories']

    if categories_list is None or len(categories_list) == 0:
        return None
    return categories_list[0]['name']

### Define a function to make API calls, clean the json and structure it into dataframe for each postal code

In [5]:
def getNearbyVenues(postcodes, lats, lngs, radius = 800):
    """Query the Foursquare "explore" endpoint around each postcode and tabulate the venues.

    Parameters
    ----------
    postcodes, lats, lngs : iterables of equal length
        Postcode label, latitude and longitude of each search centre; they are
        consumed in parallel with ``zip``.
    radius : int, default 800
        Value sent as the ``radius`` query parameter of every request.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with columns ``'Venue Name'``,
        ``'Venue Categories'`` (name of the venue's first category, or None),
        ``'Venue Latitude'``, ``'Venue Longitude'`` and ``'Postcode'``, on a
        fresh 0..n-1 index. If no postcode yields any venue, an empty frame
        with those same columns is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    requests.Timeout
        If a request does not complete within the timeout.

    Notes
    -----
    Reads the notebook-level ``CLIENT_ID``, ``CLIENT_SECRET``, ``VERSION`` and
    ``LIMIT`` settings. A message is printed for every postcode that has no venues.
    """
    explore_url = 'https://api.foursquare.com/v2/venues/explore'
    filtered_cols = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
    output_cols = ['Venue Name', 'Venue Categories', 'Venue Latitude', 'Venue Longitude']

    # Collect one frame per postcode and concatenate once at the end; growing a
    # DataFrame with concat inside the loop re-copies all earlier rows each time.
    frames = []

    for postcode, latitude, longitude in zip(postcodes, lats, lngs):
        # Let requests build and encode the query string.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'll': '{},{}'.format(latitude, longitude),
            'v': VERSION,
            'radius': radius,
            'limit': LIMIT,
        }
        response = requests.get(explore_url, params=params, timeout=30)
        # Surface HTTP failures here instead of as an opaque KeyError on the payload below.
        response.raise_for_status()
        venues = response.json()['response']['groups'][0]['items']

        # If there are no venues found in the neighbourhood
        if not venues:
            print('No venues found for postcode ' + str(postcode))
            continue

        # Flatten the nested JSON and keep only the columns of interest.
        nearby_venues = json_normalize(venues).loc[:, filtered_cols].copy()
        nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)
        nearby_venues.columns = output_cols
        nearby_venues['Postcode'] = postcode
        frames.append(nearby_venues)

    if not frames:
        # Keep the expected schema so later steps (e.g. set_index('Postcode')) still work.
        return pd.DataFrame(columns=output_cols + ['Postcode'])

    return pd.concat(frames, axis=0, sort=False, ignore_index=True)

In [6]:
# Pull the three input columns out of the postal-code table, then collect the
# venues around every postcode.
postcodes, lats, lngs = (
    toronto_data[column] for column in ('Postcode', 'Latitude', 'Longitude')
)
all_venues = getNearbyVenues(postcodes, lats, lngs)

No venues found for postcode M1X


In [7]:
# Preview the first rows of the collected venues table.
all_venues.head()

Unnamed: 0,Venue Name,Venue Categories,Venue Latitude,Venue Longitude,Postcode
0,Wendy's,Fast Food Restaurant,43.802008,-79.19808,M1B
1,Wendy's,Fast Food Restaurant,43.807448,-79.199056,M1B
2,Staples Morningside,Paper / Office Supplies Store,43.800285,-79.196607,M1B
3,Tim Hortons,Coffee Shop,43.802,-79.198169,M1B
4,Bus Stop: 85 & 116,Bus Station,43.802198,-79.199389,M1B


In [8]:
# (rows, columns) of the venues table: one row per venue returned by the API.
all_venues.shape

(3983, 5)

### Join venues dataframe with toronto geospatial dataframe

In [9]:
# Attach the venue rows to the postal-code table: index the venues by postcode,
# join them onto toronto_data's 'Postcode' column, then renumber the rows.
venues_by_postcode = all_venues.set_index('Postcode')
toronto_venues = toronto_data.join(venues_by_postcode, on='Postcode')
toronto_venues = toronto_venues.reset_index(drop=True)
toronto_venues.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Venue Name,Venue Categories,Venue Latitude,Venue Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,Wendy's,Fast Food Restaurant,43.802008,-79.19808
1,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,Wendy's,Fast Food Restaurant,43.807448,-79.199056
2,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,Staples Morningside,Paper / Office Supplies Store,43.800285,-79.196607
3,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,Tim Hortons,Coffee Shop,43.802,-79.198169
4,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,Bus Stop: 85 & 116,Bus Station,43.802198,-79.199389


In [10]:
# (rows, columns) of the joined postcode/venue table.
toronto_venues.shape

(3984, 9)

### Count the frequencies of different categories of venues in neighbourhoods

In [11]:
# One indicator column per venue category, one row per row of toronto_venues.
id_cols = ['Postcode', 'Borough', 'Neighbourhood']
toronto_onehot = pd.get_dummies(toronto_venues['Venue Categories'])
toronto_onehot[id_cols] = toronto_venues[id_cols]

# The identifier columns were appended at the end; bring the trailing three
# columns to the front and keep the remaining columns in their existing order.
n_ids = len(id_cols)
rearranged_cols = [*toronto_onehot.columns[-n_ids:], *toronto_onehot.columns[:-n_ids]]
toronto_onehot = toronto_onehot[rearranged_cols]
toronto_onehot.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,...,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,M1B,Scarborough,"Rouge, Malvern",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M1B,Scarborough,"Rouge, Malvern",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M1B,Scarborough,"Rouge, Malvern",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M1B,Scarborough,"Rouge, Malvern",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M1B,Scarborough,"Rouge, Malvern",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
# (rows, columns) of the one-hot table: identifier columns plus one column per category.
toronto_onehot.shape

(3984, 339)

In [13]:
# Per-postcode mean of every one-hot column, i.e. the share of that postcode's
# rows that fall in each venue category.
# Borough and Neighbourhood are text columns and are dropped before averaging:
# recent pandas versions raise a TypeError from GroupBy.mean() when non-numeric
# columns are present (older versions silently discarded them). Both columns
# come back from toronto_data in the join below.
category_freq = (
    toronto_onehot
    .drop(columns=['Borough', 'Neighbourhood'])
    .groupby('Postcode')
    .mean()
)

# Re-attach the frequencies to the postal-code table (one row per postcode).
toronto_grouped = toronto_data.join(category_freq, on='Postcode').reset_index(drop=True)
toronto_grouped.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,...,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619


In [14]:
# (rows, columns) of the per-postcode category-frequency table.
toronto_grouped.shape

(103, 341)

### Run K-means clustering on the Toronto dataset

In [15]:
from sklearn.cluster import KMeans

# Set number of clusters
kcluster = 5

# Cluster on the venue-category frequency columns only. Features are selected
# by name rather than by position so that re-running this cell after the
# 'Labels' column has been added does not feed earlier cluster labels back in
# as a feature.
non_feature_cols = ['Postcode', 'Borough', 'Neighbourhood', 'Latitude', 'Longitude', 'Labels']
feature_cols = toronto_grouped.columns[~toronto_grouped.columns.isin(non_feature_cols)]
toronto_grouped_clustering = toronto_grouped[feature_cols]

# Run KMeans clustering
# n_init is set explicitly because scikit-learn's default has changed between
# releases; together with random_state this keeps the result reproducible.
kmeans = KMeans(n_clusters = kcluster, n_init = 10, random_state = 0).fit(toronto_grouped_clustering)

### Add the clustering results to the dataset

In [16]:
# Store each postcode's cluster assignment from the fitted model in a 'Labels'
# column. The model was fitted on a column subset of toronto_grouped, so
# kmeans.labels_ follows the same row order as this frame.
toronto_grouped['Labels'] = kmeans.labels_
toronto_grouped.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,...,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Labels
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,2


### Visualize the clustering results

In [20]:
from matplotlib import cm
import matplotlib.colors as cls
import folium

# Start the map centred on the first row of the postal-code table.
first_row = toronto_data.loc[0]
lat, lng = first_row['Latitude'], first_row['Longitude']
map_toronto = folium.Map(location=[lat, lng], zoom_start=10)

# One viridis colour per cluster label, converted to hex once up front.
colors = cm.viridis(np.linspace(0, 1, kcluster))
cluster_hex = [cls.rgb2hex(rgba) for rgba in colors]

postcodes = toronto_grouped['Postcode']
boroughs = toronto_grouped['Borough']
neighbourhoods = toronto_grouped['Neighbourhood']
lats = toronto_grouped['Latitude']
lngs = toronto_grouped['Longitude']
clusters = toronto_grouped['Labels']

# Draw one circle marker per postcode, coloured by its cluster label.
for postcode, borough, neighbourhood, lat, lng, cluster in zip(
        postcodes, boroughs, neighbourhoods, lats, lngs, clusters):

    popup_lines = [str(postcode), str(borough), str(neighbourhood), ' Cluster ' + str(cluster)]
    label = folium.Popup('\n'.join(popup_lines), parse_html=True)

    marker_color = cluster_hex[cluster]
    marker = folium.CircleMarker(
        location=[lat, lng],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7,
    )
    marker.add_to(map_toronto)

map_toronto
