# 1, Import libraries

In [55]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from bs4 import BeautifulSoup
import requests
import geocoder
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

# 2, Import data from previous notebook

In [56]:
df = pd.read_csv("file2.csv", index_col = 0)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# 3, Explore Toronto

In [57]:
tor_df = df[df.Borough.str.contains("Toronto")].reset_index().drop(columns="index")
tor_df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [58]:
tor_map = folium.Map(location=[tor_df.Latitude.mean(), tor_df.Longitude.mean()], zoom_start=11)

# add markers to map
for lat, lng, label in zip(tor_df['Latitude'], tor_df['Longitude'], tor_df['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(tor_map)  
    
tor_map

In [59]:
CLIENT_ID = '1LH5UHA0XLZSSFNYESGAVK5TTPIJNG1VACOREOXGAXEMRGIE' # your Foursquare ID
CLIENT_SECRET = 'Z1PHKBXIOGYDCISVKKVJWGSTBIK2KL0YFVJLWWRRBGQPSZPO' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

In [60]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT = 100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [61]:
tor_venues = getNearbyVenues(names=tor_df['Neighbourhood'],
                                   latitudes=tor_df['Latitude'],
                                   longitudes=tor_df['Longitude']
                                  )

The Beaches
The Danforth West, Riverdale
The Beaches West, India Bazaar
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park, Summerhill East
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North, Forest Hill West
The Annex, North Midtown, Yorkville
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place, Underground city
Christie
Dovercourt Village, Dufferin
Little Portugal, Trinity
Brockton, Exhibition Place, Parkdale Village
High Park, The Junction Sout

In [62]:
tor_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,"The Danforth West, Riverdale",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant


In [63]:
tor_venues.groupby("Neighborhood").count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Berczy Park,55,55,55,55,55,55
"Brockton, Exhibition Place, Parkdale Village",24,24,24,24,24,24
Business Reply Mail Processing Centre 969 Eastern,17,17,17,17,17,17
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",16,16,16,16,16,16
"Cabbagetown, St. James Town",42,42,42,42,42,42
Central Bay Street,84,84,84,84,84,84
"Chinatown, Grange Park, Kensington Market",80,80,80,80,80,80
Christie,18,18,18,18,18,18
Church and Wellesley,81,81,81,81,81,81


# 4, Analyze the neibourhood

In [64]:
tor_onehot = pd.get_dummies(tor_venues[["Venue Category"]], prefix = "", prefix_sep = "")
tor_onehot["Neighborhood"] = tor_venues["Neighborhood"]
tor_cols = list(tor_onehot.columns)
tor_cols.remove("Neighborhood")
tor_cols = ["Neighborhood"] + tor_cols
tor_onehot = tor_onehot[tor_cols]



In [65]:
tor_onehot.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [66]:
tor_group = tor_onehot.groupby("Neighborhood").mean().reset_index()
tor_group

Unnamed: 0,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.01,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.0625,0.0625,0.0625,0.125,0.1875,0.125,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,...,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.011905,0.0,0.011905
7,"Chinatown, Grange Park, Kensington Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.05,0.0,0.05,0.0125,0.0,0.0
8,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Church and Wellesley,0.012346,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,...,0.012346,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.0,0.012346


In [67]:
num_top_venues = 10
list_cols = ["Neighborhood", "1st", "2nd", "3rd", "4th", "5th", "6th", "7th", "8th", "9th", "10th"]
tor_top10 = pd.DataFrame(columns = list_cols)
for i in tor_group.index:
    temp_nei = tor_group.loc[i, "Neighborhood"]
    temp_df = pd.DataFrame(tor_group.loc[i, :][1:]).reset_index()
    temp_df.columns = ["Venue", "Freq"]
    temp_df["Freq"] = round(temp_df.Freq.astype(float), 2)
    temp_df = temp_df.sort_values("Freq", ascending = False).head(num_top_venues).reset_index(drop = True)
    tor_top10.loc[i, :] = np.append([temp_nei], list(temp_df.Venue.values))
    print("---{}---".format(temp_nei))
    print(temp_df)
    

---Adelaide, King, Richmond---
              Venue  Freq
0       Coffee Shop  0.07
1              Café  0.04
2        Steakhouse  0.04
3               Bar  0.04
4    Cosmetics Shop  0.03
5   Thai Restaurant  0.03
6        Restaurant  0.03
7      Burger Joint  0.03
8    Breakfast Spot  0.03
9  Asian Restaurant  0.03
---Berczy Park---
                Venue  Freq
0         Coffee Shop  0.07
1        Cocktail Bar  0.05
2  Seafood Restaurant  0.04
3         Cheese Shop  0.04
4          Steakhouse  0.04
5      Farmers Market  0.04
6                Café  0.04
7              Bakery  0.04
8            Beer Bar  0.04
9      Breakfast Spot  0.02
---Brockton, Exhibition Place, Parkdale Village---
                    Venue  Freq
0          Breakfast Spot  0.08
1                    Café  0.08
2             Coffee Shop  0.08
3               Nightclub  0.08
4                     Gym  0.04
5           Burrito Place  0.04
6            Climbing Gym  0.04
7       Convenience Store  0.04
8  Furniture / Hom

---The Beaches---
                      Venue  Freq
0         Health Food Store  0.25
1                     Trail  0.25
2                       Pub  0.25
3         Afghan Restaurant  0.00
4             Movie Theater  0.00
5                    Lounge  0.00
6                    Market  0.00
7  Mediterranean Restaurant  0.00
8               Men's Store  0.00
9             Metro Station  0.00
---The Beaches West, India Bazaar---
                Venue  Freq
0                Park  0.11
1             Brewery  0.06
2          Steakhouse  0.06
3    Sushi Restaurant  0.06
4      Sandwich Place  0.06
5  Italian Restaurant  0.06
6       Movie Theater  0.06
7      Ice Cream Shop  0.06
8       Burrito Place  0.06
9                 Pub  0.06
---The Danforth West, Riverdale---
                     Venue  Freq
0         Greek Restaurant  0.24
1              Coffee Shop  0.10
2       Italian Restaurant  0.07
3               Restaurant  0.05
4           Ice Cream Shop  0.05
5   Furniture / Home Store  0.

In [68]:
tor_top10

Unnamed: 0,Neighborhood,1st,2nd,3rd,4th,5th,6th,7th,8th,9th,10th
0,"Adelaide, King, Richmond",Coffee Shop,Café,Steakhouse,Bar,Cosmetics Shop,Thai Restaurant,Restaurant,Burger Joint,Breakfast Spot,Asian Restaurant
1,Berczy Park,Coffee Shop,Cocktail Bar,Seafood Restaurant,Cheese Shop,Steakhouse,Farmers Market,Café,Bakery,Beer Bar,Breakfast Spot
2,"Brockton, Exhibition Place, Parkdale Village",Breakfast Spot,Café,Coffee Shop,Nightclub,Gym,Burrito Place,Climbing Gym,Convenience Store,Furniture / Home Store,Grocery Store
3,Business Reply Mail Processing Centre 969 Eastern,Smoke Shop,Garden Center,Pizza Place,Comic Shop,Recording Studio,Restaurant,Butcher,Burrito Place,Farmers Market,Fast Food Restaurant
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Service,Airport Lounge,Airport Terminal,Airport Food Court,Airport Gate,Harbor / Marina,Bar,Airport,Rental Car Location,Boat or Ferry
5,"Cabbagetown, St. James Town",Restaurant,Coffee Shop,Pizza Place,Bakery,Café,Pub,Italian Restaurant,Caribbean Restaurant,Market,Snack Place
6,Central Bay Street,Coffee Shop,Ice Cream Shop,Café,Italian Restaurant,Sandwich Place,Burger Joint,Japanese Restaurant,Juice Bar,Bubble Tea Shop,Spa
7,"Chinatown, Grange Park, Kensington Market",Bar,Café,Chinese Restaurant,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Coffee Shop,Dumpling Restaurant,Mexican Restaurant,Grocery Store,Farmers Market
8,Christie,Grocery Store,Café,Park,Nightclub,Candy Store,Italian Restaurant,Diner,Restaurant,Gas Station,Athletics & Sports
9,Church and Wellesley,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Gay Bar,Restaurant,Hotel,Pub,Café,Mediterranean Restaurant,Men's Store


# 5, Cluster the neighborhoods

In [69]:
k = 5
tor_group_clus = tor_group.drop(columns = ["Neighborhood"])
kmeans = KMeans(n_clusters = k, random_state = 0).fit(tor_group_clus)
kmeans.labels_

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       2, 0, 1, 0, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 4, 0, 0])

In [70]:
# add clustering labels
tor_top10.insert(0, 'Cluster Labels', kmeans.labels_)

tor_merged = tor_df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
tor_merged = pd.merge(tor_merged, tor_top10.set_index('Neighborhood'), left_on='Neighbourhood', right_on = "Neighborhood")

tor_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st,2nd,3rd,4th,5th,6th,7th,8th,9th,10th
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,4,Health Food Store,Trail,Pub,Afghan Restaurant,Movie Theater,Lounge,Market,Mediterranean Restaurant,Men's Store,Metro Station
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,0,Greek Restaurant,Coffee Shop,Italian Restaurant,Restaurant,Ice Cream Shop,Furniture / Home Store,Fruit & Vegetable Store,Bubble Tea Shop,Caribbean Restaurant,Cosmetics Shop
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572,0,Park,Brewery,Steakhouse,Sushi Restaurant,Sandwich Place,Italian Restaurant,Movie Theater,Ice Cream Shop,Burrito Place,Pub
3,M4M,East Toronto,Studio District,43.659526,-79.340923,0,Café,Coffee Shop,Brewery,American Restaurant,Bakery,Italian Restaurant,Gastropub,Comfort Food Restaurant,Convenience Store,Clothing Store
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,2,Park,Lake,Bus Line,Swim School,Afghan Restaurant,Monument / Landmark,Market,Mediterranean Restaurant,Men's Store,Metro Station


In [71]:
# create map
map_clusters = folium.Map(location=[tor_df.Latitude.mean(), tor_df.Longitude.mean()], zoom_start=11)

# set color scheme for the clusters
x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(tor_merged['Latitude'], tor_merged['Longitude'], tor_merged['Neighbourhood'], tor_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters