In [44]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
from geopy.geocoders import Nominatim
import geocoder
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

In [7]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
r = requests.get(url)

soup = BeautifulSoup(r.content, 'html5lib')
table = soup.find('div', attrs ={'id':'container'})

In [8]:
postalCodes = [];
boroughs= [];
neighborhoods = [];
columnNum = 1;
passVal = False

for row in soup.find_all('td'):
    for cell in row:
        if cell.string and cell.string[0].isalpha() and len(cell.string) > 2:
            passVal = False
            if columnNum == 1:
                if passVal == False and cell.string[1].isdigit():
                    postalCodes.append(cell.string.strip('\n'));   
                    columnNum = 2
                else:
                    continue
            elif columnNum == 2 :
                if cell.string == 'Not assigned':
                    passVal = True
                    del postalCodes[-1]
                    columnNum = 1
                    continue
                else:
                    boroughs.append(cell.string.strip('\n'));      
                    columnNum = 3
            elif columnNum == 3 :
                if cell.string == 'Not assigned\n':
                    neighborhoods.append(boroughs[-1])
                else:
                    neighborhoods.append(cell.string.strip('\n')); 
                columnNum = 1

In [9]:
# define the dataframe columns
column_names = ['PostalCode', 'Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighbors = pd.DataFrame(columns=column_names)

neighbors

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude


In [10]:
for data in range(0, len(postalCodes)-1):
    code = postalCodes[data]
    borough = boroughs[data]
    neighborhood_name = neighborhoods[data]
    
    g = geocoder.arcgis('{}, Toronto, Ontario'.format(code))
    lat_lng_coords = g.latlng

    neighbors = neighbors.append({ 'PostalCode': code,
                                   'Borough': borough,
                                   'Neighborhood': neighborhood_name,
                                   'Latitude': lat_lng_coords[0],
                                   'Longitude': lat_lng_coords[1]}, ignore_index=True)

In [11]:
neighbors

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.752420,-79.329242
1,M4A,North York,Victoria Village,43.730600,-79.313265
2,M5A,Downtown Toronto,Harbourfront,43.650295,-79.359166
3,M6A,North York,Lawrence Heights,43.723270,-79.451286
4,M6A,North York,Lawrence Manor,43.723270,-79.451286
...,...,...,...,...,...
204,M8Y,Etobicoke,Sunnylea,43.632835,-79.489550
205,M8Z,Etobicoke,Kingsway Park South West,43.624630,-79.526950
206,M8Z,Etobicoke,Mimico NW,43.624630,-79.526950
207,M8Z,Etobicoke,The Queensway West,43.624630,-79.526950


In [12]:
neighbors.shape

(209, 5)

In [13]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(neighbors['Borough'].unique()),
        neighbors.shape[0]
    )
)

The dataframe has 11 boroughs and 209 neighborhoods.


#### Use geopy library to get the latitude and longitude values of Toronto City.

In [14]:
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto City are 43.653963, -79.387207.


#### Create a map of Toronto with neighborhoods superimposed on top.

In [15]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighbors['Latitude'], neighbors['Longitude'], neighbors['Borough'], neighbors['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

#### Define Foursquare Credentials and Version

In [16]:
CLIENT_ID = 'I2KWHLNV0X0CQVOKFNBR5Y3AHM4TKTQGK04RQTKQHAN11MGF' # your Foursquare ID
CLIENT_SECRET = 'SRMOED32XWQULNVWLBT1QU5EQ3XPMBSB2UUYD5RJZBOAYCOB' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: I2KWHLNV0X0CQVOKFNBR5Y3AHM4TKTQGK04RQTKQHAN11MGF
CLIENT_SECRET:SRMOED32XWQULNVWLBT1QU5EQ3XPMBSB2UUYD5RJZBOAYCOB


In [17]:
neighborhood_name = neighbors.loc[0, 'Neighborhood'] # neighborhood name
neighborhood_latitude = neighbors.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = neighbors.loc[0, 'Longitude'] # neighborhood longitude value

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Parkwoods are 43.75242000000003, -79.32924245299995.


In [18]:
# type your answer here
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 500 # define radius

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=I2KWHLNV0X0CQVOKFNBR5Y3AHM4TKTQGK04RQTKQHAN11MGF&client_secret=SRMOED32XWQULNVWLBT1QU5EQ3XPMBSB2UUYD5RJZBOAYCOB&v=20180605&ll=43.75242000000003,-79.32924245299995&radius=500&limit=100'

Send the GET request and examine the resutls

In [19]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e5279d0edbcad001bda9e8a'},
 'response': {'headerLocation': 'Parkwoods - Donalda',
  'headerFullLocation': 'Parkwoods - Donalda, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 4,
  'suggestedBounds': {'ne': {'lat': 43.75692000450003,
    'lng': -79.32302427998279},
   'sw': {'lat': 43.74791999550003, 'lng': -79.33546062601711}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4e8d9dcdd5fbbbb6b3003c7b',
       'name': 'Brookbanks Park',
       'location': {'address': 'Toronto',
        'lat': 43.751976046055574,
        'lng': -79.33214044722958,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.751976046055574,
          'lng': -79.33214044722958}],
        'distance': 238,
        'cc': '

In [20]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [23]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Brookbanks Park,Park,43.751976,-79.33214
1,Variety Store,Food & Drink Shop,43.751974,-79.333114
2,KFC,Fast Food Restaurant,43.754387,-79.333021
3,TTC stop - 44 Valley Woods,Bus Stop,43.755402,-79.333741


In [24]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

4 venues were returned by Foursquare.


## Explore Neighborhoods in Toronto

#### Let's create a function to repeat the same process to all the neighborhoods in Toronto

In [25]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [26]:
toronto_venues = getNearbyVenues(names=neighborhoods,
                                 latitudes=neighbors['Latitude'],
                                 longitudes=neighbors['Longitude']
                                  )

Parkwoods
Victoria Village
Harbourfront
Lawrence Heights
Lawrence Manor
Queen's Park
Queen's Park
Rouge
Malvern
Don Mills North
Woodbine Gardens
Parkview Hill
Ryerson
Garden District
Glencairn
Cloverdale
Islington
Martin Grove
Princess Gardens
West Deane Park
Highland Creek
Rouge Hill
Port Union
Flemingdon Park
Don Mills South
Woodbine Heights
St. James Town
Humewood-Cedarvale
Bloordale Gardens
Eringate
Markland Wood
Old Burnhamthorpe
Guildwood
Morningside
West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor
Downsview North
Wilson Heights
Thorncliffe Park
Adelaide
King
Richmond
Dovercourt Village
Dufferin
Scarborough Village
Fairview
Henry Farm
Oriole
Northwood Park
York University
East Toronto
Harbourfront East
Toronto Islands
Union Station
Little Portugal
Trinity
East Birchmount Park
Ionview
Kennedy Park
Bayview Village
CFB Toronto
Downsview East
The Danforth West
Riverdale
Design Exchange
Toronto 

#### Let's check the size of the resulting dataframe

In [27]:
print(toronto_venues.shape)
toronto_venues.head()

(4766, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.75242,-79.329242,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.75242,-79.329242,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Parkwoods,43.75242,-79.329242,KFC,43.754387,-79.333021,Fast Food Restaurant
3,Parkwoods,43.75242,-79.329242,TTC stop - 44 Valley Woods,43.755402,-79.333741,Bus Stop
4,Victoria Village,43.7306,-79.313265,Wigmore Park,43.731023,-79.310771,Park


Let's check how many venues were returned for each neighborhood

In [28]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adelaide,100,100,100,100,100,100
Agincourt,14,14,14,14,14,14
Agincourt North,2,2,2,2,2,2
Albion Gardens,15,15,15,15,15,15
Alderwood,6,6,6,6,6,6
...,...,...,...,...,...,...
Woodbine Gardens,15,15,15,15,15,15
Woodbine Heights,17,17,17,17,17,17
York Mills West,4,4,4,4,4,4
York University,11,11,11,11,11,11


#### Let's find out how many unique categories can be curated from all the returned venues

In [35]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 267 uniques categories.


##  Analyze Each Neighborhood

In [36]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Adult Boutique,Afghan Restaurant,Airport,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Train Station,Tram Station,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [37]:
toronto_onehot.shape

(4766, 267)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [38]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Adult Boutique,Afghan Restaurant,Airport,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,Asian Restaurant,...,Train Station,Tram Station,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store
0,Adelaide,0.0,0.0,0.0,0.0,0.03,0.0,0.01,0.000000,0.03,...,0.0,0.0,0.01,0.0,0.0,0.000000,0.000000,0.01,0.0,0.0
1,Agincourt,0.0,0.0,0.0,0.0,0.00,0.0,0.00,0.000000,0.00,...,0.0,0.0,0.00,0.0,0.0,0.000000,0.071429,0.00,0.0,0.0
2,Agincourt North,0.0,0.0,0.0,0.0,0.00,0.0,0.00,0.000000,0.00,...,0.0,0.0,0.00,0.0,0.0,0.000000,0.000000,0.00,0.0,0.0
3,Albion Gardens,0.0,0.0,0.0,0.0,0.00,0.0,0.00,0.000000,0.00,...,0.0,0.0,0.00,0.0,0.0,0.066667,0.000000,0.00,0.0,0.0
4,Alderwood,0.0,0.0,0.0,0.0,0.00,0.0,0.00,0.000000,0.00,...,0.0,0.0,0.00,0.0,0.0,0.000000,0.000000,0.00,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
194,Woodbine Gardens,0.0,0.0,0.0,0.0,0.00,0.0,0.00,0.000000,0.00,...,0.0,0.0,0.00,0.0,0.0,0.000000,0.000000,0.00,0.0,0.0
195,Woodbine Heights,0.0,0.0,0.0,0.0,0.00,0.0,0.00,0.058824,0.00,...,0.0,0.0,0.00,0.0,0.0,0.000000,0.000000,0.00,0.0,0.0
196,York Mills West,0.0,0.0,0.0,0.0,0.00,0.0,0.00,0.000000,0.00,...,0.0,0.0,0.00,0.0,0.0,0.000000,0.000000,0.00,0.0,0.0
197,York University,0.0,0.0,0.0,0.0,0.00,0.0,0.00,0.000000,0.00,...,0.0,0.0,0.00,0.0,0.0,0.000000,0.000000,0.00,0.0,0.0


#### Let's confirm the new size

In [39]:
toronto_grouped.shape

(199, 267)

#### Let's print each neighborhood along with the top 5 most common venues

In [40]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide----
          venue  freq
0   Coffee Shop  0.07
1          Café  0.06
2    Steakhouse  0.04
3         Hotel  0.04
4  Burger Joint  0.03


----Agincourt----
                   venue  freq
0          Shopping Mall  0.14
1   Hong Kong Restaurant  0.07
2                 Bakery  0.07
3  Vietnamese Restaurant  0.07
4     Chinese Restaurant  0.07


----Agincourt North----
                             venue  freq
0                         Pharmacy   0.5
1                 Sushi Restaurant   0.5
2                      Yoga Studio   0.0
3  Molecular Gastronomy Restaurant   0.0
4                      Music Venue   0.0


----Albion Gardens----
                  venue  freq
0         Grocery Store  0.13
1   Japanese Restaurant  0.07
2   Fried Chicken Joint  0.07
3  Caribbean Restaurant  0.07
4        Sandwich Place  0.07


----Alderwood----
                venue  freq
0   Convenience Store  0.17
1        Dance Studio  0.17
2                 Gym  0.17
3                 Pub  0.17
4  Athle

           venue  freq
0    Coffee Shop  0.12
1         Bakery  0.08
2  Boat or Ferry  0.05
3        Theater  0.05
4  Historic Site  0.02


----Harbourfront East----
                     venue  freq
0          Harbor / Marina  0.50
1                     Pier  0.25
2                     Park  0.25
3      Monument / Landmark  0.00
4  New American Restaurant  0.00


----Harbourfront West----
                  venue  freq
0           Coffee Shop  0.11
1    Italian Restaurant  0.06
2  Gym / Fitness Center  0.04
3            Restaurant  0.04
4                  Café  0.04


----Henry Farm----
                  venue  freq
0        Clothing Store  0.16
1  Fast Food Restaurant  0.06
2           Coffee Shop  0.06
3            Restaurant  0.04
4              Tea Room  0.04


----High Park----
                                      venue  freq
0  Residential Building (Apartment / Condo)   0.5
1                                      Park   0.5
2                               Yoga Studio   0.0
3      

            venue  freq
0     Coffee Shop  0.30
1            Café  0.07
2        Pharmacy  0.06
3  Sandwich Place  0.06
4       Bookstore  0.04


----Railway Lands----
                  venue  freq
0           Coffee Shop  0.11
1    Italian Restaurant  0.06
2  Gym / Fitness Center  0.04
3            Restaurant  0.04
4                  Café  0.04


----Rathnelly----
                venue  freq
0  Light Rail Station  0.33
1         Coffee Shop  0.33
2        Liquor Store  0.17
3         Supermarket  0.17
4       Movie Theater  0.00


----Richmond----
          venue  freq
0   Coffee Shop  0.07
1          Café  0.06
2    Steakhouse  0.04
3         Hotel  0.04
4  Burger Joint  0.03


----Richview Gardens----
                 venue  freq
0          Pizza Place  0.33
1          Music Venue  0.33
2  Arts & Crafts Store  0.33
3          Yoga Studio  0.00
4  Monument / Landmark  0.00


----Riverdale----
            venue  freq
0  Discount Store   0.2
1        Bus Line   0.2
2   Grocery Store   

4       Pharmacy  0.06


----York Mills West----
               venue  freq
0  Convenience Store  0.25
1               Park  0.25
2          Speakeasy  0.25
3               Bank  0.25
4     Pilates Studio  0.00


----York University----
                    venue  freq
0    Caribbean Restaurant  0.09
1        Sushi Restaurant  0.09
2             Pizza Place  0.09
3    Fast Food Restaurant  0.09
4  Furniture / Home Store  0.09


----Yorkville----
                       venue  freq
0                       Café  0.12
1             Sandwich Place  0.12
2              Historic Site  0.04
3          French Restaurant  0.04
4  Middle Eastern Restaurant  0.04




#### Let's put that into a *pandas* dataframe

In [41]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [42]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adelaide,Coffee Shop,Café,Steakhouse,Hotel,Bakery,Gastropub,Bar,Asian Restaurant,Restaurant,American Restaurant
1,Agincourt,Shopping Mall,Badminton Court,Park,Vietnamese Restaurant,Bubble Tea Shop,Supermarket,Hong Kong Restaurant,Shanghai Restaurant,Chinese Restaurant,Skating Rink
2,Agincourt North,Pharmacy,Sushi Restaurant,Women's Store,Fast Food Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Field
3,Albion Gardens,Grocery Store,Pizza Place,Park,Coffee Shop,Caribbean Restaurant,Sandwich Place,Fast Food Restaurant,Fried Chicken Joint,Beer Store,Japanese Restaurant
4,Alderwood,Convenience Store,Dance Studio,Recording Studio,Gym,Pub,Athletics & Sports,Fish & Chips Shop,Field,Fish Market,Fast Food Restaurant
...,...,...,...,...,...,...,...,...,...,...,...
194,Woodbine Gardens,Pizza Place,Intersection,Restaurant,Rock Climbing Spot,Café,Fast Food Restaurant,Bus Line,Pharmacy,Breakfast Spot,Gastropub
195,Woodbine Heights,Bus Line,Grocery Store,Pharmacy,Metro Station,Breakfast Spot,Café,Fast Food Restaurant,Middle Eastern Restaurant,Gas Station,Bakery
196,York Mills West,Convenience Store,Speakeasy,Park,Bank,Farmers Market,Elementary School,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm
197,York University,Bar,Coffee Shop,Japanese Restaurant,Massage Studio,Bank,Sushi Restaurant,Caribbean Restaurant,Furniture / Home Store,Fast Food Restaurant,Pizza Place


## Cluster Neighborhoods

In [72]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([3, 3, 0, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, 1, 3, 3, 3, 3, 0, 3, 3, 3, 3,
       3, 1, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 2, 3, 3, 0, 3, 3, 3, 1, 0,
       0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 2, 3,
       3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 0, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 1, 2, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3])

In [75]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = neighbors

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head()# check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.75242,-79.329242,3.0,Bus Stop,Fast Food Restaurant,Park,Food & Drink Shop,Fish Market,Fish & Chips Shop,Field,Flea Market,Electronics Store,Flower Shop
1,M4A,North York,Victoria Village,43.7306,-79.313265,1.0,Park,Grocery Store,Women's Store,Elementary School,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Eastern European Restaurant
2,M5A,Downtown Toronto,Harbourfront,43.650295,-79.359166,3.0,Coffee Shop,Bakery,Theater,Boat or Ferry,Health & Beauty Service,Shoe Store,Bank,Café,Ice Cream Shop,Gastropub
3,M6A,North York,Lawrence Heights,43.72327,-79.451286,3.0,Clothing Store,Sushi Restaurant,Cosmetics Shop,American Restaurant,Rental Car Location,Furniture / Home Store,Food Court,Toy / Game Store,Men's Store,Frozen Yogurt Shop
4,M6A,North York,Lawrence Manor,43.72327,-79.451286,3.0,Clothing Store,Sushi Restaurant,Cosmetics Shop,American Restaurant,Rental Car Location,Furniture / Home Store,Food Court,Toy / Game Store,Men's Store,Frozen Yogurt Shop


In [76]:
toronto_merged.dropna(inplace = True)
toronto_merged['Cluster Labels'] = toronto_merged['Cluster Labels'].astype('int')

Finally, let's visualize the resulting clusters

In [77]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine Clusters

In [79]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
32,Scarborough,0,Construction & Landscaping,Gym / Fitness Center,Park,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Eastern European Restaurant
33,Scarborough,0,Construction & Landscaping,Gym / Fitness Center,Park,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Eastern European Restaurant
34,Scarborough,0,Construction & Landscaping,Gym / Fitness Center,Park,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Eastern European Restaurant
68,North York,0,Construction & Landscaping,Trail,Park,Elementary School,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant
149,Central Toronto,0,Park,Gym,Playground,Tennis Court,Electronics Store,Elementary School,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm
150,Central Toronto,0,Park,Gym,Playground,Tennis Court,Electronics Store,Elementary School,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm
154,Scarborough,0,Pharmacy,Sushi Restaurant,Women's Store,Fast Food Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Field
155,Scarborough,0,Pharmacy,Sushi Restaurant,Women's Store,Fast Food Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Field
156,Scarborough,0,Pharmacy,Sushi Restaurant,Women's Store,Fast Food Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Field
157,Scarborough,0,Pharmacy,Sushi Restaurant,Women's Store,Fast Food Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Field


In [80]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,1,Park,Grocery Store,Women's Store,Elementary School,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Eastern European Restaurant
121,Central Toronto,1,Park,Women's Store,Elementary School,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Eastern European Restaurant
122,Central Toronto,1,Park,Women's Store,Elementary School,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Eastern European Restaurant
123,West Toronto,1,Park,Residential Building (Apartment / Condo),Farmers Market,Elementary School,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Women's Store,Eastern European Restaurant
124,West Toronto,1,Park,Residential Building (Apartment / Condo),Farmers Market,Elementary School,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Women's Store,Eastern European Restaurant
192,Etobicoke,1,Park,Women's Store,Elementary School,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Eastern European Restaurant
193,Etobicoke,1,Park,Women's Store,Elementary School,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Eastern European Restaurant
194,Etobicoke,1,Park,Women's Store,Elementary School,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Eastern European Restaurant


In [81]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,Etobicoke,2,Golf Course,Eastern European Restaurant,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market
16,Etobicoke,2,Golf Course,Eastern European Restaurant,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market
17,Etobicoke,2,Golf Course,Eastern European Restaurant,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market
18,Etobicoke,2,Golf Course,Eastern European Restaurant,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market
19,Etobicoke,2,Golf Course,Eastern European Restaurant,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market


In [82]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,3,Bus Stop,Fast Food Restaurant,Park,Food & Drink Shop,Fish Market,Fish & Chips Shop,Field,Flea Market,Electronics Store,Flower Shop
2,Downtown Toronto,3,Coffee Shop,Bakery,Theater,Boat or Ferry,Health & Beauty Service,Shoe Store,Bank,Café,Ice Cream Shop,Gastropub
3,North York,3,Clothing Store,Sushi Restaurant,Cosmetics Shop,American Restaurant,Rental Car Location,Furniture / Home Store,Food Court,Toy / Game Store,Men's Store,Frozen Yogurt Shop
4,North York,3,Clothing Store,Sushi Restaurant,Cosmetics Shop,American Restaurant,Rental Car Location,Furniture / Home Store,Food Court,Toy / Game Store,Men's Store,Frozen Yogurt Shop
5,Downtown Toronto,3,Coffee Shop,Café,Pharmacy,Sandwich Place,Bookstore,Park,Italian Restaurant,Bar,Indian Restaurant,Sushi Restaurant
...,...,...,...,...,...,...,...,...,...,...,...,...
204,Etobicoke,3,Chinese Restaurant,Coffee Shop,Italian Restaurant,Sushi Restaurant,Fast Food Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market
205,Etobicoke,3,Burrito Place,Bank,Thai Restaurant,Mattress Store,Fish & Chips Shop,Middle Eastern Restaurant,Miscellaneous Shop,Sushi Restaurant,Eastern European Restaurant,Burger Joint
206,Etobicoke,3,Burrito Place,Bank,Thai Restaurant,Mattress Store,Fish & Chips Shop,Middle Eastern Restaurant,Miscellaneous Shop,Sushi Restaurant,Eastern European Restaurant,Burger Joint
207,Etobicoke,3,Burrito Place,Bank,Thai Restaurant,Mattress Store,Fish & Chips Shop,Middle Eastern Restaurant,Miscellaneous Shop,Sushi Restaurant,Eastern European Restaurant,Burger Joint


In [83]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Scarborough,4,Home Service,Electronics Store,Food & Drink Shop,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant
8,Scarborough,4,Home Service,Electronics Store,Food & Drink Shop,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant
91,North York,4,Home Service,Rental Car Location,Farm,Electronics Store,Elementary School,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Dumpling Restaurant
97,North York,4,Home Service,Electronics Store,Food & Drink Shop,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant
