In [1]:
# First, as always, we install all the packages we will use
import pandas as pd
import numpy as numpy
import json
!conda install -c conda-forge folium=0.5.0 --yes
import folium
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

Collectingdone
done

# All requested packages already installed.

Collecting package metadata (current_rdone
done

# All requested packages already installed.



In [2]:
#import requests to get data from wikipedia
import requests
toronto_data = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
print('Data downloaded!')

Data downloaded!


In [3]:
# I will be using the BeautifulSoup package to scrap the table from Wikipedia
from bs4 import BeautifulSoup
soup = BeautifulSoup(toronto_data.content, "html.parser")

In [5]:
# Lets take an initial llok at the data in the table
table = soup.find_all('table')[0] 
table

<table class="wikitable sortable">
<tbody><tr>
<th>Postal Code
</th>
<th>Borough
</th>
<th>Neighborhood
</th></tr>
<tr>
<td>M1A
</td>
<td>Not assigned
</td>
<td>
</td></tr>
<tr>
<td>M2A
</td>
<td>Not assigned
</td>
<td>
</td></tr>
<tr>
<td>M3A
</td>
<td>North York
</td>
<td>Parkwoods
</td></tr>
<tr>
<td>M4A
</td>
<td>North York
</td>
<td>Victoria Village
</td></tr>
<tr>
<td>M5A
</td>
<td>Downtown Toronto
</td>
<td>Regent Park, Harbourfront
</td></tr>
<tr>
<td>M6A
</td>
<td>North York
</td>
<td>Lawrence Manor, Lawrence Heights
</td></tr>
<tr>
<td>M7A
</td>
<td>Downtown Toronto
</td>
<td>Queen's Park, Ontario Provincial Government
</td></tr>
<tr>
<td>M8A
</td>
<td>Not assigned
</td>
<td>
</td></tr>
<tr>
<td>M9A
</td>
<td>Etobicoke
</td>
<td>Islington Avenue
</td></tr>
<tr>
<td>M1B
</td>
<td>Scarborough
</td>
<td>Malvern, Rouge
</td></tr>
<tr>
<td>M2B
</td>
<td>Not assigned
</td>
<td>
</td></tr>
<tr>
<td>M3B
</td>
<td>North York
</td>
<td>Don Mills
</td></tr>
<tr>
<td>M4B
</td>
<td>East Y

Notice how all the relevant data is in the th,tr,td labelled keys, which is basically a list of the neighborhoods. So, let's define a new variable that includes this data.

In [6]:
post_code = []
boroughs = []
neighborhoods = []

for row in table.findAll('tr'):
    cells = row.findAll('td')
    if len(cells) == 3:
        post_code.append(cells[0].find(text=True))
        boroughs.append(cells[1].find(text=True))
        neighborhoods.append(cells[2].find(text=True))

The next task is essentially transforming this data of nested Python list into a pandas dataframe. So let's start by creating an empty dataframe and then seeing what we have thus far

In [7]:
# define the dataframe columns
column_names = ['Postal Code','Borough', 'Neighborhood'] 

# instantiate the dataframe
df = pd.DataFrame(columns=column_names)

Take a look at the empty dataframe to confirm that the columns are as intended.

In [8]:
df

Unnamed: 0,Postal Code,Borough,Neighborhood


In [9]:
# Now we assign the corresponding lists to the appropriate column.
df['Postal Code'] = post_code
df['Borough'] = boroughs
df['Neighborhood'] = neighborhoods
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A\n,Not assigned\n,\n
1,M2A\n,Not assigned\n,\n
2,M3A\n,North York\n,Parkwoods\n
3,M4A\n,North York\n,Victoria Village\n
4,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront\n"
...,...,...,...
175,M5Z\n,Not assigned\n,\n
176,M6Z\n,Not assigned\n,\n
177,M7Z\n,Not assigned\n,\n
178,M8Z\n,Etobicoke\n,"Mimico NW, The Queensway West, South of Bloor,..."


In [10]:
#Lets remove all the '\n' new line characters.
df = df.replace('\n','', regex=True)
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,
176,M6Z,Not assigned,
177,M7Z,Not assigned,
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


In [11]:
# Drop all rows that do not have a borough assigned.
df = df[df.Borough != 'Not assigned' ]

In [12]:
#order the list reset the index 
df.groupby(['Postal Code'])
df.reset_index(drop=True, inplace=True)

In [13]:
#We can test for a unique Postal Code but printing all lines of a randomly chosen Postal Code
df[df.isin(["M4B"]).any(axis=1)]

Unnamed: 0,Postal Code,Borough,Neighborhood
8,M4B,East York,"Parkview Hill, Woodbine Gardens"


In [14]:
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,Business reply mail Processing Centre
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [15]:
#lets print the shape of the dtaframe to identify the number of rowas as requested.
df.shape

(103, 3)

PART 2

In [16]:
#Lets create an empty dataframe for latitude and longitude
df_lat_long = pd.DataFrame(columns = ['Latitude','Longitude'])
df_lat_long

Unnamed: 0,Latitude,Longitude


In [17]:
#I will be using pgeocode package 
!pip install pgeocode



In [18]:
import pgeocode

In [19]:
# Enter the relevant country code, for Canada it is 'ca'.
nomi = pgeocode.Nominatim('ca')

In [21]:
#Lets create a series of postal codes to iterate through
codes = df['Postal Code']
codes

0      M3A
1      M4A
2      M5A
3      M6A
4      M7A
      ... 
98     M8X
99     M4Y
100    M7Y
101    M8Y
102    M8Z
Name: Postal Code, Length: 103, dtype: object

In [22]:
# Gather the coordinates for all postal codes
lat_list = []
long_list = []
for place in codes:
    lat_list.append(nomi.query_postal_code(place).latitude)
    long_list.append(nomi.query_postal_code(place).longitude)

In [23]:
#Could not find lat and lng for M7R
lat_list[76]
long_list[76]

nan

In [24]:
#USe csv
lat_list[76] =43.6369656
long_list[76] =-79.615819

In [25]:
#Add list of postal codes to df_lat_lng
df_lat_long['Latitude'] = lat_list
df_lat_long['Longitude'] = long_list

In [26]:
df_lat_long

Unnamed: 0,Latitude,Longitude
0,43.7545,-79.3300
1,43.7276,-79.3148
2,43.6555,-79.3626
3,43.7223,-79.4504
4,43.6641,-79.3889
...,...,...
98,43.6518,-79.5076
99,43.6656,-79.3830
100,43.7804,-79.2505
101,43.6325,-79.4939


In [27]:
# Merge the two dataframes 
toronto_df = pd.concat([df, df_lat_long], axis=1, sort=False)
toronto_df

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7545,-79.3300
1,M4A,North York,Victoria Village,43.7276,-79.3148
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6555,-79.3626
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.7223,-79.4504
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6641,-79.3889
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.6518,-79.5076
99,M4Y,Downtown Toronto,Church and Wellesley,43.6656,-79.3830
100,M7Y,East Toronto,Business reply mail Processing Centre,43.7804,-79.2505
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.6325,-79.4939


## Part 3

#### Let's create a function to get nearby venues in Toronto like we did for NY

First we need to add our Foursquare credentilas

In [28]:
CLIENT_ID = 'FHCOKXIMACMIIHEVRT3WPXJ2FXIMBYW301UHLUY4MR3BSYFL' # your Foursquare ID
CLIENT_SECRET = 'OT54ZGPSHEQT540TXPVDRJL312B1MNFDUPPFIUZELEZQKN5J' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100

radius = 500

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: FHCOKXIMACMIIHEVRT3WPXJ2FXIMBYW301UHLUY4MR3BSYFL
CLIENT_SECRET:OT54ZGPSHEQT540TXPVDRJL312B1MNFDUPPFIUZELEZQKN5J


In [98]:
#THIS FUNCTION WAS TAKEN FROM LAB with some modifications 
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

Now we will run code above on each neighborhood in our dataframe. 

In [91]:
#Run code and create a new dataframe 
toronto_venues = getNearbyVenues(names=toronto_df['Neighborhood'],
                                   latitudes=toronto_df['Latitude'],
                                   longitudes=toronto_df['Longitude']
                                  )

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmount Park
Bayview Village
Downsview
The Danforth West, Ri

Lets check the size of the resulting df

In [31]:
print(toronto_venues.shape)
toronto_venues.head()

(2162, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.7545,-79.33,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.7545,-79.33,Sun Life,43.75476,-79.332783,Construction & Landscaping
2,Parkwoods,43.7545,-79.33,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.7276,-79.3148,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.7276,-79.3148,Tim Hortons,43.725517,-79.313103,Coffee Shop


Let's check how many venues were returned for each neighborhood

In [32]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,4,4,4,4,4,4
"Alderwood, Long Branch",7,7,7,7,7,7
"Bathurst Manor, Wilson Heights, Downsview North",6,6,6,6,6,6
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",21,21,21,21,21,21
...,...,...,...,...,...,...
"Willowdale, Newtonbrook",2,2,2,2,2,2
Woburn,1,1,1,1,1,1
Woodbine Heights,6,6,6,6,6,6
York Mills West,3,3,3,3,3,3


Next we will analyze each neighborhood

In [33]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
manhattan_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [34]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,"Willowdale, Newtonbrook",0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
92,Woburn,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
93,Woodbine Heights,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.166667,0.0,...,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0
94,York Mills West,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Lets print each neighborhood and the top 5 venues associated with it 

In [35]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

   Sandwich Place  0.10
1         Coffee Shop  0.10
2  Italian Restaurant  0.10
3            Pharmacy  0.05
4                 Pub  0.05


----Berczy Park----
                venue  freq
0         Coffee Shop  0.09
1                Café  0.06
2               Hotel  0.04
3  Seafood Restaurant  0.04
4        Cocktail Bar  0.03


----Birch Cliff, Cliffside West----
                   venue  freq
0        College Stadium  0.25
1  General Entertainment  0.25
2           Skating Rink  0.25
3                   Café  0.25
4      Accessories Store  0.00


----Brockton, Parkdale Village, Exhibition Place----
                    venue  freq
0             Coffee Shop  0.08
1                    Café  0.08
2  Thrift / Vintage Store  0.05
3          Breakfast Spot  0.05
4               Gift Shop  0.05


----Business reply mail Processing Centre----
           venue  freq
0     Restaurant  0.13
1    Coffee Shop  0.13
2    Yoga Studio  0.07
3           Bank  0.07
4  Deli / Bodega  0.07


----CN Tower, K

In [36]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

#### Get top 10 common places

In [37]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in numpy.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in numpy.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Latin American Restaurant,Breakfast Spot,Badminton Court,Skating Rink,Yoga Studio,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Fast Food Restaurant
1,"Alderwood, Long Branch",Pharmacy,Convenience Store,Pizza Place,Gym,Sandwich Place,Pub,Coffee Shop,Eastern European Restaurant,Dog Run,Doner Restaurant
2,"Bathurst Manor, Wilson Heights, Downsview North",Fried Chicken Joint,Pizza Place,Mediterranean Restaurant,Middle Eastern Restaurant,Coffee Shop,Deli / Bodega,Falafel Restaurant,Electronics Store,Ethiopian Restaurant,Event Space
3,Bayview Village,Flower Shop,Gas Station,Park,Trail,Event Space,Donut Shop,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Yoga Studio
4,"Bedford Park, Lawrence Manor East",Italian Restaurant,Sandwich Place,Coffee Shop,Indian Restaurant,Liquor Store,Butcher,Café,Restaurant,Juice Bar,Sushi Restaurant


Now we will cluster by using kmeans

In [51]:
neighborhoods_venues_sorted.shape

(96, 12)

In [56]:
toronto_merged.shape

(103, 16)

In [38]:
# set number of clusters
kclusters = 4

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
labels = kmeans.labels_

print(labels)

[0 0 0 3 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 3 0 0 0 3 0 0 0 0 3 3
 3 0 0 0 0 0 0 0 0 0 3 0 0 2 0 0 3 0 3 0 0 2 0 0 3 0 0 0 3 1 0 0 0 3 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 2 0 0 3 0]


Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [39]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.7545,-79.33,3.0,Construction & Landscaping,Park,Food & Drink Shop,Health Food Store,Hawaiian Restaurant,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market
1,M4A,North York,Victoria Village,43.7276,-79.3148,0.0,Hockey Arena,French Restaurant,Coffee Shop,Intersection,Portuguese Restaurant,Park,Pizza Place,Fast Food Restaurant,Farmers Market,Falafel Restaurant
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6555,-79.3626,0.0,Coffee Shop,Breakfast Spot,Yoga Studio,Thai Restaurant,Gym / Fitness Center,Italian Restaurant,Food Truck,Event Space,Electronics Store,Distribution Center
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.7223,-79.4504,0.0,Clothing Store,Coffee Shop,Restaurant,Bakery,Toy / Game Store,Sandwich Place,Pharmacy,Sushi Restaurant,Cosmetics Shop,Men's Store
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6641,-79.3889,0.0,Gym,Sushi Restaurant,Creperie,Theater,Martial Arts Dojo,Bubble Tea Shop,Café,Ethiopian Restaurant,Mexican Restaurant,Ramen Restaurant


Finally, let's visualize the resulting clusters

In [109]:
toronto_merged[toronto_merged.isna().any(axis=1)]

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
88,M8V,Etobicoke,"New Toronto, Mimico South, Humber Bay Shores",43.6075,-79.5013,,,,,,,,,,,
95,M1X,Scarborough,Upper Rouge,43.834,-79.2069,,,,,,,,,,,


Foursquare does not seem to work with these two location when we serached for nearby venues 
So we will get ride of them

In [111]:
toronto_clustered = toronto_merged[toronto_merged['Cluster Labels'].notna()]

In [101]:
address = 'Toronto, Ontario'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude 

In [115]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = numpy.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(numpy.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_clustered['Latitude'], toronto_clustered['Longitude'], toronto_clustered['Neighborhood'], toronto_clustered['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[ int(cluster) -1],
        fill=True,
        fill_color=rainbow[ int(cluster) -1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Cluster 1

In [116]:
toronto_clustered.loc[toronto_clustered['Cluster Labels'] == 0, toronto_clustered.columns[[1] + list(range(5, toronto_clustered.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,0.0,Hockey Arena,French Restaurant,Coffee Shop,Intersection,Portuguese Restaurant,Park,Pizza Place,Fast Food Restaurant,Farmers Market,Falafel Restaurant
2,Downtown Toronto,0.0,Coffee Shop,Breakfast Spot,Yoga Studio,Thai Restaurant,Gym / Fitness Center,Italian Restaurant,Food Truck,Event Space,Electronics Store,Distribution Center
3,North York,0.0,Clothing Store,Coffee Shop,Restaurant,Bakery,Toy / Game Store,Sandwich Place,Pharmacy,Sushi Restaurant,Cosmetics Shop,Men's Store
4,Downtown Toronto,0.0,Gym,Sushi Restaurant,Creperie,Theater,Martial Arts Dojo,Bubble Tea Shop,Café,Ethiopian Restaurant,Mexican Restaurant,Ramen Restaurant
5,Etobicoke,0.0,Pharmacy,Park,Skating Rink,Grocery Store,Bank,Fish & Chips Shop,Field,Fish Market,Fast Food Restaurant,Farmers Market
...,...,...,...,...,...,...,...,...,...,...,...,...
97,Downtown Toronto,0.0,Café,Coffee Shop,Hotel,Restaurant,Japanese Restaurant,Gym,Salad Place,Seafood Restaurant,American Restaurant,Steakhouse
98,Etobicoke,0.0,Smoke Shop,Sushi Restaurant,Bank,Breakfast Spot,Dessert Shop,Bar,Bakery,Pub,Coffee Shop,Restaurant
99,Downtown Toronto,0.0,Japanese Restaurant,Coffee Shop,Sushi Restaurant,Restaurant,Gay Bar,Gastropub,Grocery Store,Hotel,Mediterranean Restaurant,Men's Store
100,East Toronto,0.0,Restaurant,Coffee Shop,Bookstore,Sushi Restaurant,Furniture / Home Store,Bank,Japanese Restaurant,Italian Restaurant,Yoga Studio,Department Store


### Cluster 2

In [117]:
toronto_clustered.loc[toronto_clustered['Cluster Labels'] == 1, toronto_clustered.columns[[1] + list(range(5, toronto_clustered.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
62,Central Toronto,1.0,IT Services,Yoga Studio,Doner Restaurant,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market


### Cluster 3

In [118]:
toronto_clustered.loc[toronto_clustered['Cluster Labels'] == 2, toronto_clustered.columns[[1] + list(range(5, toronto_clustered.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Scarborough,2.0,Home Service,Yoga Studio,Doner Restaurant,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market
52,North York,2.0,Home Service,Playground,Yoga Studio,Falafel Restaurant,Donut Shop,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Fast Food Restaurant
101,Etobicoke,2.0,Home Service,Baseball Field,Park,Yoga Studio,Falafel Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Fast Food Restaurant


### Cluster 4

In [119]:
toronto_clustered.loc[toronto_clustered['Cluster Labels'] == 3, toronto_clustered.columns[[1] + list(range(5, toronto_clustered.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,3.0,Construction & Landscaping,Park,Food & Drink Shop,Health Food Store,Hawaiian Restaurant,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market
7,North York,3.0,Park,Gym,Construction & Landscaping,Electronics Store,Pool,Home Service,River,Field,Fast Food Restaurant,Farmers Market
13,North York,3.0,Park,Gym,Construction & Landscaping,Electronics Store,Pool,Home Service,River,Field,Fast Food Restaurant,Farmers Market
21,York,3.0,Park,Sporting Goods Shop,Women's Store,Bakery,Beer Store,Gym,Mexican Restaurant,Falafel Restaurant,Electronics Store,Ethiopian Restaurant
27,North York,3.0,Park,Residential Building (Apartment / Condo),Yoga Studio,Dog Run,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Falafel Restaurant
32,Scarborough,3.0,Park,Grocery Store,Yoga Studio,Falafel Restaurant,Donut Shop,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Farmers Market
35,East York,3.0,Convenience Store,Park,Greek Restaurant,Coffee Shop,Falafel Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Farmers Market
36,Downtown Toronto,3.0,Café,Park,Harbor / Marina,Music Venue,Yoga Studio,Event Space,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Farmers Market
39,North York,3.0,Flower Shop,Gas Station,Park,Trail,Event Space,Donut Shop,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Yoga Studio
61,Central Toronto,3.0,Photography Studio,Park,Lawyer,Doner Restaurant,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market
