# Importing libraries

In [2]:
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import requests

print('Libraries imported.')

Libraries imported.


# Getting data from Wikipedia

In [3]:
url = "https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&direction=prev&oldid=946126446"

toronto_data  = requests.get(url).text

# Creating a dataframe

In [4]:
soup = BeautifulSoup(toronto_data,"html5lib")  # Parse the data

table_contents=[]

for i in soup.find('table',{"class":"wikitable sortable"}).find('tbody').find_all('tr'):
    row = i.find_all("td")
    cell = {}
    if(row):
        if row[1].text != 'Not assigned': # Not select cells with a borough that is Not assigned.
            cell['PostalCode'] = row[0].text
            cell['Borough'] = row[1].text
            cell['Neighborhood'] = row[2].text
            table_contents.append(cell)
            
df=pd.DataFrame(table_contents)
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})

df.head() 

Unnamed: 0,Borough,Neighborhood,PostalCode
0,North York,Parkwoods\n,M3A
1,North York,Victoria Village\n,M4A
2,Downtown Toronto,Harbourfront\n,M5A
3,North York,Lawrence Heights\n,M6A
4,North York,Lawrence Manor\n,M6A


Check cells who have Not assigned neighborhood

In [5]:
nan_neighborhood = (df['Neighborhood'] == 'Not assigned')
df[nan_neighborhood]

Unnamed: 0,Borough,Neighborhood,PostalCode


There is no Not assigned neighborhood.

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 210 entries, 0 to 209
Data columns (total 3 columns):
Borough         210 non-null object
Neighborhood    210 non-null object
PostalCode      210 non-null object
dtypes: object(3)
memory usage: 5.0+ KB


Dataframe has 210 rows.

In [7]:
df['PostalCode'].value_counts()

M8Y    8
M9V    8
M5V    7
M8Z    5
M9B    5
M4V    5
M9C    4
M9R    4
M6M    4
M1V    4
M1L    3
M1K    3
M6L    3
M5T    3
M1T    3
M1M    3
M5J    3
M8V    3
M1P    3
M3H    3
M1E    3
M5H    3
M2J    3
M1C    3
M8X    3
M6K    3
M5R    3
M6H    2
M5X    2
M1R    2
      ..
M7Y    1
M9A    1
M1G    1
M2H    1
M4W    1
M9P    1
M4G    1
M4M    1
M2N    1
M4H    1
M5W    1
M6B    1
M1J    1
M4J    1
M1W    1
M4S    1
M4N    1
M3A    1
M4R    1
M4C    1
M6E    1
M5C    1
M5E    1
M9N    1
M2K    1
M3L    1
M9L    1
M7R    1
M2P    1
M1S    1
Name: PostalCode, Length: 103, dtype: int64

More than one neighborhood exist in one postal code area. These rows should be combined into one row with the neighborhoods separated with a comma. 

In [8]:
df = df.groupby(['PostalCode','Borough'])['Neighborhood'].apply(list)
df = df.sample(frac=1).reset_index()
df['Neighborhood'] = df['Neighborhood'].str.join(', ')
df['Neighborhood'] = df['Neighborhood'].replace('\n','',regex=True) # Delete \n
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M2P,North York,York Mills West
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M9A,Etobicoke,Islington Avenue
3,M2M,North York,"Newtonbrook, Willowdale"
4,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern
5,M8Z,Etobicoke,"Kingsway Park South West, Mimico NW, The Queen..."
6,M7A,Downtown Toronto,Queen's Park
7,M2L,North York,"Silver Hills, York Mills"
8,M3A,North York,Parkwoods
9,M9W,Etobicoke,Northwest


In [9]:
df.shape

(103, 3)

# Adding latitude and longitude

In [10]:
Geospatial_data = pd.read_csv('https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs_v1/Geospatial_Coordinates.csv')
Geospatial_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [11]:
df = pd.merge(df, Geospatial_data, left_on="PostalCode", right_on="Postal Code")
df.drop(columns = 'Postal Code', inplace = True)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M2P,North York,York Mills West,43.752758,-79.400049
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
3,M2M,North York,"Newtonbrook, Willowdale",43.789053,-79.408493
4,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558


# Explore and cluster the neighborhoods in Toronto

Let's get the geographical coordinates of Toronto.

In [12]:
from geopy.geocoders import Nominatim

address = 'Toronto, ON'

geolocator = Nominatim(user_agent="app01")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto City are 43.6534817, -79.3839347.


Create a dataframe which obtains only the neighborhoods in Toronto.

In [13]:
df_toronto = df[df['Borough'].str.contains('Toronto')]
df_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
4,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558
6,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
10,M5W,Downtown Toronto,Stn A PO Boxes 25 The Esplanade,43.646435,-79.374846
11,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
12,M6R,West Toronto,"Parkdale, Roncesvalles",43.64896,-79.456325


Create a map of Toronto with neighborhoods superimposed on top

In [14]:
import folium
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [35]:
CLIENT_ID = '****' # your Foursquare ID
CLIENT_SECRET = '****' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: ****
CLIENT_SECRET:****


Now, let's get the top venues that are in Toronto within a radius of 500 meters.

In [16]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

Now write the code to run the above function on each neighborhood and create a new dataframe called toronto_venues.

In [17]:
toronto_venues = getNearbyVenues(names=df_toronto['Neighborhood'],
                                   latitudes=df_toronto['Latitude'],
                                   longitudes=df_toronto['Longitude']
                                  )

Business Reply Mail Processing Centre 969 Eastern
Queen's Park
Stn A PO Boxes 25 The Esplanade
The Danforth West, Riverdale
Parkdale, Roncesvalles
Commerce Court, Victoria Hotel
First Canadian Place, Underground city
Harbourfront
North Toronto West
Dovercourt Village, Dufferin
Moore Park, Summerhill East
Runnymede, Swansea
Lawrence Park
The Annex, North Midtown, Yorkville
Central Bay Street
Berczy Park
Adelaide, King, Richmond
The Beaches West, India Bazaar
Roselawn
Cabbagetown, St. James Town
High Park, The Junction South
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Chinatown, Grange Park, Kensington Market
Rosedale
Davisville
Little Portugal, Trinity
Christie
Studio District
Brockton, Exhibition Place, Parkdale Village
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Forest Hill North, Forest Hill West
St. James Town
Church and Wellesley
Deer Park, Forest Hill SE, Rathnelly, South 

Let's check the size of the resulting dataframe.

In [18]:
print(toronto_venues.shape)
toronto_venues.head()

(1607, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558,Rorschach Brewing Co.,43.663483,-79.319824,Brewery
1,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558,Leslieville Farmers Market,43.664901,-79.319784,Farmers Market
2,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558,The Sidekick,43.664484,-79.325162,Comic Shop
3,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558,Chino Locos,43.664653,-79.325584,Burrito Place
4,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558,Queen Margherita Pizza,43.664685,-79.324164,Pizza Place


# Analyze Each Neighborhood

In [19]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 
# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[161]] + list(toronto_onehot.columns[:161]) + list(toronto_onehot.columns[162:])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighborhood,Adult Boutique,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Tibetan Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Yoga Studio
0,Business Reply Mail Processing Centre 969 Eastern,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Business Reply Mail Processing Centre 969 Eastern,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Business Reply Mail Processing Centre 969 Eastern,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Business Reply Mail Processing Centre 969 Eastern,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Business Reply Mail Processing Centre 969 Eastern,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [20]:
toronto_onehot.shape

(1607, 233)

Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [21]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighborhood,Adult Boutique,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Tibetan Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.021505,0.0,0.0,...,0.0,0.0,0.0,0.0,0.010753,0.0,0.0,0.0,0.0,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.0625,0.0625,0.125,0.1875,0.125,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
toronto_grouped.shape

(39, 233)

Let's print each neighborhood along with the top 5 most common venues

In [23]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
             venue  freq
0      Coffee Shop  0.11
1             Café  0.05
2       Restaurant  0.04
3  Thai Restaurant  0.03
4    Deli / Bodega  0.03


----Berczy Park----
          venue  freq
0   Coffee Shop  0.10
1  Cocktail Bar  0.05
2        Bakery  0.05
3      Pharmacy  0.03
4    Restaurant  0.03


----Brockton, Exhibition Place, Parkdale Village----
            venue  freq
0            Café  0.14
1  Breakfast Spot  0.09
2     Coffee Shop  0.09
3   Grocery Store  0.05
4    Intersection  0.05


----Business Reply Mail Processing Centre 969 Eastern----
                venue  freq
0  Light Rail Station  0.12
1         Yoga Studio  0.06
2       Auto Workshop  0.06
3          Comic Shop  0.06
4                Park  0.06


----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara----
              venue  freq
0   Airport Service  0.19
1    Airport Lounge  0.12
2  Airport Terminal  0.12
3   Harbor / Ma

Let's put that into a pandas dataframe. Firstly, write a function to sort the venues in descending order

In [24]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [25]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Restaurant,Deli / Bodega,Thai Restaurant,Gym,Bookstore,Steakhouse,Sushi Restaurant,Burrito Place
1,Berczy Park,Coffee Shop,Cocktail Bar,Bakery,Beer Bar,Cheese Shop,Pharmacy,Restaurant,Seafood Restaurant,Farmers Market,Jazz Club
2,"Brockton, Exhibition Place, Parkdale Village",Café,Breakfast Spot,Coffee Shop,Burrito Place,Italian Restaurant,Stadium,Restaurant,Intersection,Bar,Bakery
3,Business Reply Mail Processing Centre 969 Eastern,Light Rail Station,Yoga Studio,Auto Workshop,Comic Shop,Pizza Place,Restaurant,Burrito Place,Brewery,Skate Park,Spa
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Service,Airport Lounge,Airport Terminal,Harbor / Marina,Sculpture Garden,Coffee Shop,Plane,Rental Car Location,Boat or Ferry,Bar


# Clustering

In [26]:
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3])

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [27]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df_toronto

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558,3,Light Rail Station,Yoga Studio,Auto Workshop,Comic Shop,Pizza Place,Restaurant,Burrito Place,Brewery,Skate Park,Spa
6,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494,3,Coffee Shop,Sushi Restaurant,Bank,Beer Bar,Smoothie Shop,Sandwich Place,Burrito Place,Café,Portuguese Restaurant,Park
10,M5W,Downtown Toronto,Stn A PO Boxes 25 The Esplanade,43.646435,-79.374846,3,Coffee Shop,Seafood Restaurant,Bakery,Café,Cocktail Bar,Japanese Restaurant,Italian Restaurant,Restaurant,Hotel,Beer Bar
11,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,3,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Furniture / Home Store,Fruit & Vegetable Store,Pub,Pizza Place,Lounge,Juice Bar
12,M6R,West Toronto,"Parkdale, Roncesvalles",43.64896,-79.456325,3,Gift Shop,Breakfast Spot,Dog Run,Bar,Bookstore,Dessert Shop,Restaurant,Italian Restaurant,Movie Theater,Bank


Let's visualize the resulting clusters.

In [28]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examine Clusters

### Cluster 1

In [29]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
78,Central Toronto,0,Jewelry Store,Trail,Sushi Restaurant,Bus Line,Yoga Studio,Dessert Shop,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop
90,East Toronto,0,Trail,Health Food Store,Pub,Yoga Studio,Department Store,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant


### Cluster 2

In [30]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
50,Central Toronto,1,Garden,Yoga Studio,Department Store,Escape Room,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run


### Cluster 3

In [31]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,Central Toronto,2,Park,Yoga Studio,Department Store,Escape Room,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run


### Cluster 4

In [32]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,East Toronto,3,Light Rail Station,Yoga Studio,Auto Workshop,Comic Shop,Pizza Place,Restaurant,Burrito Place,Brewery,Skate Park,Spa
6,Downtown Toronto,3,Coffee Shop,Sushi Restaurant,Bank,Beer Bar,Smoothie Shop,Sandwich Place,Burrito Place,Café,Portuguese Restaurant,Park
10,Downtown Toronto,3,Coffee Shop,Seafood Restaurant,Bakery,Café,Cocktail Bar,Japanese Restaurant,Italian Restaurant,Restaurant,Hotel,Beer Bar
11,East Toronto,3,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Furniture / Home Store,Fruit & Vegetable Store,Pub,Pizza Place,Lounge,Juice Bar
12,West Toronto,3,Gift Shop,Breakfast Spot,Dog Run,Bar,Bookstore,Dessert Shop,Restaurant,Italian Restaurant,Movie Theater,Bank
13,Downtown Toronto,3,Coffee Shop,Restaurant,Café,Hotel,Italian Restaurant,Gym,American Restaurant,Deli / Bodega,Japanese Restaurant,Seafood Restaurant
15,Downtown Toronto,3,Coffee Shop,Café,Hotel,Japanese Restaurant,Gym,Restaurant,Salad Place,Steakhouse,American Restaurant,Asian Restaurant
18,Downtown Toronto,3,Coffee Shop,Park,Café,Bakery,Pub,Breakfast Spot,Restaurant,Theater,Yoga Studio,Chocolate Shop
19,Central Toronto,3,Sporting Goods Shop,Coffee Shop,Clothing Store,Yoga Studio,Ice Cream Shop,Fast Food Restaurant,Diner,Mexican Restaurant,Park,Chinese Restaurant
20,West Toronto,3,Bakery,Pharmacy,Pet Store,Liquor Store,Middle Eastern Restaurant,Bar,Café,Supermarket,Music Venue,Pizza Place


### Cluster 5

In [33]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
59,Downtown Toronto,4,Park,Playground,Trail,Yoga Studio,Department Store,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant
