<h1>List of postal codes of Canada</h1>

In [1]:
# import libraries
import os

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
# import k-means from clustering stage
from sklearn.cluster import KMeans

<h2>Creating data frame  Part - 1</h2>

In [2]:
# define datasource url
URL = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
# Bound the request so a stalled connection cannot hang the kernel, and fail
# loudly on an HTTP error instead of parsing an error page as if it were data.
r = requests.get(URL, timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.content, 'html5lib')

In [3]:
# Locate the postal-code table in the parsed page and set up one empty
# accumulator list per column that will be scraped from it.
table = soup.find(name='table', class_='wikitable sortable')
a, b, c = [], [], []

In [4]:
# fetch column of each row and append it to lists a,b,c
#
# Start from empty lists so that re-running this cell does not append the
# same rows a second time.
for column in (a, b, c):
    column.clear()

for row in table.find_all("tr"):
    cells = row.find_all("td")
    # Rows without three <td> cells (e.g. the header row) carry no record.
    # Skipping them explicitly replaces the former bare `except: pass`, which
    # hid every error and could leave a, b and c with different lengths when a
    # row had only one or two cells.
    if len(cells) < 3:
        continue
    a.append(cells[0].find(text=True))
    b.append(cells[1].find(text=True))
    c.append(cells[2].find(text=True))

In [5]:
# Assemble the three scraped lists into one frame (Postcode, Borough,
# Neighbourhood), then strip the newline characters left over from the HTML.
df = pd.DataFrame(a, columns=['Postcode']).assign(Borough=b, Neighbourhood=c)
df = df.replace('\n', '', regex=True)

In [6]:
df.describe()

Unnamed: 0,Postcode,Borough,Neighbourhood
count,288,288,288
unique,180,12,209
top,M9V,Not assigned,Not assigned
freq,8,77,78


In [8]:
# Keep only the rows whose Borough is something other than 'Not assigned'.
has_borough = df['Borough'] != 'Not assigned'
df_filter = df[has_borough]

In [9]:
# replacing Not assigned Neighbourhood with corresponding Borough
#
# The row yielded by DataFrame.iterrows() may be a copy, so assigning to it is
# not guaranteed to change the frame. Update the frame itself through a
# boolean mask instead. The explicit copy detaches df_filter from the frame it
# was sliced from, so the assignment cannot trigger SettingWithCopyWarning.
df_filter = df_filter.copy()
unnamed = df_filter['Neighbourhood'] == 'Not assigned'
df_filter.loc[unnamed, 'Neighbourhood'] = df_filter.loc[unnamed, 'Borough']
df_new = df_filter

In [10]:
# Collapse to one row per (Postcode, Borough) pair, joining the neighbourhood
# names of that pair into a single comma-separated string.
df_filter = (
    df_filter
    .groupby(["Postcode", "Borough"])["Neighbourhood"]
    .agg(','.join)
    .reset_index()
)

In [11]:
df_filter.describe()

Unnamed: 0,Postcode,Borough,Neighbourhood
count,103,103,103
unique,103,11,103
top,M1L,North York,Upper Rouge
freq,1,24,1


In [12]:
df_filter

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park"
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge"
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff,Cliffside West"


<h2>Get latitude and longitude for Boroughs --- Part -2</h2>

In [13]:
# Load the postal-code coordinate table from a CSV file in the working directory.
data=pd.read_csv('Geospatial_Coordinates.csv')

In [14]:
data.head()


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [15]:
# Attach coordinates by matching on the postal code. Plain column assignment
# aligns on the row index, which is only correct if both tables happen to list
# exactly the same postal codes in exactly the same order.
coords = (
    data[['Postal Code', 'Latitude', 'Longitude']]
    .rename(columns={'Postal Code': 'Postcode'})
)
df_filter = (
    df_filter
    # Drop coordinates left by a previous run so the cell can be re-executed.
    .drop(columns=['Latitude', 'Longitude'], errors='ignore')
    # Left join keeps every postcode row (and its order); validate makes a
    # duplicated postal code in the lookup table an error, not a silent fan-out.
    .merge(coords, on='Postcode', how='left', validate='many_to_one')
)

In [18]:
df_filter

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848


<h2>Plot map for Toronto and mark Borough locations -- Part -3</h2>

In [19]:

# Look up the coordinates used to centre the Toronto map.
address = 'Toronto, Canada'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; report that clearly
# instead of failing with an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [20]:

# Base map centred on the previously geocoded latitude/longitude.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# One blue circle marker per row of df_filter, with the borough name as popup.
borough_points = zip(df_filter['Latitude'], df_filter['Longitude'], df_filter['Borough'])
for lat, lng, borough in borough_points:
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup('{}'.format(borough), parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

map_toronto

In [21]:
# Restrict to the rows of the 'Central Toronto' borough and renumber them from 0.
is_central = df_filter['Borough'] == 'Central Toronto'
central_data = df_filter.loc[is_central].reset_index(drop=True)
central_data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
1,M4P,Central Toronto,Davisville North,43.712751,-79.390197
2,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
3,M4S,Central Toronto,Davisville,43.704324,-79.38879
4,M4T,Central Toronto,"Moore Park,Summerhill East",43.689574,-79.38316


<h2>Plot Map for Central Toronto and mark its Neighbourhoods</h2>

In [22]:
# Look up the coordinates used to centre the Central Toronto maps.
# NOTE(review): the recorded output is identical to the 'Toronto, Canada'
# result, so this free-text query may not resolve to the borough itself - confirm.
address = 'Central Toronto'

geolocator = Nominatim(user_agent="Central_toronto")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; report that clearly
# instead of failing with an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Central Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Central Toronto are 43.653963, -79.387207.


In [197]:
# Base map for Central Toronto, centred on the geocoded latitude/longitude.
map_central = folium.Map(location=[latitude, longitude], zoom_start=11)

# One blue circle marker per row of central_data; the popup shows the
# neighbourhood name(s) of that row.
central_points = zip(central_data['Latitude'], central_data['Longitude'], central_data['Neighbourhood'])
for lat, lng, neighbourhood in central_points:
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(neighbourhood, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_central)

map_central

<h3>Explore Neighbourhoods in Central Toronto</h3>

In [23]:
# Foursquare credentials are read from the environment so that they are never
# stored in the notebook source. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare requests will be rejected.')

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# Show only a mask: the saved cell output must not contain the secret itself.
print('CLIENT_SECRET:' + '*' * len(CLIENT_SECRET))

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


In [25]:
# function to get venues of each Neighbourhood

def getNearbyVenues(names, latitudes, longitudes, radius=500,LIMIT = 100 ):
    """Collect the Foursquare venues found around each given location.

    Calls the Foursquare ``venues/explore`` endpoint once per
    (name, latitude, longitude) triple, using the module-level CLIENT_ID,
    CLIENT_SECRET and VERSION values.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each location; they are consumed together
        with ``zip``.
    radius : passed to the API as the ``radius`` query parameter.
    LIMIT : passed to the API as the ``limit`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighbourhood',
        'Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        has these columns even when no venue was returned at all.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    columns = ['Neighbourhood',
               'Neighbourhood Latitude',
               'Neighbourhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Pass the query as `params` so requests does the URL encoding, bound
        # the call with a timeout, and surface HTTP errors (bad credentials,
        # exhausted quota) instead of failing later with an opaque KeyError.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,
        )
        response.raise_for_status()

        # Tolerate a response with no result group at all.
        groups = response.json()['response'].get('groups') or [{}]
        results = groups[0].get('items', [])

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            # A venue may come back without any category; record None rather
            # than raising IndexError on categories[0].
            categories = venue.get('categories') or []
            category = categories[0]['name'] if categories else None
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category))

    # Naming the columns here (rather than assigning them afterwards) keeps an
    # empty result a valid, correctly labelled frame.
    nearby_venues = pd.DataFrame(rows, columns=columns)

    return(nearby_venues)

In [26]:
# Query venues around every row of central_data; the three columns are passed
# positionally as (names, latitudes, longitudes).
central_venues = getNearbyVenues(
    central_data['Neighbourhood'],
    central_data['Latitude'],
    central_data['Longitude'],
)

Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park,Summerhill East
Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West
Roselawn
Forest Hill North,Forest Hill West
The Annex,North Midtown,Yorkville


In [27]:
print(central_venues.shape)
central_venues.head()

(115, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Lawrence Park,43.72802,-79.38879,Lawrence Park Ravine,43.726963,-79.394382,Park
1,Lawrence Park,43.72802,-79.38879,Zodiac Swim School,43.728532,-79.38286,Swim School
2,Lawrence Park,43.72802,-79.38879,TTC Bus #162 - Lawrence-Donway,43.728026,-79.382805,Bus Line
3,Davisville North,43.712751,-79.390197,Summerhill Market North,43.715499,-79.392881,Food & Drink Shop
4,Davisville North,43.712751,-79.390197,Homeway Restaurant & Brunch,43.712641,-79.391557,Breakfast Spot


In [28]:
# let's see how many venues returned by each neighbourhood
central_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Davisville,35,35,35,35,35,35
Davisville North,8,8,8,8,8,8
"Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West",16,16,16,16,16,16
"Forest Hill North,Forest Hill West",4,4,4,4,4,4
Lawrence Park,3,3,3,3,3,3
"Moore Park,Summerhill East",3,3,3,3,3,3
North Toronto West,22,22,22,22,22,22
Roselawn,3,3,3,3,3,3
"The Annex,North Midtown,Yorkville",21,21,21,21,21,21


In [29]:
#Let's find out how many unique categories can be curated from all the returned venues
print('There are {} uniques categories.'.format(len(central_venues['Venue Category'].unique())))

There are 61 uniques categories.


<h2>Analyze each Neighbourhood</h2>

In [30]:
# One indicator column per venue category, with no prefix on the column names.
central_onehot = pd.get_dummies(central_venues[['Venue Category']], prefix="", prefix_sep="")

# Re-attach the neighbourhood each venue row belongs to.
central_onehot['Neighbourhood'] = central_venues['Neighbourhood']

# Rotate the last column round to the front.
column_order = list(central_onehot.columns)
central_onehot = central_onehot[column_order[-1:] + column_order[:-1]]

central_onehot.head(10)

Unnamed: 0,Neighbourhood,American Restaurant,BBQ Joint,Bagel Shop,Breakfast Spot,Brewery,Burger Joint,Bus Line,Café,Chinese Restaurant,...,Summer Camp,Supermarket,Sushi Restaurant,Swim School,Thai Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yoga Studio
0,Lawrence Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Lawrence Park,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
2,Lawrence Park,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Davisville North,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Davisville North,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,Davisville North,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,Davisville North,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,Davisville North,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,Davisville North,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,Davisville North,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [31]:
central_onehot.shape

(115, 62)

In [32]:
# Average the indicator columns within each neighbourhood: each value becomes
# the share of that neighbourhood's venues falling in the category.
central_grouped = central_onehot.groupby('Neighbourhood', as_index=False).mean()
central_grouped

Unnamed: 0,Neighbourhood,American Restaurant,BBQ Joint,Bagel Shop,Breakfast Spot,Brewery,Burger Joint,Bus Line,Café,Chinese Restaurant,...,Summer Camp,Supermarket,Sushi Restaurant,Swim School,Thai Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yoga Studio
0,Davisville,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.057143,0.0,...,0.0,0.0,0.057143,0.0,0.057143,0.028571,0.0,0.0,0.0,0.0
1,Davisville North,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Deer Park,Forest Hill SE,Rathnelly,South Hill,...",0.0625,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0625,0.0
3,"Forest Hill North,Forest Hill West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.25,0.0,0.0,0.0,0.25,0.0,0.0,0.0
4,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,...,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0
5,"Moore Park,Summerhill East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,North Toronto West,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.045455,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455
7,Roselawn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"The Annex,North Midtown,Yorkville",0.047619,0.047619,0.0,0.0,0.0,0.047619,0.0,0.142857,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0


In [33]:
central_grouped.shape

(9, 62)

In [34]:
# Print, for every neighbourhood, its most frequent venue categories.

num_top_venues = 5

for hood in central_grouped['Neighbourhood']:
    print("----"+hood+"----")
    # Transpose the neighbourhood's row into a two-column (venue, freq) table.
    hood_freqs = central_grouped[central_grouped['Neighbourhood'] == hood].T.reset_index()
    hood_freqs.columns = ['venue','freq']
    # The first entry is the neighbourhood name itself, not a category.
    hood_freqs = hood_freqs.iloc[1:].astype({'freq': float}).round({'freq': 2})
    ranked = hood_freqs.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Davisville----
              venue  freq
0      Dessert Shop  0.09
1    Sandwich Place  0.09
2   Thai Restaurant  0.06
3  Sushi Restaurant  0.06
4       Pizza Place  0.06


----Davisville North----
            venue  freq
0  Clothing Store  0.12
1           Hotel  0.12
2         Dog Run  0.12
3             Gym  0.12
4  Sandwich Place  0.12


----Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West----
                 venue  freq
0                  Pub  0.12
1          Coffee Shop  0.12
2  American Restaurant  0.06
3           Restaurant  0.06
4          Pizza Place  0.06


----Forest Hill North,Forest Hill West----
                 venue  freq
0        Jewelry Store  0.25
1                Trail  0.25
2     Sushi Restaurant  0.25
3                 Park  0.25
4  American Restaurant  0.00


----Lawrence Park----
                 venue  freq
0             Bus Line  0.33
1          Swim School  0.33
2                 Park  0.33
3  American Restaurant  0.00
4   Salon / Barbersh

In [35]:
#Let's put that into a pandas dataframe
#First, let's write a function to sort the venues in descending order.

def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first entry of ``row`` is skipped, the rest are sorted by value in
    descending order, and the index labels of the first ``num_top_venues``
    entries are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

#=========================================================================================
#Now let's create the new dataframe and display the top 10 venues for each neighbourhood.==
#=========================================================================================
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for rank in range(1, num_top_venues + 1):
    # Choose the ordinal suffix explicitly instead of relying on a bare
    # `except` around a list lookup: 11-13 are irregular, otherwise the last
    # digit decides (1st, 2nd, 3rd, 21st, ...), everything else takes 'th'.
    if rank % 100 in (11, 12, 13) or not 1 <= rank % 10 <= 3:
        suffix = 'th'
    else:
        suffix = indicators[rank % 10 - 1]
    columns.append('{}{} Most Common Venue'.format(rank, suffix))

# create a new dataframe
central_venues_sorted = pd.DataFrame(columns=columns)
central_venues_sorted['Neighbourhood'] = central_grouped['Neighbourhood']

# Fill each neighbourhood's row with its top categories, in rank order.
for ind in np.arange(central_grouped.shape[0]):
    central_venues_sorted.iloc[ind, 1:] = return_most_common_venues(central_grouped.iloc[ind, :], num_top_venues)

central_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Davisville,Sandwich Place,Dessert Shop,Gym,Café,Coffee Shop,Pizza Place,Thai Restaurant,Italian Restaurant,Sushi Restaurant,Pharmacy
1,Davisville North,Dog Run,Clothing Store,Food & Drink Shop,Hotel,Breakfast Spot,Sandwich Place,Gym,Park,Fried Chicken Joint,Garden
2,"Deer Park,Forest Hill SE,Rathnelly,South Hill,...",Coffee Shop,Pub,American Restaurant,Pizza Place,Bagel Shop,Liquor Store,Vietnamese Restaurant,Sushi Restaurant,Supermarket,Health & Beauty Service
3,"Forest Hill North,Forest Hill West",Trail,Jewelry Store,Sushi Restaurant,Park,Yoga Studio,Gift Shop,Farmers Market,Food & Drink Shop,Fried Chicken Joint,Garden
4,Lawrence Park,Swim School,Bus Line,Park,Yoga Studio,Dog Run,History Museum,Health & Beauty Service,Gym / Fitness Center,Gym,Greek Restaurant


<h2>Cluster Neighbourhoods</h2>

In [36]:
# set number of clusters
kclusters = 5

# Name the axis through `columns=`: the positional form drop(label, 1) was
# deprecated and later removed from pandas.
central_grouped_clustering = central_grouped.drop(columns='Neighbourhood')

# run k-means clustering
# n_init is pinned because its default differs between scikit-learn releases,
# which would otherwise make the labels depend on the installed version.
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(central_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 4, 3, 2, 0, 1, 0])

In [37]:
# add clustering labels
# DataFrame.insert raises if the column already exists, so remove any labels
# left by a previous run of this cell first; this keeps the cell re-runnable.
central_venues_sorted = central_venues_sorted.drop(columns='Cluster Labels', errors='ignore')
central_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# Join the cluster label and ranked venue columns onto central_data (postcode,
# borough, coordinates), matching rows on the Neighbourhood value.
central_merged = central_data.join(central_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

central_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,3,Swim School,Bus Line,Park,Yoga Studio,Dog Run,History Museum,Health & Beauty Service,Gym / Fitness Center,Gym,Greek Restaurant
1,M4P,Central Toronto,Davisville North,43.712751,-79.390197,0,Dog Run,Clothing Store,Food & Drink Shop,Hotel,Breakfast Spot,Sandwich Place,Gym,Park,Fried Chicken Joint,Garden
2,M4R,Central Toronto,North Toronto West,43.715383,-79.405678,0,Clothing Store,Sporting Goods Shop,Coffee Shop,Yoga Studio,Gym / Fitness Center,Gift Shop,Mexican Restaurant,Park,Diner,Dessert Shop
3,M4S,Central Toronto,Davisville,43.704324,-79.38879,0,Sandwich Place,Dessert Shop,Gym,Café,Coffee Shop,Pizza Place,Thai Restaurant,Italian Restaurant,Sushi Restaurant,Pharmacy
4,M4T,Central Toronto,"Moore Park,Summerhill East",43.689574,-79.38316,2,Gym,Summer Camp,Playground,Yoga Studio,Diner,History Museum,Health & Beauty Service,Gym / Fitness Center,Greek Restaurant,Gourmet Shop


In [234]:
# Visualise the clusters on a map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# One evenly spaced rainbow colour per cluster, as hex strings.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Rows without a cluster label (no match in the join) cannot be coloured, so
# they are left off the map.
clustered = central_merged.dropna(subset=['Cluster Labels'])

# add markers to the map
for lat, lon, poi, cluster in zip(clustered['Latitude'], clustered['Longitude'], clustered['Neighbourhood'], clustered['Cluster Labels']):
    # Labels become floats if the join introduced missing values; list
    # indexing needs a plain int.
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        # Labels run from 0 to kclusters-1, so they index the palette
        # directly; the former `cluster-1` only worked through negative-index
        # wrap-around and shifted every colour by one.
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

<h3>Examine clusters</h3>

In [38]:
# Cluster 0: show column 2 (Neighbourhood) plus every column from position 5 on.
in_cluster = central_merged['Cluster Labels'] == 0
shown_columns = central_merged.columns[[2] + list(range(5, central_merged.shape[1]))]
central_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Davisville North,0,Dog Run,Clothing Store,Food & Drink Shop,Hotel,Breakfast Spot,Sandwich Place,Gym,Park,Fried Chicken Joint,Garden
2,North Toronto West,0,Clothing Store,Sporting Goods Shop,Coffee Shop,Yoga Studio,Gym / Fitness Center,Gift Shop,Mexican Restaurant,Park,Diner,Dessert Shop
3,Davisville,0,Sandwich Place,Dessert Shop,Gym,Café,Coffee Shop,Pizza Place,Thai Restaurant,Italian Restaurant,Sushi Restaurant,Pharmacy
5,"Deer Park,Forest Hill SE,Rathnelly,South Hill,...",0,Coffee Shop,Pub,American Restaurant,Pizza Place,Bagel Shop,Liquor Store,Vietnamese Restaurant,Sushi Restaurant,Supermarket,Health & Beauty Service
8,"The Annex,North Midtown,Yorkville",0,Café,Sandwich Place,Coffee Shop,American Restaurant,Indian Restaurant,Jewish Restaurant,Liquor Store,Park,Pharmacy,Pizza Place


In [39]:
# Cluster 1: show column 2 (Neighbourhood) plus every column from position 5 on.
in_cluster = central_merged['Cluster Labels'] == 1
shown_columns = central_merged.columns[[2] + list(range(5, central_merged.shape[1]))]
central_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Roselawn,1,Home Service,Health & Beauty Service,Garden,Yoga Studio,Indian Restaurant,History Museum,Gym / Fitness Center,Gym,Greek Restaurant,Gourmet Shop


In [40]:
# Cluster 2: show column 2 (Neighbourhood) plus every column from position 5 on.
in_cluster = central_merged['Cluster Labels'] == 2
shown_columns = central_merged.columns[[2] + list(range(5, central_merged.shape[1]))]
central_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,"Moore Park,Summerhill East",2,Gym,Summer Camp,Playground,Yoga Studio,Diner,History Museum,Health & Beauty Service,Gym / Fitness Center,Greek Restaurant,Gourmet Shop


In [41]:
# Cluster 3: show column 2 (Neighbourhood) plus every column from position 5 on.
in_cluster = central_merged['Cluster Labels'] == 3
shown_columns = central_merged.columns[[2] + list(range(5, central_merged.shape[1]))]
central_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Lawrence Park,3,Swim School,Bus Line,Park,Yoga Studio,Dog Run,History Museum,Health & Beauty Service,Gym / Fitness Center,Gym,Greek Restaurant


In [42]:
# Cluster 4: show column 2 (Neighbourhood) plus every column from position 5 on.
in_cluster = central_merged['Cluster Labels'] == 4
shown_columns = central_merged.columns[[2] + list(range(5, central_merged.shape[1]))]
central_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,"Forest Hill North,Forest Hill West",4,Trail,Jewelry Store,Sushi Restaurant,Park,Yoga Studio,Gift Shop,Farmers Market,Food & Drink Shop,Fried Chicken Joint,Garden
