Parsing data from wikipedia

In [29]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import urllib.request
import numpy as np

# Use SHIFT+TAB keys to popup inplace code help
%config IPCompleter.greedy = True

# Output multiple statements from one input cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
table_from_top = 1
wikipedia_page = 'List_of_postal_codes_of_Canada'
trace = False

In [3]:
wikipedia_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'.format(wikipedia_page)
page = requests.get(wikipedia_url)
soup = BeautifulSoup(page.content, 'lxml')
tables = soup.find_all('table', {'class': 'wikitable'})
table = tables[table_from_top - 1]

In [4]:
feature_names = []

header_row = table.find('tr')
for header in header_row.find_all('th'):
    feature_name = ' '.join(header.find_all(text=True))
    feature_name.replace('\n', '')
    feature_names.append(feature_name)

'Postal Code'

'Borough'

'Neighbourhood'

In [132]:
def has_coords(tag):
    if tag.has_attr('class'):
        if tag['class'][0] == 'latitude' or tag['class'][0] == 'longitude':
            return True
    return False

def get_coords(child):
    coords = []
    for coord in child.find_all(has_coords):
        coords.append(coord.string)
    if coords:
        if trace:
            return 'C = {}'.format(' '.join(coords))
        else:
            return ' '.join(coords)
    else:
        return ''

samples = []
sample_rows = table.find_all('tr')[1:]
for sample_row in sample_rows:
    features = []
    for feature_col in sample_row.find_all('td'):
        feature_value = ''
        text = feature_col.string
        if text:
            if trace:
                features.append('T = {}'.format(text))
            else:
                features.append(text)
            continue
        
        for child in feature_col.children:
            if child.name == 'span':
                if child.has_attr('class'):
                    if child['class'] == 'display:none':
                        continue
                if child.find_all(has_coords):
                    feature_value = get_coords(child)
                    if feature_value:
                        break
                    else:
                        continue
            if child.name == 'sup':
                continue
            if child.name == 'a':
                if child.string[0] == '[':
                    continue            
            if child.name == 'a':
                if trace:
                    feature_value = 'A = {}'.format(child.string)
                else:
                    feature_value = child.string
                break
            if child.name == 'font':
                if trace:
                    feature_value = 'F = {}'.format(child.string)
                else:
                    feature_value = child.string
                break
            try:
                # feature_value = '' for any tags not covered above
                content = child.contents
            except AttributeError:
                # Handle whitespace between child tags, treated as a child string
                if child.isspace():
                    continue
                if trace:
                    feature_value = 'E = {}'.format(child)
                else:
                    feature_value = child
                break
        features.append(feature_value)
    samples.append(dict(zip(feature_names, features)))

In [145]:
df = pd.DataFrame(samples)
df.head()
df.tail()

Unnamed: 0,Postal Code\n,Borough\n,Neighbourhood\n
0,M1A\n,Not assigned\n,Not assigned\n
1,M2A\n,Not assigned\n,Not assigned\n
2,M3A\n,North York\n,Parkwoods\n
3,M4A\n,North York\n,Victoria Village\n
4,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront\n"


Unnamed: 0,Postal Code\n,Borough\n,Neighbourhood\n
175,M5Z\n,Not assigned\n,Not assigned\n
176,M6Z\n,Not assigned\n,Not assigned\n
177,M7Z\n,Not assigned\n,Not assigned\n
178,M8Z\n,Etobicoke\n,"Mimico NW, The Queensway West, South of Bloor,..."
179,M9Z\n,Not assigned\n,Not assigned\n


In [146]:
df.rename(columns={'Postal Code\n': 'Postal Code', 'Borough\n': 'Borough', 'Neighbourhood\n': 'Neighborhood'}, inplace=True)
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A\n,Not assigned\n,Not assigned\n
1,M2A\n,Not assigned\n,Not assigned\n
2,M3A\n,North York\n,Parkwoods\n
3,M4A\n,North York\n,Victoria Village\n
4,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront\n"
...,...,...,...
175,M5Z\n,Not assigned\n,Not assigned\n
176,M6Z\n,Not assigned\n,Not assigned\n
177,M7Z\n,Not assigned\n,Not assigned\n
178,M8Z\n,Etobicoke\n,"Mimico NW, The Queensway West, South of Bloor,..."


In [147]:
df['Postal Code'] = df['Postal Code'].str.replace(r'\n', '')
df['Borough'] = df['Borough'].str.replace(r'\n', '')
df['Neighborhood'] = df['Neighborhood'].str.replace(r'\n', '')

In [148]:
df = df[df.Borough != 'Not assigned']

In [149]:
df

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [11]:
df.shape

(103, 3)

Adding coordinates

In [12]:
!pip install geocoder



In [153]:
Postal_Code = df['Postal Code']
len(Postal_Code)

103

In [15]:
import geocoder

In [154]:
latitude=[]
longitude=[]
for code in df['Postal Code']:
    g = geocoder.arcgis('{}, Toronto, Ontario'.format(code))
    print(code, g.latlng)
    while (g.latlng is None):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(code))
        print(code, g.latlng)
    latlng = g.latlng
    latitude.append(latlng[0])
    longitude.append(latlng[1])

M3A [43.75245000000007, -79.32990999999998]
M4A [43.73057000000006, -79.31305999999995]
M5A [43.65512000000007, -79.36263999999994]
M6A [43.72327000000007, -79.45041999999995]
M7A [43.66253000000006, -79.39187999999996]
M9A [43.662630000000036, -79.52830999999998]
M1B [43.811390000000074, -79.19661999999994]
M3B [43.74923000000007, -79.36185999999998]
M4B [43.70718000000005, -79.31191999999999]
M5B [43.65739000000008, -79.37803999999994]
M6B [43.70687000000004, -79.44811999999996]
M9B [43.65034000000003, -79.55361999999997]
M1C [43.78574000000003, -79.15874999999994]
M3C [43.72168000000005, -79.34351999999996]
M4C [43.68970000000007, -79.30681999999996]
M5C [43.65215000000006, -79.37586999999996]
M6C [43.69211000000007, -79.43035999999995]
M9C [43.64857000000006, -79.57824999999997]
M1E [43.765750000000025, -79.17469999999997]
M4E [43.67709000000008, -79.29546999999997]
M5E [43.64536000000004, -79.37305999999995]
M6E [43.68784000000005, -79.45045999999996]
M1G [43.76812000000007, -79.2

In [158]:
#latitudef = []
#for item in latitude:
#    latitudef.append(float(item))

#x=print(latitudef)
#type(x)
df

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
2,M3A,North York,Parkwoods,43.75245,-79.32991
3,M4A,North York,Victoria Village,43.73057,-79.31306
4,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264
5,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66253,-79.39188
...,...,...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.65319,-79.51113
165,M4Y,Downtown Toronto,Church and Wellesley,43.66659,-79.38133
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.64869,-79.38544
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.63278,-79.48945


In [159]:
df['Latitude'] = latitude
df['Longitude'] = longitude

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


In [157]:
df['Latitude'] = df['Latitude'].astype(float)
df['Longitude'] = df['Longitude'].astype(float)
print(df.dtypes)

Postal Code      object
Borough          object
Neighborhood     object
Latitude        float64
Longitude       float64
dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


Clustering

In [20]:
CLIENT_ID = 'FUD1ZSKEBF30D3424K0LBAZCUWDSLSQQPLYFCAVQ3EQSUJR2' # your Foursquare ID
CLIENT_SECRET = '0PXRI5XYRPT32PR4YDHC2AKNGTX5O0MX42OOPXN2XDZIB2WU' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: FUD1ZSKEBF30D3424K0LBAZCUWDSLSQQPLYFCAVQ3EQSUJR2
CLIENT_SECRET:0PXRI5XYRPT32PR4YDHC2AKNGTX5O0MX42OOPXN2XDZIB2WU


In [163]:
df=df.reset_index(drop=True)
df

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.75245,-79.32991
1,M4A,North York,Victoria Village,43.73057,-79.31306
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66253,-79.39188
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.65319,-79.51113
99,M4Y,Downtown Toronto,Church and Wellesley,43.66659,-79.38133
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.64869,-79.38544
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.63278,-79.48945


In [164]:
df.loc[0, 'Neighborhood']

'Parkwoods'

In [45]:
!pip install folium

Collecting folium
  Downloading folium-0.12.1-py2.py3-none-any.whl (94 kB)
[K     |████████████████████████████████| 94 kB 6.5 MB/s  eta 0:00:01
Collecting branca>=0.3.0
  Downloading branca-0.4.2-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.2 folium-0.12.1


In [108]:
import folium
from geopy.geocoders import Nominatim 

import json

In [110]:
!pip install json

[31mERROR: Could not find a version that satisfies the requirement json (from versions: none)[0m
[31mERROR: No matching distribution found for json[0m


In [165]:
address = 'Toronto'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [166]:
map = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map)  
    
map

<folium.vector_layers.CircleMarker at 0x7f1117304550>

<folium.vector_layers.CircleMarker at 0x7f1120eaae10>

<folium.vector_layers.CircleMarker at 0x7f1120eaa390>

<folium.vector_layers.CircleMarker at 0x7f11172c22d0>

<folium.vector_layers.CircleMarker at 0x7f111e9e62d0>

<folium.vector_layers.CircleMarker at 0x7f11172c2ad0>

<folium.vector_layers.CircleMarker at 0x7f11172c2ed0>

<folium.vector_layers.CircleMarker at 0x7f11172c2990>

<folium.vector_layers.CircleMarker at 0x7f111e954750>

<folium.vector_layers.CircleMarker at 0x7f11172bafd0>

<folium.vector_layers.CircleMarker at 0x7f11172bae10>

<folium.vector_layers.CircleMarker at 0x7f11172ba910>

<folium.vector_layers.CircleMarker at 0x7f11172ba890>

<folium.vector_layers.CircleMarker at 0x7f1117287850>

<folium.vector_layers.CircleMarker at 0x7f11172baa50>

<folium.vector_layers.CircleMarker at 0x7f1117248d10>

<folium.vector_layers.CircleMarker at 0x7f1117248a50>

<folium.vector_layers.CircleMarker at 0x7f1117248fd0>

<folium.vector_layers.CircleMarker at 0x7f1117248310>

<folium.vector_layers.CircleMarker at 0x7f1117267090>

<folium.vector_layers.CircleMarker at 0x7f1117267690>

<folium.vector_layers.CircleMarker at 0x7f1117287350>

<folium.vector_layers.CircleMarker at 0x7f1117267a10>

<folium.vector_layers.CircleMarker at 0x7f11172679d0>

<folium.vector_layers.CircleMarker at 0x7f1117267c90>

<folium.vector_layers.CircleMarker at 0x7f1117267f10>

<folium.vector_layers.CircleMarker at 0x7f1117271150>

<folium.vector_layers.CircleMarker at 0x7f1117271210>

<folium.vector_layers.CircleMarker at 0x7f1117271690>

<folium.vector_layers.CircleMarker at 0x7f1117271910>

<folium.vector_layers.CircleMarker at 0x7f1117271d50>

<folium.vector_layers.CircleMarker at 0x7f1117271e10>

<folium.vector_layers.CircleMarker at 0x7f111727a110>

<folium.vector_layers.CircleMarker at 0x7f111727a250>

<folium.vector_layers.CircleMarker at 0x7f11172715d0>

<folium.vector_layers.CircleMarker at 0x7f111727a2d0>

<folium.vector_layers.CircleMarker at 0x7f111727a610>

<folium.vector_layers.CircleMarker at 0x7f111727a510>

<folium.vector_layers.CircleMarker at 0x7f111727a4d0>

<folium.vector_layers.CircleMarker at 0x7f1117284090>

<folium.vector_layers.CircleMarker at 0x7f111727ae50>

<folium.vector_layers.CircleMarker at 0x7f1117284510>

<folium.vector_layers.CircleMarker at 0x7f1117284710>

<folium.vector_layers.CircleMarker at 0x7f1117284410>

<folium.vector_layers.CircleMarker at 0x7f1117284ad0>

<folium.vector_layers.CircleMarker at 0x7f1117284cd0>

<folium.vector_layers.CircleMarker at 0x7f1117208090>

<folium.vector_layers.CircleMarker at 0x7f1117208210>

<folium.vector_layers.CircleMarker at 0x7f1117208350>

<folium.vector_layers.CircleMarker at 0x7f1117284f50>

<folium.vector_layers.CircleMarker at 0x7f1117208850>

<folium.vector_layers.CircleMarker at 0x7f1117208710>

<folium.vector_layers.CircleMarker at 0x7f1117208ad0>

<folium.vector_layers.CircleMarker at 0x7f1117208cd0>

<folium.vector_layers.CircleMarker at 0x7f1117217290>

<folium.vector_layers.CircleMarker at 0x7f11172173d0>

<folium.vector_layers.CircleMarker at 0x7f1117217790>

<folium.vector_layers.CircleMarker at 0x7f1117208f50>

<folium.vector_layers.CircleMarker at 0x7f1117217990>

<folium.vector_layers.CircleMarker at 0x7f1117217950>

<folium.vector_layers.CircleMarker at 0x7f1117217cd0>

<folium.vector_layers.CircleMarker at 0x7f1117220290>

<folium.vector_layers.CircleMarker at 0x7f11172203d0>

<folium.vector_layers.CircleMarker at 0x7f1117220790>

<folium.vector_layers.CircleMarker at 0x7f1117220a10>

<folium.vector_layers.CircleMarker at 0x7f1117217e50>

<folium.vector_layers.CircleMarker at 0x7f11172209d0>

<folium.vector_layers.CircleMarker at 0x7f1117220c10>

<folium.vector_layers.CircleMarker at 0x7f1117228210>

<folium.vector_layers.CircleMarker at 0x7f1117228550>

<folium.vector_layers.CircleMarker at 0x7f1117228490>

<folium.vector_layers.CircleMarker at 0x7f1117228050>

<folium.vector_layers.CircleMarker at 0x7f1117228890>

<folium.vector_layers.CircleMarker at 0x7f1117228dd0>

<folium.vector_layers.CircleMarker at 0x7f1117228a50>

<folium.vector_layers.CircleMarker at 0x7f1117233350>

<folium.vector_layers.CircleMarker at 0x7f1117233590>

<folium.vector_layers.CircleMarker at 0x7f11172332d0>

<folium.vector_layers.CircleMarker at 0x7f11172339d0>

<folium.vector_layers.CircleMarker at 0x7f1117228c90>

<folium.vector_layers.CircleMarker at 0x7f1117233b50>

<folium.vector_layers.CircleMarker at 0x7f1117233c10>

<folium.vector_layers.CircleMarker at 0x7f111723c150>

<folium.vector_layers.CircleMarker at 0x7f111723c510>

<folium.vector_layers.CircleMarker at 0x7f1117233e50>

<folium.vector_layers.CircleMarker at 0x7f111723c9d0>

<folium.vector_layers.CircleMarker at 0x7f111723c5d0>

<folium.vector_layers.CircleMarker at 0x7f111723cd10>

<folium.vector_layers.CircleMarker at 0x7f111723ced0>

<folium.vector_layers.CircleMarker at 0x7f111723e290>

<folium.vector_layers.CircleMarker at 0x7f111723e4d0>

<folium.vector_layers.CircleMarker at 0x7f111723e710>

<folium.vector_layers.CircleMarker at 0x7f111723e550>

<folium.vector_layers.CircleMarker at 0x7f111723eb90>

<folium.vector_layers.CircleMarker at 0x7f111723ee10>

<folium.vector_layers.CircleMarker at 0x7f11171cd0d0>

<folium.vector_layers.CircleMarker at 0x7f11171cdfd0>

<folium.vector_layers.CircleMarker at 0x7f11171cd290>

<folium.vector_layers.CircleMarker at 0x7f11171cd790>

<folium.vector_layers.CircleMarker at 0x7f11171cd850>

<folium.vector_layers.CircleMarker at 0x7f11171cd750>

<folium.vector_layers.CircleMarker at 0x7f11171cda90>

<folium.vector_layers.CircleMarker at 0x7f11171cdd50>

In [167]:
df.loc[0, 'Neighborhood']

'Parkwoods'

In [168]:
neighborhood_latitude = df.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = df.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Parkwoods are 43.75245000000007, -79.32990999999998.


Venue Analysis - sample neighborhood

In [104]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL


'https://api.foursquare.com/v2/venues/explore?&client_id=FUD1ZSKEBF30D3424K0LBAZCUWDSLSQQPLYFCAVQ3EQSUJR2&client_secret=0PXRI5XYRPT32PR4YDHC2AKNGTX5O0MX42OOPXN2XDZIB2WU&v=20180605&ll=43.75245000000007,-79.32990999999998&radius=500&limit=100'

In [105]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '60115cd00d7dc976f79823bd'},
  'headerLocation': 'Parkwoods - Donalda',
  'headerFullLocation': 'Parkwoods - Donalda, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 3,
  'suggestedBounds': {'ne': {'lat': 43.75695000450007,
    'lng': -79.32369182386579},
   'sw': {'lat': 43.747949995500065, 'lng': -79.33612817613418}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4e8d9dcdd5fbbbb6b3003c7b',
       'name': 'Brookbanks Park',
       'location': {'address': 'Toronto',
        'lat': 43.751976046055574,
        'lng': -79.33214044722958,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.751976046055574,
          'lng': -79.33214044722958}],
        'distance': 186,
        'cc': 'CA',
      

In [169]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [170]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = pd.json_normalize(venues) 

filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Brookbanks Park,Park,43.751976,-79.33214
1,Variety Store,Food & Drink Shop,43.751974,-79.333114
2,Corrosion Service Company Limited,Construction & Landscaping,43.752432,-79.334661


In [171]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

3 venues were returned by Foursquare.


In [113]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [172]:
toronto_venues = getNearbyVenues(names=df['Neighborhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )


Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue, Humber Valley Village
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto, Broadview North (Old East York)
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmo

In [173]:
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.75245,-79.32991,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.75245,-79.32991,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Parkwoods,43.75245,-79.32991,Corrosion Service Company Limited,43.752432,-79.334661,Construction & Landscaping
3,Victoria Village,43.73057,-79.31306,Wigmore Park,43.731023,-79.310771,Park
4,Victoria Village,43.73057,-79.31306,Memories of Africa,43.726602,-79.312427,Grocery Store


In [174]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,15,15,15,15,15,15
"Alderwood, Long Branch",4,4,4,4,4,4
"Bathurst Manor, Wilson Heights, Downsview North",1,1,1,1,1,1
Bayview Village,6,6,6,6,6,6
"Bedford Park, Lawrence Manor East",22,22,22,22,22,22
...,...,...,...,...,...,...
"Willowdale, Willowdale West",5,5,5,5,5,5
Woburn,4,4,4,4,4,4
Woodbine Heights,18,18,18,18,18,18
York Mills West,4,4,4,4,4,4


In [120]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 266 uniques categories.


In [175]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Zoo Exhibit,Accessories Store,Adult Boutique,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [176]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Zoo Exhibit,Accessories Store,Adult Boutique,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93,"Willowdale, Willowdale West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
94,Woburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
95,Woodbine Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
96,York Mills West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0


In [177]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                venue  freq
0  Chinese Restaurant  0.13
1     Bubble Tea Shop  0.07
2        Skating Rink  0.07
3    Sushi Restaurant  0.07
4       Grocery Store  0.07


----Alderwood, Long Branch----
                 venue  freq
0    Convenience Store  0.25
1         Dance Studio  0.25
2                  Pub  0.25
3                  Gym  0.25
4  Moroccan Restaurant  0.00


----Bathurst Manor, Wilson Heights, Downsview North----
                       venue  freq
0           Business Service   1.0
1                Zoo Exhibit   0.0
2                  Nightclub   0.0
3  Middle Eastern Restaurant   0.0
4         Miscellaneous Shop   0.0


----Bayview Village----
                        venue  freq
0                       Trail  0.33
1          Golf Driving Range  0.17
2                     Dog Run  0.17
3                        Park  0.17
4  Construction & Landscaping  0.17


----Bedford Park, Lawrence Manor East----
                venue  freq
0         Coffee Shop  0.

In [178]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [179]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Chinese Restaurant,Bakery,Department Store,Shopping Mall,Supermarket,Sushi Restaurant,Bubble Tea Shop,Badminton Court,Discount Store,Newsagent
1,"Alderwood, Long Branch",Gym,Convenience Store,Pub,Dance Studio,Farmers Market,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant
2,"Bathurst Manor, Wilson Heights, Downsview North",Business Service,Yoga Studio,Dumpling Restaurant,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market
3,Bayview Village,Trail,Construction & Landscaping,Park,Golf Driving Range,Dog Run,Fast Food Restaurant,Farmers Market,Field,Farm,Donut Shop
4,"Bedford Park, Lawrence Manor East",Italian Restaurant,Thai Restaurant,Coffee Shop,Sandwich Place,Pub,Liquor Store,Juice Bar,Sushi Restaurant,Sports Club,Café


In [180]:
from sklearn.cluster import KMeans

In [192]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([3, 3, 1, 4, 3, 3, 3, 3, 3, 3], dtype=int32)

In [193]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

ValueError: cannot insert Cluster Labels, already exists

In [185]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [197]:
toronto_merged.dropna(inplace=True)

In [198]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)


# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]


# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<folium.vector_layers.CircleMarker at 0x7f111ea8df10>

<folium.vector_layers.CircleMarker at 0x7f1117099a10>

<folium.vector_layers.CircleMarker at 0x7f1116c31dd0>

<folium.vector_layers.CircleMarker at 0x7f1116c31710>

<folium.vector_layers.CircleMarker at 0x7f1116c31b10>

<folium.vector_layers.CircleMarker at 0x7f1116c30a10>

<folium.vector_layers.CircleMarker at 0x7f111c131690>

<folium.vector_layers.CircleMarker at 0x7f111ea8c110>

<folium.vector_layers.CircleMarker at 0x7f111ea8c310>

<folium.vector_layers.CircleMarker at 0x7f1116c30150>

<folium.vector_layers.CircleMarker at 0x7f111ea8c810>

<folium.vector_layers.CircleMarker at 0x7f1116bd7110>

<folium.vector_layers.CircleMarker at 0x7f1116c30590>

<folium.vector_layers.CircleMarker at 0x7f111ea8cb50>

<folium.vector_layers.CircleMarker at 0x7f111ea8cc90>

<folium.vector_layers.CircleMarker at 0x7f111ea96350>

<folium.vector_layers.CircleMarker at 0x7f111ea96450>

<folium.vector_layers.CircleMarker at 0x7f111ea96790>

<folium.vector_layers.CircleMarker at 0x7f111ea969d0>

<folium.vector_layers.CircleMarker at 0x7f111ea96b50>

<folium.vector_layers.CircleMarker at 0x7f111ea968d0>

<folium.vector_layers.CircleMarker at 0x7f111ea96f10>

<folium.vector_layers.CircleMarker at 0x7f1116b4b310>

<folium.vector_layers.CircleMarker at 0x7f1116b4b3d0>

<folium.vector_layers.CircleMarker at 0x7f1116b4b610>

<folium.vector_layers.CircleMarker at 0x7f1116b4b910>

<folium.vector_layers.CircleMarker at 0x7f1116b4bc10>

<folium.vector_layers.CircleMarker at 0x7f1116b4bb90>

<folium.vector_layers.CircleMarker at 0x7f1116b4bd50>

<folium.vector_layers.CircleMarker at 0x7f1116b57350>

<folium.vector_layers.CircleMarker at 0x7f1116b571d0>

<folium.vector_layers.CircleMarker at 0x7f1116b57710>

<folium.vector_layers.CircleMarker at 0x7f1116b579d0>

<folium.vector_layers.CircleMarker at 0x7f1116b57a50>

<folium.vector_layers.CircleMarker at 0x7f1116b57210>

<folium.vector_layers.CircleMarker at 0x7f1116b5f110>

<folium.vector_layers.CircleMarker at 0x7f1116b5f350>

<folium.vector_layers.CircleMarker at 0x7f1116b5f550>

<folium.vector_layers.CircleMarker at 0x7f1116b5f490>

<folium.vector_layers.CircleMarker at 0x7f1116b5f910>

<folium.vector_layers.CircleMarker at 0x7f1116b5fb10>

<folium.vector_layers.CircleMarker at 0x7f1116b5fd50>

<folium.vector_layers.CircleMarker at 0x7f1116b57b10>

<folium.vector_layers.CircleMarker at 0x7f1116b693d0>

<folium.vector_layers.CircleMarker at 0x7f1116b69590>

<folium.vector_layers.CircleMarker at 0x7f1116b69350>

<folium.vector_layers.CircleMarker at 0x7f1116b69a10>

<folium.vector_layers.CircleMarker at 0x7f1116b69910>

<folium.vector_layers.CircleMarker at 0x7f1116b69990>

<folium.vector_layers.CircleMarker at 0x7f1116b70110>

<folium.vector_layers.CircleMarker at 0x7f1116b70310>

<folium.vector_layers.CircleMarker at 0x7f1116b70590>

<folium.vector_layers.CircleMarker at 0x7f1116b707d0>

<folium.vector_layers.CircleMarker at 0x7f1116b69e90>

<folium.vector_layers.CircleMarker at 0x7f1116b70790>

<folium.vector_layers.CircleMarker at 0x7f1116b70d10>

<folium.vector_layers.CircleMarker at 0x7f1116b7c0d0>

<folium.vector_layers.CircleMarker at 0x7f1116b7c350>

<folium.vector_layers.CircleMarker at 0x7f1116b7c4d0>

<folium.vector_layers.CircleMarker at 0x7f1116b7c790>

<folium.vector_layers.CircleMarker at 0x7f1116b7c890>

<folium.vector_layers.CircleMarker at 0x7f1116b7ccd0>

<folium.vector_layers.CircleMarker at 0x7f1116b7c5d0>

<folium.vector_layers.CircleMarker at 0x7f1116b7ce10>

<folium.vector_layers.CircleMarker at 0x7f1116b4e350>

<folium.vector_layers.CircleMarker at 0x7f1116b4e5d0>

<folium.vector_layers.CircleMarker at 0x7f1116b4e6d0>

<folium.vector_layers.CircleMarker at 0x7f1116b4e950>

<folium.vector_layers.CircleMarker at 0x7f1116b4ec50>

<folium.vector_layers.CircleMarker at 0x7f1116b4ee90>

<folium.vector_layers.CircleMarker at 0x7f1116b4ee50>

<folium.vector_layers.CircleMarker at 0x7f1116b0d110>

<folium.vector_layers.CircleMarker at 0x7f1116b4ebd0>

<folium.vector_layers.CircleMarker at 0x7f1116b0d210>

<folium.vector_layers.CircleMarker at 0x7f1116b0d650>

<folium.vector_layers.CircleMarker at 0x7f1116b0d910>

<folium.vector_layers.CircleMarker at 0x7f1116b0de50>

<folium.vector_layers.CircleMarker at 0x7f1116b15050>

<folium.vector_layers.CircleMarker at 0x7f1116b0df90>

<folium.vector_layers.CircleMarker at 0x7f1116b15290>

<folium.vector_layers.CircleMarker at 0x7f1116b154d0>

<folium.vector_layers.CircleMarker at 0x7f1116b15910>

<folium.vector_layers.CircleMarker at 0x7f1116b15650>

<folium.vector_layers.CircleMarker at 0x7f1116b15a10>

<folium.vector_layers.CircleMarker at 0x7f1116b1c050>

<folium.vector_layers.CircleMarker at 0x7f1116b1c190>

<folium.vector_layers.CircleMarker at 0x7f1116b1c4d0>

<folium.vector_layers.CircleMarker at 0x7f1116b1c6d0>

<folium.vector_layers.CircleMarker at 0x7f1116b1c110>

<folium.vector_layers.CircleMarker at 0x7f1116b1cc10>

<folium.vector_layers.CircleMarker at 0x7f1116b1cc90>

<folium.vector_layers.CircleMarker at 0x7f1116b26090>

<folium.vector_layers.CircleMarker at 0x7f1116b1cb50>

<folium.vector_layers.CircleMarker at 0x7f1116b26590>

<folium.vector_layers.CircleMarker at 0x7f1116b26690>

<folium.vector_layers.CircleMarker at 0x7f1116b269d0>

<folium.vector_layers.CircleMarker at 0x7f1116b26910>

<folium.vector_layers.CircleMarker at 0x7f1116b26d50>

<folium.vector_layers.CircleMarker at 0x7f1116b34110>

<folium.vector_layers.CircleMarker at 0x7f1116b34310>

<folium.vector_layers.CircleMarker at 0x7f1116b34590>

<folium.vector_layers.CircleMarker at 0x7f1116b347d0>

In [200]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Scarborough,0.0,Zoo Exhibit,Fast Food Restaurant,Dry Cleaner,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Farmers Market,Farm
56,York,0.0,Fast Food Restaurant,Playground,Coffee Shop,Yoga Studio,Farm,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant


In [201]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
28,North York,1.0,Business Service,Yoga Studio,Dumpling Restaurant,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market
50,North York,1.0,Home Service,Furniture / Home Store,Business Service,Dumpling Restaurant,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market
61,Central Toronto,1.0,Business Service,Swim School,Bus Line,Yoga Studio,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market
62,Central Toronto,1.0,Home Service,Dry Cleaner,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm
63,York,1.0,Home Service,Seafood Restaurant,Furniture / Home Store,Brewery,Creperie,Cuban Restaurant,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop


In [202]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
27,North York,2.0,Residential Building (Apartment / Condo),Park,Yoga Studio,Falafel Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Farm
35,East York,2.0,Intersection,Park,Yoga Studio,Farm,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market
45,North York,2.0,Park,Yoga Studio,Farmers Market,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Fast Food Restaurant
68,Central Toronto,2.0,Park,Yoga Studio,Farmers Market,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Fast Food Restaurant
91,Downtown Toronto,2.0,Park,Playground,Bike Trail,Yoga Studio,Farm,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant
98,Etobicoke,2.0,Park,Yoga Studio,Farmers Market,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Fast Food Restaurant


In [203]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,3.0,German Restaurant,Grocery Store,Park,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm
2,Downtown Toronto,3.0,Coffee Shop,Breakfast Spot,Yoga Studio,Thrift / Vintage Store,Distribution Center,Bakery,Italian Restaurant,Pub,Electronics Store,Thai Restaurant
3,North York,3.0,Clothing Store,Cosmetics Shop,Bookstore,Furniture / Home Store,Men's Store,Toy / Game Store,Women's Store,Restaurant,American Restaurant,Café
4,Downtown Toronto,3.0,Coffee Shop,Sandwich Place,Italian Restaurant,Burrito Place,Park,Gastropub,Bank,Mediterranean Restaurant,Falafel Restaurant,Café
5,Etobicoke,3.0,Pharmacy,Grocery Store,Park,Café,Shopping Mall,Skating Rink,Bank,Home Service,Gym,Fast Food Restaurant
...,...,...,...,...,...,...,...,...,...,...,...,...
97,Downtown Toronto,3.0,Coffee Shop,Hotel,Café,Restaurant,Gym,Japanese Restaurant,Asian Restaurant,Seafood Restaurant,Deli / Bodega,American Restaurant
99,Downtown Toronto,3.0,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Restaurant,Café,Fast Food Restaurant,Gay Bar,Pub,Smoke Shop,Mediterranean Restaurant
100,East Toronto,3.0,Coffee Shop,Hotel,Café,Sushi Restaurant,Asian Restaurant,Gym,Mediterranean Restaurant,Seafood Restaurant,Restaurant,Italian Restaurant
101,Etobicoke,3.0,Flower Shop,Fast Food Restaurant,Sushi Restaurant,Italian Restaurant,Bank,Coffee Shop,Field,Farmers Market,Farm,Fish & Chips Shop


In [204]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,4.0,Food & Drink Shop,Park,Construction & Landscaping,Flea Market,Fish Market,Fish & Chips Shop,Field,Flower Shop,Dumpling Restaurant,Farmers Market
16,York,4.0,Hockey Arena,Trail,Field,Park,Grocery Store,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant
18,Scarborough,4.0,Construction & Landscaping,Park,Gym / Fitness Center,Farmers Market,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm
19,East Toronto,4.0,Pub,Health Food Store,Park,Trail,Yoga Studio,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space
26,Scarborough,4.0,Gaming Cafe,Trail,Yoga Studio,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Dry Cleaner
39,North York,4.0,Trail,Construction & Landscaping,Park,Golf Driving Range,Dog Run,Fast Food Restaurant,Farmers Market,Field,Farm,Donut Shop
