In [433]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows',None)
import json

import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans

import folium
from geopy.geocoders import Nominatim
import xlrd
print('Libraries imported') 

Libraries imported


Because I have no access to Wikipedia, I created my own excel spreadsheet containing Toronto neighborhoods information. 

In [434]:
toronto = 'C:\\Users\\Администратор\\Desktop\\Toronto.xlsx'
toronto_data = pd.read_excel(toronto)
toronto_data.head()

Unnamed: 0,Borough,Neighborhood,Postal Code
0,Central Toronto,Summerhill East,M4T
1,Central Toronto,Rathnelly,M4V
2,Central Toronto,South Hill,M4V
3,Central Toronto,Forest Hill SE,M4V
4,Central Toronto,Deer Park,M4V


I am not able to get the geographical coordinates of the neighborhoods using the Geocoder package(Google services are blocked in China), so we are going to use the link to a csv file that has the geographical coordinates of each postal code: http://cocl.us/Geospatial_data

In [435]:
import urllib.request
url = 'http://cocl.us/Geospatial_data'
file_name = 'Geospatial_data.csv'
urllib.request.urlretrieve(url, file_name)


('Geospatial_data.csv', <http.client.HTTPMessage at 0x1b546080>)

In [436]:
lllat = pd.read_csv(file_name)
lllat.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Let's merge two dataframes we have

In [437]:
toronto=toronto_data.merge(lllat)
toronto.head()

Unnamed: 0,Borough,Neighborhood,Postal Code,Latitude,Longitude
0,Central Toronto,Summerhill East,M4T,43.689574,-79.38316
1,Central Toronto,Moore Park,M4T,43.689574,-79.38316
2,Central Toronto,Rathnelly,M4V,43.686412,-79.400049
3,Central Toronto,South Hill,M4V,43.686412,-79.400049
4,Central Toronto,Forest Hill SE,M4V,43.686412,-79.400049


In [438]:
toronto.shape

(76, 5)

In order to define an instance of the geocoder,we need to define a user_agent. We will name our agent toronto_explorer

In [439]:
address = 'Toronto,Ontario'
geolocator = Nominatim(timeout=3,user_agent = 'toronto_explorer')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geographical coordinate of Toronto are {},{}.'.format(latitude,longitude))

The geographical coordinate of Toronto are 43.653963,-79.387207.


Creating a map of Toronto

In [440]:
map_toronto = folium.Map(location = [latitude,longitude],zoom_start = 10)

Adding markers to the map

In [441]:
for lat,lng,borough,neighborhood in zip(toronto['Latitude'],toronto['Longitude'],toronto['Borough'],toronto['Neighborhood']):
    label = '{},{}'.format(neighborhood,borough)
    label = folium.Popup(label,parse_html = True)
    folium.CircleMarker([lat,lng],radius = 5,popup = label,color = 'blue',fill = True, fill_color = '#3186cc',fill_opacity = 0.7,parse_html = False).add_to(map_toronto)

In [442]:
map_toronto

Let's simplify the above map and segment and cluster only the neighborhoods in Downtown Toronto. So let's slice the original dataframe and create a new dataframe of the Downtown Toronto data.

In [443]:
downtown_data = toronto[toronto["Borough"]=='Downtown Toronto'].reset_index(drop=True)
del downtown_data['Postal Code']
del downtown_data['Borough']


downtown_data.head(15)

Unnamed: 0,Neighborhood,Latitude,Longitude
0,King and Spadina,43.628947,-79.39442
1,Railway Lands,43.628947,-79.39442
2,Harbourfront West,43.628947,-79.39442
3,Bathurst Quay,43.628947,-79.39442
4,South Niagara,43.628947,-79.39442
5,YTZ,43.628947,-79.39442
6,CN Tower,43.628947,-79.39442
7,Victoria Hotel,43.648198,-79.379817
8,Commerce Court,43.648198,-79.379817
9,Union Station,43.640816,-79.381752


In [444]:
address = 'Downtown Toronto, Toronto'
geolocator = Nominatim(timeout = 3,user_agent = 'toronto_explorer')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geographical coordinate of Downtown Toronto are {},{}.'.format(latitude,longitude))


    

The geographical coordinate of Downtown Toronto are 43.654027,-79.3802003.


 As we did with all of Toronto,let's visualize Downtown Toronto with the neighborhoods in it

In [445]:
map_downtown = folium.Map(location = [latitude,longitude],zoom_start = 11)


Let's add markers to the map

In [446]:
for lat,lng,label in zip(downtown_data['Latitude'],downtown_data['Longitude'],downtown_data['Neighborhood']):
    label = folium.Popup(label,parse_html = True)
    folium.CircleMarker([lat,lng],radius = 5,popup = label,color = 'blue', fill = True,fill_color = '#3186cc',fill_opacity = 0.7,parse_html = False).add_to(map_downtown)
map_downtown

Next we are going to start utilizing the Foursquare API to explore the neighborhoods and segment them

In [447]:
CLIENT_ID = 'SBA3ULRLW0H401RUKMBKKT3PTDKC1IUQ2RRROCP2KG5MY0EI'
CLIENT_SECRET = 'GRSZCACKBADCARVKFKL3UGYX0TQL42KVATSZ4C5RSCCIOTAW'
VERSION = '20180605'


Let's explore the first neighborhood in our dataframe

In [448]:
downtown_data.loc[0,'Neighborhood']

'King and Spadina'

Let's get the neighborhood's latitude and longitude values

In [449]:
neighborhood_latitude = downtown_data.loc[0,'Latitude']
neighborhood_longitude = downtown_data.loc[0,'Longitude']
neighborhood_name = downtown_data.loc[0,'Neighborhood']
print('Latitude and longitude values of {} are {},{}.'.format(neighborhood_name,neighborhood_latitude,neighborhood_longitude))
      

Latitude and longitude values of King and Spadina are 43.6289467,-79.3944199.


Now let's get the top 50 venues that are in King and Spadina within a radius of 1000 meters

In [450]:
latitude = 43.6795626
longitude = -79.37752940000001
LIMIT = 50
radius = 1000
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID,CLIENT_SECRET,VERSION,latitude,longitude,radius,LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?client_id=SBA3ULRLW0H401RUKMBKKT3PTDKC1IUQ2RRROCP2KG5MY0EI&client_secret=GRSZCACKBADCARVKFKL3UGYX0TQL42KVATSZ4C5RSCCIOTAW&v=20180605&ll=43.6795626,-79.37752940000001&radius=1000&limit=50'

In [451]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d9a7bff6e4650002c6f9f91'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Rosedale',
  'headerFullLocation': 'Rosedale, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 26,
  'suggestedBounds': {'ne': {'lat': 43.68856260900001,
    'lng': -79.36510816548741},
   'sw': {'lat': 43.670562590999985, 'lng': -79.38995063451262}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4adcb343f964a520e32e21e3',
       'name': 'Summerhill Market',
       'location': {'address': '446 Summerhill Ave',
        'crossStreet': 'btwn. MacLennan Ave. and Glen Rd.',
        'lat': 43.68626482142425,
        'lng': -79.37545823237794,
      

Let's use get_category_type function in order to extract information from items keys

In [452]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
    if len(categories_list)==0:
        return None
    else:
        return categories_list[0]['name']

Now we are ready to clean the json and structure it into a pandas dataframe

In [453]:
venues = results["response"]['groups'][0]['items']
nearby_venues = json_normalize(venues)

#filtering columns
filtered_columns = ['venue.name','venue.categories','venue.location.lat','venue.location.lng']
nearby_venues = nearby_venues.loc[:,filtered_columns]

#filtering the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type,axis=1)

#cleaning columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
nearby_venues.head(20)

Unnamed: 0,name,categories,lat,lng
0,Summerhill Market,Grocery Store,43.686265,-79.375458
1,Toronto Lawn Tennis Club,Athletics & Sports,43.680667,-79.388559
2,Black Camel,BBQ Joint,43.677016,-79.389367
3,Tinuno,Filipino Restaurant,43.671281,-79.37492
4,Craigleigh Gardens,Park,43.678099,-79.371586
5,Maison Selby,Bistro,43.671232,-79.376618
6,Pie Squared,Pie Shop,43.672143,-79.377856
7,Starbucks,Coffee Shop,43.671082,-79.380756
8,Manulife Financial,Office,43.67207,-79.382449
9,Booster Juice,Smoothie Shop,43.671566,-79.378581



Let's create a function to repeat the same process to all the neighborhoods in Downtown Toronto

In [454]:
def getNearbyVenues(names,latitudes,longitudes,radius = 1000):
    
    venues_list = []
    for name,lat,lng in zip(names,latitudes,longitudes):
        
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.\
        format(CLIENT_ID,CLIENT_SECRET,VERSION,lat,lng,radius,LIMIT)
        
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        venues_list.append([(name,lat,lng,v['venue']['name'],v['venue']['location']['lat'],v['venue']['location']['lng'],v['venue']['categories'][0]['name']) for v in results])
        
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
        
    nearby_venues.columns = ['Neighborhood','Neighborhood Latitude','Neighborhood Longitude','Venue',\
                                 'Venue Latitude','Venue Longitude','Venue Category']
        
    return(nearby_venues)
        

Now let's write the code to run the above function on each neighborhood and create a new dataframe called downtown_venues


In [455]:
downtown_venues = getNearbyVenues(names= downtown_data['Neighborhood'],\
                                  latitudes=downtown_data['Latitude'],longitudes=downtown_data['Longitude'])


Let's check the size of the resulting dataframe

In [456]:
print(downtown_venues.shape)
downtown_venues.head(50)


(1411, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,King and Spadina,43.628947,-79.39442,Billy Bishop Toronto City Airport (YTZ) (Billy...,43.631541,-79.395868,Airport
1,King and Spadina,43.628947,-79.39442,Toronto Harbour,43.633045,-79.396484,Harbor / Marina
2,King and Spadina,43.628947,-79.39442,The National Yacht Club,43.631754,-79.402185,Harbor / Marina
3,King and Spadina,43.628947,-79.39442,Porter Lounge,43.63068,-79.395756,Airport Lounge
4,King and Spadina,43.628947,-79.39442,Toronto Music Garden,43.636902,-79.393933,Garden
5,King and Spadina,43.628947,-79.39442,Ireland Park,43.634825,-79.395601,Sculpture Garden
6,King and Spadina,43.628947,-79.39442,Starbucks,43.637735,-79.392203,Coffee Shop
7,King and Spadina,43.628947,-79.39442,Coronation Dog Park,43.633725,-79.403961,Dog Run
8,King and Spadina,43.628947,-79.39442,Little Norway Park,43.634871,-79.398474,Park
9,King and Spadina,43.628947,-79.39442,Starbucks,43.636679,-79.400321,Coffee Shop


Let's find out how many unique categories can be curated from all the returned venues

In [457]:
print('There are {} uniques categories.'.format(len(downtown_venues['Venue Category'].unique())))

There are 179 uniques categories.


Let's analyze each neighborhood

In [458]:
#one hot encoding

downtown_onehot = pd.get_dummies(downtown_venues[['Venue Category']],prefix="",prefix_sep="")

#adding neighborhood column back to dataframe

downtown_onehot['Neighborhood'] = downtown_venues['Neighborhood']

# let's move neighborhood column to the first column


# get a list of columns
cols = list(downtown_onehot)
# move the column to head of list using index, pop and insert
cols.insert(0, cols.pop(cols.index('Neighborhood')))


downtown_onehot = downtown_onehot.loc[:, cols]
downtown_onehot.shape
downtown_onehot.head(100)
                                  

Unnamed: 0,Neighborhood,Airport,Airport Lounge,American Restaurant,Animal Shelter,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Stadium,Basketball Stadium,Beach,Beer Bar,Beer Store,Belgian Restaurant,Bistro,Bookstore,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Butcher,Café,Candy Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Clothing Store,Cocktail Bar,Coffee Shop,College Gym,Comedy Club,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Creperie,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Diner,Dog Run,Doner Restaurant,Dumpling Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Filipino Restaurant,Fish & Chips Shop,Fish Market,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Garden,Gastropub,Gay Bar,General Entertainment,Gift Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Health Food Store,Historic Site,History Museum,Hobby Shop,Hostel,Hotel,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Korean Restaurant,Lake,Latin American Restaurant,Liquor Store,Lounge,Martial Arts Dojo,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Monument / Landmark,Movie Theater,Museum,Music School,Music Store,Music Venue,New American Restaurant,Noodle House,Office,Opera House,Organic Grocery,Paper / Office Supplies Store,Park,Pastry Shop,Performing Arts Venue,Pet Store,Pie Shop,Pizza Place,Playground,Plaza,Poke Place,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Record Shop,Restaurant,Rock Club,Roof Deck,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,School,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,South American Restaurant,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Supermarket,Sushi Restaurant,Taco Place,Tailor Shop,Taiwanese Restaurant,Tapas Restaurant,Tea Room,Tech Startup,Thai Restaurant,Theater,Theme Restaurant,Track,Trail,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,King and Spadina,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,King and Spadina,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,King and Spadina,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,King and Spadina,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,King and Spadina,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,King and Spadina,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,King and Spadina,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,King and Spadina,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,King and Spadina,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,King and Spadina,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


The table we got is still hardly readable so let's group rows by neighborhood and by taking the mean of the frequency of occurence of each category

In [460]:
downtown_grouped = downtown_onehot.groupby('Neighborhood').mean().reset_index()
downtown_grouped
downtown_grouped.shape

(34, 179)

Let's print each neighborhood along with top 3 most common venues 

In [461]:
num_top_venues = 3

for hood in downtown_grouped['Neighborhood']:
    print('----'+hood+'----')
    temp = downtown_grouped[downtown_grouped['Neighborhood']==hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq':2})
    print(temp.sort_values('freq',ascending = False).reset_index(drop=True).head(num_top_venues))
    print('\n')

---- Adelaide----
         venue  freq
0         Café  0.08
1  Coffee Shop  0.08
2        Hotel  0.04


---- Design Exchange----
         venue  freq
0         Café  0.12
1  Coffee Shop  0.12
2        Hotel  0.08


----Bathurst Quay ----
             venue  freq
0  Harbor / Marina  0.13
1      Coffee Shop  0.13
2             Café  0.13


----Berczy Park----
      venue  freq
0  Beer Bar  0.06
1      Café  0.06
2     Hotel  0.04


----CN Tower      ----
             venue  freq
0  Harbor / Marina  0.13
1      Coffee Shop  0.13
2             Café  0.13


----Cabbagetown----
         venue  freq
0         Park  0.08
1  Coffee Shop  0.05
2    Gastropub  0.05


----Central Bay Street----
                 venue  freq
0          Coffee Shop  0.10
1                 Café  0.06
2  Japanese Restaurant  0.04


----Chinatown----
                           venue  freq
0                           Café  0.12
1  Vegetarian / Vegan Restaurant  0.06
2             Mexican Restaurant  0.06


----Christie--

Let's put it into a pandas dataframe
First, let's write a function to sort the venues in descending order 

In [462]:
def return_most_common_venues(row,num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending = False)
    
    return row_categories_sorted.index.values[0:num_top_venues] 

Now let's create the new dataframe and display the top 10 venues for each neighborhood

In [466]:
num_top_venues = 10
indicators = ['st','nd','rd']

columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1,indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))
        
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = downtown_grouped['Neighborhood']

for ind in np.arange(downtown_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind,1:]=return_most_common_venues(downtown_grouped.iloc[ind,:],num_top_venues)

neighborhoods_venues_sorted.head()    

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adelaide,Coffee Shop,Café,Concert Hall,American Restaurant,Hotel,Asian Restaurant,Steakhouse,Gastropub,Restaurant,Jazz Club
1,Design Exchange,Café,Coffee Shop,Hotel,Restaurant,Steakhouse,American Restaurant,Concert Hall,Deli / Bodega,Gastropub,Basketball Stadium
2,Bathurst Quay,Café,Harbor / Marina,Coffee Shop,Airport,Airport Lounge,Track,Park,Dance Studio,Sculpture Garden,Dog Run
3,Berczy Park,Café,Beer Bar,Seafood Restaurant,Farmers Market,Coffee Shop,Cocktail Bar,Bakery,Hotel,Jazz Club,Bistro
4,CN Tower,Café,Harbor / Marina,Coffee Shop,Airport,Airport Lounge,Track,Park,Dance Studio,Sculpture Garden,Dog Run


In [467]:
neighborhoods_venues_sorted.shape

(34, 11)

In [468]:
neighborhoods_venues_sorted.head(34)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adelaide,Coffee Shop,Café,Concert Hall,American Restaurant,Hotel,Asian Restaurant,Steakhouse,Gastropub,Restaurant,Jazz Club
1,Design Exchange,Café,Coffee Shop,Hotel,Restaurant,Steakhouse,American Restaurant,Concert Hall,Deli / Bodega,Gastropub,Basketball Stadium
2,Bathurst Quay,Café,Harbor / Marina,Coffee Shop,Airport,Airport Lounge,Track,Park,Dance Studio,Sculpture Garden,Dog Run
3,Berczy Park,Café,Beer Bar,Seafood Restaurant,Farmers Market,Coffee Shop,Cocktail Bar,Bakery,Hotel,Jazz Club,Bistro
4,CN Tower,Café,Harbor / Marina,Coffee Shop,Airport,Airport Lounge,Track,Park,Dance Studio,Sculpture Garden,Dog Run
5,Cabbagetown,Park,Restaurant,Diner,Coffee Shop,Gastropub,Japanese Restaurant,Café,Pub,Caribbean Restaurant,Pool
6,Central Bay Street,Coffee Shop,Café,Italian Restaurant,Japanese Restaurant,Bubble Tea Shop,Pizza Place,Tea Room,Portuguese Restaurant,Breakfast Spot,Falafel Restaurant
7,Chinatown,Café,Vegetarian / Vegan Restaurant,Mexican Restaurant,Dessert Shop,Vietnamese Restaurant,Coffee Shop,Comfort Food Restaurant,Burger Joint,Bar,Caribbean Restaurant
8,Christie,Café,Korean Restaurant,Cocktail Bar,Grocery Store,Coffee Shop,Comedy Club,Indian Restaurant,Pizza Place,Design Studio,South American Restaurant
9,Church and Wellesley,Coffee Shop,Gastropub,Gay Bar,Restaurant,Burger Joint,Thai Restaurant,Men's Store,Dance Studio,Salon / Barbershop,Bubble Tea Shop



So as we can see the most common venue in Downtown is cafe, the second top venue is coffee shop, the third top venue is restaraunt and especially Vegan Restaraunts are popular in Downtown



Now let's cluster our neighborhoods

In [469]:
kclusters = 5

downtown_grouped_clustering = downtown_grouped.drop('Neighborhood',1)

Let's run k-means clustering

In [470]:
kmeans = KMeans(n_clusters=kclusters,random_state = 0).fit(downtown_grouped_clustering)

Now we are going to check cluster labels generated for each row in the dataframe

In [471]:
kmeans.labels_[0:10]

array([0, 0, 1, 0, 1, 0, 0, 3, 3, 0])

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood

In [472]:
neighborhoods_venues_sorted.insert(0,'Cluster Labels',kmeans.labels_)
downtown_merged = downtown_data

Let's merge downtown_grouped with downtown data to add latitude and longitude for each neighborhood

In [473]:
downtown_merged = downtown_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'),on = 'Neighborhood')
downtown_merged.head()

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,King and Spadina,43.628947,-79.39442,1,Café,Harbor / Marina,Coffee Shop,Airport,Airport Lounge,Track,Park,Dance Studio,Sculpture Garden,Dog Run
1,Railway Lands,43.628947,-79.39442,1,Café,Harbor / Marina,Coffee Shop,Airport,Airport Lounge,Track,Park,Dance Studio,Sculpture Garden,Dog Run
2,Harbourfront West,43.628947,-79.39442,1,Café,Harbor / Marina,Coffee Shop,Airport,Airport Lounge,Track,Park,Dance Studio,Sculpture Garden,Dog Run
3,Bathurst Quay,43.628947,-79.39442,1,Café,Harbor / Marina,Coffee Shop,Airport,Airport Lounge,Track,Park,Dance Studio,Sculpture Garden,Dog Run
4,South Niagara,43.628947,-79.39442,1,Café,Harbor / Marina,Coffee Shop,Airport,Airport Lounge,Track,Park,Dance Studio,Sculpture Garden,Dog Run


Finally, let's visualize the resulting clusters

In [474]:
#creating a map

map_clusters = folium.Map(location = [latitude,longitude],zoom_start = 11)

# setting color scheme for the clusters

x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0,1,len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

#adding markers to the map

markers_colors = []
for lat,lon,poi,cluster in zip(downtown_merged['Latitude'],downtown_merged['Longitude'],downtown_merged['Neighborhood'],\
                               downtown_merged['Cluster Labels']):
    label = folium.Popup(str(poi)+'CLuster'+str(cluster),parse_html = True)
    folium.CircleMarker([lat,lon],radius = 5, popup = label,\
                        color = rainbow[cluster-1],fill = True,fill_color = rainbow[cluster-1],\
                        fill_opacity = 0.7).add_to(map_clusters)

map_clusters    


Now we can examine each cluster and determine the discriminating venue categories that distinguish each cluster. Based on the defining categories,we can then assign a name to each cluster

# Cluster 1

In [482]:
downtown_merged.loc[downtown_merged['Cluster Labels']==0,downtown_merged.columns[[0]+list(range(4,downtown_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Victoria Hotel,Café,Coffee Shop,Restaurant,Hotel,Steakhouse,American Restaurant,Gastropub,Deli / Bodega,Beer Bar,Gym
8,Commerce Court,Café,Coffee Shop,Restaurant,Hotel,Steakhouse,American Restaurant,Gastropub,Deli / Bodega,Beer Bar,Gym
17,Adelaide,Coffee Shop,Café,Concert Hall,American Restaurant,Hotel,Asian Restaurant,Steakhouse,Gastropub,Restaurant,Jazz Club
18,King,Coffee Shop,Café,Concert Hall,American Restaurant,Hotel,Asian Restaurant,Steakhouse,Gastropub,Restaurant,Jazz Club
19,Richmond,Coffee Shop,Café,Concert Hall,American Restaurant,Hotel,Asian Restaurant,Steakhouse,Gastropub,Restaurant,Jazz Club
20,Cabbagetown,Park,Restaurant,Diner,Coffee Shop,Gastropub,Japanese Restaurant,Café,Pub,Caribbean Restaurant,Pool
21,St. James Town,Park,Restaurant,Diner,Coffee Shop,Gastropub,Japanese Restaurant,Café,Pub,Caribbean Restaurant,Pool
22,Berczy Park,Café,Beer Bar,Seafood Restaurant,Farmers Market,Coffee Shop,Cocktail Bar,Bakery,Hotel,Jazz Club,Bistro
23,Central Bay Street,Coffee Shop,Café,Italian Restaurant,Japanese Restaurant,Bubble Tea Shop,Pizza Place,Tea Room,Portuguese Restaurant,Breakfast Spot,Falafel Restaurant
25,Church and Wellesley,Coffee Shop,Gastropub,Gay Bar,Restaurant,Burger Joint,Thai Restaurant,Men's Store,Dance Studio,Salon / Barbershop,Bubble Tea Shop


Top 3 Venues of CLuster 1: Coffee Shop, Cafe,Concert Hall

# Cluster 2

In [483]:
downtown_merged.loc[downtown_merged['Cluster Labels']==1,downtown_merged.columns[[0]+list(range(4,downtown_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,King and Spadina,Café,Harbor / Marina,Coffee Shop,Airport,Airport Lounge,Track,Park,Dance Studio,Sculpture Garden,Dog Run
1,Railway Lands,Café,Harbor / Marina,Coffee Shop,Airport,Airport Lounge,Track,Park,Dance Studio,Sculpture Garden,Dog Run
2,Harbourfront West,Café,Harbor / Marina,Coffee Shop,Airport,Airport Lounge,Track,Park,Dance Studio,Sculpture Garden,Dog Run
3,Bathurst Quay,Café,Harbor / Marina,Coffee Shop,Airport,Airport Lounge,Track,Park,Dance Studio,Sculpture Garden,Dog Run
4,South Niagara,Café,Harbor / Marina,Coffee Shop,Airport,Airport Lounge,Track,Park,Dance Studio,Sculpture Garden,Dog Run
5,YTZ,Café,Harbor / Marina,Coffee Shop,Airport,Airport Lounge,Track,Park,Dance Studio,Sculpture Garden,Dog Run
6,CN Tower,Café,Harbor / Marina,Coffee Shop,Airport,Airport Lounge,Track,Park,Dance Studio,Sculpture Garden,Dog Run


Top 3 Venues of CLuster 2: Cafe, Harbor/Marina, Coffee Shop

# Cluster 3

In [485]:
downtown_merged.loc[downtown_merged['Cluster Labels']==2,downtown_merged.columns[[0]+list(range(4,downtown_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,Union Station,Hotel,Coffee Shop,Park,Café,Baseball Stadium,Aquarium,Plaza,Fried Chicken Joint,Brewery,New American Restaurant
10,Toronto Island,Hotel,Coffee Shop,Park,Café,Baseball Stadium,Aquarium,Plaza,Fried Chicken Joint,Brewery,New American Restaurant
11,Harbourfront East,Hotel,Coffee Shop,Park,Café,Baseball Stadium,Aquarium,Plaza,Fried Chicken Joint,Brewery,New American Restaurant
26,Rosedale,Coffee Shop,Grocery Store,Park,Bank,Hostel,Bistro,Pie Shop,Playground,Office,Candy Store


Top 3 Venues of CLuster 3: Hotel,Coffee Shop,Park

# Cluster 4


In [486]:
downtown_merged.loc[downtown_merged['Cluster Labels']==3,downtown_merged.columns[[0]+list(range(4,downtown_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Chinatown,Café,Vegetarian / Vegan Restaurant,Mexican Restaurant,Dessert Shop,Vietnamese Restaurant,Coffee Shop,Comfort Food Restaurant,Burger Joint,Bar,Caribbean Restaurant
13,Grange Park,Café,Vegetarian / Vegan Restaurant,Mexican Restaurant,Dessert Shop,Vietnamese Restaurant,Coffee Shop,Comfort Food Restaurant,Burger Joint,Bar,Caribbean Restaurant
14,Kensington Market,Café,Vegetarian / Vegan Restaurant,Mexican Restaurant,Dessert Shop,Vietnamese Restaurant,Coffee Shop,Comfort Food Restaurant,Burger Joint,Bar,Caribbean Restaurant
24,Christie,Café,Korean Restaurant,Cocktail Bar,Grocery Store,Coffee Shop,Comedy Club,Indian Restaurant,Pizza Place,Design Studio,South American Restaurant
30,Harbord,Café,Vegetarian / Vegan Restaurant,Bakery,Japanese Restaurant,Bookstore,Park,Pizza Place,Restaurant,Hostel,Ramen Restaurant
31,University of Toronto,Café,Vegetarian / Vegan Restaurant,Bakery,Japanese Restaurant,Bookstore,Park,Pizza Place,Restaurant,Hostel,Ramen Restaurant


Top 3 Venues of CLuster 4:Cafe, Vegeterian Restaurant, Mexican Restaraunt

# Cluster 5

In [487]:
downtown_merged.loc[downtown_merged['Cluster Labels']==4,downtown_merged.columns[[0]+list(range(4,downtown_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,Port of Toronto,Coffee Shop,Café,Park,Bakery,Theater,Breakfast Spot,Mexican Restaurant,Italian Restaurant,Pub,Restaurant
16,Regent Park,Coffee Shop,Café,Park,Bakery,Theater,Breakfast Spot,Mexican Restaurant,Italian Restaurant,Pub,Restaurant


Top 3 Venues of CLuster 5:Coffee Shop,Cafe,Park