**Data**

In [1]:
#Downloading Vancouver neighborhood data from Wikipedia site and scraping data to put it in the right format and dataframe
import urllib.request

url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_V'
with urllib.request.urlopen(url) as response:
  htmldata = response.read()

In [2]:
!pip install beautifulsoup4
from bs4 import BeautifulSoup

soup = BeautifulSoup(htmldata)
import numpy as np
import pandas as pd



In [3]:
table_contents=[]
table=soup.find('table')
for row in table.findAll('td'):
    cell = {}
    if row.span.text=='Not assigned':
        continue
    cell['PostalCode'] = row.text[:3]
    cell['Borough'] = row.span.a.text
    if row.span.br:
        post_br = ''.join(row.span.get_text('\n').split('\n')[1:])
        if '(' in post_br:
            cell['Neighborhood'] = ((((post_br.split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        else:
            cell['Neighborhood'] = post_br
    else:
        cell['Neighborhood'] = cell['Borough']

    table_contents.append(cell)

df=pd.DataFrame(table_contents)


In [4]:
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,V1A,Kimberley,Kimberley
1,V2A,Penticton,Penticton
2,V3A,Langley Township,Langley City
3,V4A,Surrey,Southwest
4,V5A,Burnaby,"Government Road, Lake City, SFU, Burnaby Mountain"
...,...,...,...
170,V5Z,Vancouver,"East Fairview, South Cambie"
171,V6Z,Vancouver,SW Downtown
172,V7Z,Sechelt,Sechelt
173,V8Z,Victoria,Middle Saanich


In [5]:
#Sanity check individual postal codes - for data validation
df.loc[df['PostalCode'].isin(['V9A', 'V6V', 'V7R', 'V8W'])]

Unnamed: 0,PostalCode,Borough,Neighborhood
8,V9A,Victoria,"Vic West, Esquimalt Canadian Forces"
110,V7R,North Vancouver (district municipality),Northwest
136,V6V,Richmond,Northeast
147,V8W,Victoria,"Downtown, Chinatown British Columbia Provincia..."


In [6]:
df.shape

(175, 3)

In [7]:
#Getting latitude and longitude coordinates for each postal code using the CA.zip file under the GeoNames.org website (http://download.geonames.org/export/zip/) and converting it into dataframe
lat_long_neighborhood ='C:/Temp/CA.txt'
df1 = pd.read_csv(lat_long_neighborhood, sep = '\t', header=None, index_col=False,names=['a','PostalCode','b','c','d','e','f','g','h','Latitude','Longitude','i'])

In [8]:
df1

Unnamed: 0,a,PostalCode,b,c,d,e,f,g,h,Latitude,Longitude,i
0,CA,T0A,Eastern Alberta (St. Paul),Alberta,AB,,,,,54.7660,-111.7174,6.0
1,CA,T0B,Wainwright Region (Tofield),Alberta,AB,,,,,53.0727,-111.5816,6.0
2,CA,T0C,Central Alberta (Stettler),Alberta,AB,,,,,52.1431,-111.6941,5.0
3,CA,T0E,Western Alberta (Jasper),Alberta,AB,,,,,53.6758,-115.0948,5.0
4,CA,T0G,North Central Alberta (Slave Lake),Alberta,AB,,,,,55.6993,-114.4529,6.0
...,...,...,...,...,...,...,...,...,...,...,...,...
1651,CA,S9V,Lloydminster,Saskatchewan,SK,,,,,53.2835,-110.0016,4.0
1652,CA,S9X,Meadow Lake,Saskatchewan,SK,,,,,54.1335,-108.4347,4.0
1653,CA,Y0A,Southeastern Yukon (Watson Lake),Yukon,YT,,,,,61.5793,-131.1481,6.0
1654,CA,Y0B,Central Yukon (Dawson City),Yukon,YT,,,,,64.6450,-137.5360,6.0


In [9]:
#Extracting only data from df1 that is necessary
df2 = df1[['PostalCode','Latitude','Longitude']]
df2

Unnamed: 0,PostalCode,Latitude,Longitude
0,T0A,54.7660,-111.7174
1,T0B,53.0727,-111.5816
2,T0C,52.1431,-111.6941
3,T0E,53.6758,-115.0948
4,T0G,55.6993,-114.4529
...,...,...,...
1651,S9V,53.2835,-110.0016
1652,S9X,54.1335,-108.4347
1653,Y0A,61.5793,-131.1481
1654,Y0B,64.6450,-137.5360


In [10]:
#Combining the Vancouver neighborhood data and the latitude and longitude data
new_df = pd.merge(df, df2, on=["PostalCode"], how='left')
new_df.shape
new_df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,V1A,Kimberley,Kimberley,49.6832,-115.9855
1,V2A,Penticton,Penticton,49.4806,-119.5858
2,V3A,Langley Township,Langley City,49.0997,-122.6526
3,V4A,Surrey,Southwest,49.0374,-122.8299
4,V5A,Burnaby,"Government Road, Lake City, SFU, Burnaby Mountain",49.2640,-122.9369
...,...,...,...,...,...
170,V5Z,Vancouver,"East Fairview, South Cambie",49.2475,-123.1210
171,V6Z,Vancouver,SW Downtown,49.2767,-123.1300
172,V7Z,Sechelt,Sechelt,,
173,V8Z,Victoria,Middle Saanich,48.4993,-123.4003


In [11]:
#For this analysis, I am focusing only on the Vancouver borough for exploring and clustering the neighborhoods
vancouver_data = new_df[new_df['Borough'] == 'Vancouver'].reset_index(drop=True)
vancouver_data

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,V6A,Vancouver,"Strathcona, Chinatown, Downtown Eastside",49.2779,-123.0908
1,V6B,Vancouver,"NE Downtown, Gastown, Harbour Centre, Internat...",49.2788,-123.1139
2,V6C,Vancouver,"Waterfront, Coal Harbour, Canada Place",49.2866,-123.1158
3,V6E,Vancouver,"SE West End, Davie Village",49.2833,-123.1298
4,V6G,Vancouver,"NW West End, Stanley Park",49.299,-123.1408
5,V6H,Vancouver,"West Fairview, Granville Island, NE Shaughnessy",49.2559,-123.1322
6,V6J,Vancouver,"NW Shaughnessy, East Kitsilano, Quilchena",49.2603,-123.146
7,V5K,Vancouver,North Hastings-Sunrise,49.2807,-123.0397
8,V6K,Vancouver,"Central Kitsilano, Greektown",49.2646,-123.1648
9,V5L,Vancouver,North Grandview-Woodland,49.2795,-123.0667


In [12]:
#Import json and geopy
import json
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.



In [13]:
#Import requests and Matplotlib
import requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

import matplotlib.cm as cm
import matplotlib.colors as colors

In [14]:
#Import k-means from clustering stage
from sklearn.cluster import KMeans

In [15]:
#Import Folium
!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.



In [16]:
#Define Foursquare Credentials and Version
CLIENT_ID = 'AIGZH4DO4K2YGUEVSCJUM4JZZPG0SXOGIX1HHPKUVRVGL51O'
CLIENT_SECRET = 'R1DM4UVWGZ31IO2NJIWPLSO5UZFIZX35YJOXNRYAFC1YGUEM'
VERSION = '20180605'
LIMIT = 100

In [17]:
#Exploring neighborhoods in boroughs that contain the word Vancouver, i.e. dataframe vancouver_data
#Code to repeat the same process to all neighborhoods in dataframe vancouver_data
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [18]:
#Code to run the above function on each neighborhood and create a new dataframe called vancouver_venues
vancouver_venues = getNearbyVenues(names=vancouver_data['Neighborhood'],
                                   latitudes=vancouver_data['Latitude'],
                                   longitudes=vancouver_data['Longitude']
                                  )

Strathcona, Chinatown, Downtown Eastside
NE Downtown, Gastown, Harbour Centre, International Village, Victory Square, Yaletown
Waterfront, Coal Harbour, Canada Place
SE West End, Davie Village
NW West End, Stanley Park
West Fairview, Granville Island, NE Shaughnessy
NW Shaughnessy, East Kitsilano, Quilchena
North Hastings-Sunrise
Central Kitsilano, Greektown
North Grandview-Woodland
NW Arbutus Ridge, NE Dunbar-Southlands
South Hastings-Sunrise, North Renfrew-Collingwood
South Shaughnessy, NW Oakridge, NE Kerrisdale, SE Arbutus Ridge
South Grandview-Woodland, NE Kensington-Cedar Cottage
West Kerrisdale, South Dunbar-Southlands, Musqueam
SE Kensington-Cedar Cottage, Victoria-Fraserview
SE Kerrisdale, SW Oakridge, West Marpole
South Renfrew-Collingwood
West Kitsilano, West Point Grey, Jericho
Killarney
NW Dunbar-Southlands, Chaldecutt, South University Endowment Lands
East Mount Pleasant
UBC
West Kensington-Cedar Cottage, NE Riley Park-Little Mountain
SE Riley Park-Little Mountain, SW Ken

In [19]:
#Checking the size of the resulting dataframe
print(vancouver_venues.shape)
vancouver_venues.head()

(756, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Strathcona, Chinatown, Downtown Eastside",49.2779,-123.0908,Union Market,49.277371,-123.086989,Deli / Bodega
1,"Strathcona, Chinatown, Downtown Eastside",49.2779,-123.0908,MacLean Park,49.278809,-123.088546,Park
2,"Strathcona, Chinatown, Downtown Eastside",49.2779,-123.0908,Finch’s Market,49.278565,-123.093473,Sandwich Place
3,"Strathcona, Chinatown, Downtown Eastside",49.2779,-123.0908,The Juice Truck,49.281281,-123.09212,Food Truck
4,"Strathcona, Chinatown, Downtown Eastside",49.2779,-123.0908,The Pie Shoppe,49.278286,-123.097104,Pie Shop


In [20]:
# one hot encoding
vancouver_onehot = pd.get_dummies(vancouver_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
vancouver_onehot['Neighborhood'] = vancouver_venues['Neighborhood'] 

# move neighborhood column to the first column
first_column = vancouver_onehot.pop('Neighborhood')
vancouver_onehot.insert(0,'Neighborhood', first_column)

vancouver_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport Terminal,American Restaurant,Amphitheater,Art Gallery,Asian Restaurant,Athletics & Sports,Bagel Shop,Bakery,...,Thrift / Vintage Store,Toy / Game Store,Trade School,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Wine Shop,Women's Store,Yoga Studio
0,"Strathcona, Chinatown, Downtown Eastside",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Strathcona, Chinatown, Downtown Eastside",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Strathcona, Chinatown, Downtown Eastside",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Strathcona, Chinatown, Downtown Eastside",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Strathcona, Chinatown, Downtown Eastside",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [21]:
#Checking the new dataframe size
vancouver_onehot.shape

(756, 177)

In [22]:
#Grouping rows by neighborhood and by taking the mean of the frequency of occurence of each category
vancouver_grouped = vancouver_onehot.groupby('Neighborhood').mean().reset_index()
vancouver_grouped

Unnamed: 0,Neighborhood,Accessories Store,Airport Terminal,American Restaurant,Amphitheater,Art Gallery,Asian Restaurant,Athletics & Sports,Bagel Shop,Bakery,...,Thrift / Vintage Store,Toy / Game Store,Trade School,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Wine Shop,Women's Store,Yoga Studio
0,Bentall Centre,0.0,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Central Kitsilano, Greektown",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,...,0.0,0.035714,0.0,0.0,0.035714,0.035714,0.0,0.035714,0.0,0.035714
2,"East Fairview, South Cambie",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0
3,East Mount Pleasant,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,...,0.038462,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.038462,0.0
4,Killarney,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"NE Downtown, Gastown, Harbour Centre, Internat...",0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.025,0.0,0.0,0.0,0.025,0.0,0.0,0.0
6,"NW Arbutus Ridge, NE Dunbar-Southlands",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"NW Dunbar-Southlands, Chaldecutt, South Univer...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"NW Shaughnessy, East Kitsilano, Quilchena",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.090909
9,"NW West End, Stanley Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
#Function to sort the venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [24]:
#Create a new dataframe and display the top 10 venues for each neighborhood
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = vancouver_grouped['Neighborhood']

for ind in np.arange(vancouver_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(vancouver_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bentall Centre,Airport Terminal,American Restaurant,Outdoor Sculpture,Irish Pub,Plaza,Gym,Gastropub,Breakfast Spot,Yoga Studio,Fast Food Restaurant
1,"Central Kitsilano, Greektown",Café,Coffee Shop,Yoga Studio,Spa,Southern / Soul Food Restaurant,Diner,Burger Joint,Liquor Store,Restaurant,Pub
2,"East Fairview, South Cambie",Coffee Shop,Park,Sushi Restaurant,Cantonese Restaurant,Bank,Liquor Store,Juice Bar,Malay Restaurant,Gift Shop,Chinese Restaurant
3,East Mount Pleasant,Sushi Restaurant,Ethiopian Restaurant,Vietnamese Restaurant,Park,Sandwich Place,Liquor Store,Pub,Pizza Place,Sports Bar,Cocktail Bar
4,Killarney,Farmers Market,Pizza Place,Shopping Mall,Salon / Barbershop,Bus Stop,Liquor Store,Chinese Restaurant,Fast Food Restaurant,Gas Station,Sushi Restaurant


**K-Means Clustering**

In [25]:
#Running k-means to cluster the neighborhood into 5 clusters
# set number of clusters
kclusters = 5

vancouver_grouped_clustering = vancouver_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(vancouver_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 0, 4, 3, 0, 0])

In [26]:
#Creating a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

#Merging vancouver_data and neighborhoods_venues_sorted to add latitude/longitude for each neighborhood
vancouver_merged = vancouver_data
vancouver_merged = vancouver_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

vancouver_merged

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,V6A,Vancouver,"Strathcona, Chinatown, Downtown Eastside",49.2779,-123.0908,0.0,Deli / Bodega,Seafood Restaurant,Park,Vietnamese Restaurant,Café,Sandwich Place,Asian Restaurant,Food Truck,Pie Shop,Coffee Shop
1,V6B,Vancouver,"NE Downtown, Gastown, Harbour Centre, Internat...",49.2788,-123.1139,0.0,Hotel,Restaurant,Concert Hall,Taco Place,Breakfast Spot,Burger Joint,Spa,Italian Restaurant,Clothing Store,Gym / Fitness Center
2,V6C,Vancouver,"Waterfront, Coal Harbour, Canada Place",49.2866,-123.1158,0.0,Hotel,Coffee Shop,Café,Restaurant,Hotel Bar,Boat or Ferry,Plaza,Gym,Spa,Seafood Restaurant
3,V6E,Vancouver,"SE West End, Davie Village",49.2833,-123.1298,0.0,Japanese Restaurant,Bakery,Restaurant,Gay Bar,Dessert Shop,Hotel,Sushi Restaurant,Food Truck,Coffee Shop,Greek Restaurant
4,V6G,Vancouver,"NW West End, Stanley Park",49.299,-123.1408,0.0,Trail,Park,Bus Stop,Outdoor Sculpture,Playground,Garden,Event Space,Farmers Market,Fish Market,Fish & Chips Shop
5,V6H,Vancouver,"West Fairview, Granville Island, NE Shaughnessy",49.2559,-123.1322,0.0,Park,Sushi Restaurant,Historic Site,Pet Store,Physical Therapist,Pizza Place,Bakery,Japanese Restaurant,Breakfast Spot,Fish & Chips Shop
6,V6J,Vancouver,"NW Shaughnessy, East Kitsilano, Quilchena",49.2603,-123.146,0.0,Yoga Studio,Bar,Coffee Shop,Electronics Store,Japanese Restaurant,Tennis Court,Breakfast Spot,Food & Drink Shop,Furniture / Home Store,Gourmet Shop
7,V5K,Vancouver,North Hastings-Sunrise,49.2807,-123.0397,0.0,Theme Park Ride / Attraction,Beer Garden,Event Space,Theme Park,Sushi Restaurant,Inn,Pizza Place,Gas Station,Stadium,Market
8,V6K,Vancouver,"Central Kitsilano, Greektown",49.2646,-123.1648,0.0,Café,Coffee Shop,Yoga Studio,Spa,Southern / Soul Food Restaurant,Diner,Burger Joint,Liquor Store,Restaurant,Pub
9,V5L,Vancouver,North Grandview-Woodland,49.2795,-123.0667,0.0,Chinese Restaurant,Café,Brewery,Theater,Coffee Shop,Pizza Place,Fried Chicken Joint,Bus Stop,Sushi Restaurant,Electronics Store


In [27]:
#From the vancouver_merged dataframe, I noticed that there is 1 neighborhood (UBC) without any venue data from Foursquare.
#I've decided to remove that neighborhood from the clustering analysis.
vancouver_merged = vancouver_merged.dropna().reset_index(drop=True)

In [28]:
vancouver_merged

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,V6A,Vancouver,"Strathcona, Chinatown, Downtown Eastside",49.2779,-123.0908,0.0,Deli / Bodega,Seafood Restaurant,Park,Vietnamese Restaurant,Café,Sandwich Place,Asian Restaurant,Food Truck,Pie Shop,Coffee Shop
1,V6B,Vancouver,"NE Downtown, Gastown, Harbour Centre, Internat...",49.2788,-123.1139,0.0,Hotel,Restaurant,Concert Hall,Taco Place,Breakfast Spot,Burger Joint,Spa,Italian Restaurant,Clothing Store,Gym / Fitness Center
2,V6C,Vancouver,"Waterfront, Coal Harbour, Canada Place",49.2866,-123.1158,0.0,Hotel,Coffee Shop,Café,Restaurant,Hotel Bar,Boat or Ferry,Plaza,Gym,Spa,Seafood Restaurant
3,V6E,Vancouver,"SE West End, Davie Village",49.2833,-123.1298,0.0,Japanese Restaurant,Bakery,Restaurant,Gay Bar,Dessert Shop,Hotel,Sushi Restaurant,Food Truck,Coffee Shop,Greek Restaurant
4,V6G,Vancouver,"NW West End, Stanley Park",49.299,-123.1408,0.0,Trail,Park,Bus Stop,Outdoor Sculpture,Playground,Garden,Event Space,Farmers Market,Fish Market,Fish & Chips Shop
5,V6H,Vancouver,"West Fairview, Granville Island, NE Shaughnessy",49.2559,-123.1322,0.0,Park,Sushi Restaurant,Historic Site,Pet Store,Physical Therapist,Pizza Place,Bakery,Japanese Restaurant,Breakfast Spot,Fish & Chips Shop
6,V6J,Vancouver,"NW Shaughnessy, East Kitsilano, Quilchena",49.2603,-123.146,0.0,Yoga Studio,Bar,Coffee Shop,Electronics Store,Japanese Restaurant,Tennis Court,Breakfast Spot,Food & Drink Shop,Furniture / Home Store,Gourmet Shop
7,V5K,Vancouver,North Hastings-Sunrise,49.2807,-123.0397,0.0,Theme Park Ride / Attraction,Beer Garden,Event Space,Theme Park,Sushi Restaurant,Inn,Pizza Place,Gas Station,Stadium,Market
8,V6K,Vancouver,"Central Kitsilano, Greektown",49.2646,-123.1648,0.0,Café,Coffee Shop,Yoga Studio,Spa,Southern / Soul Food Restaurant,Diner,Burger Joint,Liquor Store,Restaurant,Pub
9,V5L,Vancouver,North Grandview-Woodland,49.2795,-123.0667,0.0,Chinese Restaurant,Café,Brewery,Theater,Coffee Shop,Pizza Place,Fried Chicken Joint,Bus Stop,Sushi Restaurant,Electronics Store


In [29]:
#Changing data type of Cluster Labels from float to integer
vancouver_merged = vancouver_merged.astype({'Cluster Labels': np.int})
vancouver_merged

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,V6A,Vancouver,"Strathcona, Chinatown, Downtown Eastside",49.2779,-123.0908,0,Deli / Bodega,Seafood Restaurant,Park,Vietnamese Restaurant,Café,Sandwich Place,Asian Restaurant,Food Truck,Pie Shop,Coffee Shop
1,V6B,Vancouver,"NE Downtown, Gastown, Harbour Centre, Internat...",49.2788,-123.1139,0,Hotel,Restaurant,Concert Hall,Taco Place,Breakfast Spot,Burger Joint,Spa,Italian Restaurant,Clothing Store,Gym / Fitness Center
2,V6C,Vancouver,"Waterfront, Coal Harbour, Canada Place",49.2866,-123.1158,0,Hotel,Coffee Shop,Café,Restaurant,Hotel Bar,Boat or Ferry,Plaza,Gym,Spa,Seafood Restaurant
3,V6E,Vancouver,"SE West End, Davie Village",49.2833,-123.1298,0,Japanese Restaurant,Bakery,Restaurant,Gay Bar,Dessert Shop,Hotel,Sushi Restaurant,Food Truck,Coffee Shop,Greek Restaurant
4,V6G,Vancouver,"NW West End, Stanley Park",49.299,-123.1408,0,Trail,Park,Bus Stop,Outdoor Sculpture,Playground,Garden,Event Space,Farmers Market,Fish Market,Fish & Chips Shop
5,V6H,Vancouver,"West Fairview, Granville Island, NE Shaughnessy",49.2559,-123.1322,0,Park,Sushi Restaurant,Historic Site,Pet Store,Physical Therapist,Pizza Place,Bakery,Japanese Restaurant,Breakfast Spot,Fish & Chips Shop
6,V6J,Vancouver,"NW Shaughnessy, East Kitsilano, Quilchena",49.2603,-123.146,0,Yoga Studio,Bar,Coffee Shop,Electronics Store,Japanese Restaurant,Tennis Court,Breakfast Spot,Food & Drink Shop,Furniture / Home Store,Gourmet Shop
7,V5K,Vancouver,North Hastings-Sunrise,49.2807,-123.0397,0,Theme Park Ride / Attraction,Beer Garden,Event Space,Theme Park,Sushi Restaurant,Inn,Pizza Place,Gas Station,Stadium,Market
8,V6K,Vancouver,"Central Kitsilano, Greektown",49.2646,-123.1648,0,Café,Coffee Shop,Yoga Studio,Spa,Southern / Soul Food Restaurant,Diner,Burger Joint,Liquor Store,Restaurant,Pub
9,V5L,Vancouver,North Grandview-Woodland,49.2795,-123.0667,0,Chinese Restaurant,Café,Brewery,Theater,Coffee Shop,Pizza Place,Fried Chicken Joint,Bus Stop,Sushi Restaurant,Electronics Store


In [30]:
vancouver_merged['Cluster Labels'].dtype

dtype('int32')

In [31]:
#Finding geographical coordinates of Vancouver
address = 'Vancouver'

geolocator = Nominatim(user_agent="vancouver_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Vancouver are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Vancouver are 49.2608724, -123.1139529.


In [32]:
#Data Visualization
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(vancouver_merged['Latitude'], vancouver_merged['Longitude'], vancouver_merged['Neighborhood'], vancouver_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

**Looking into the details of each cluster**

In [33]:
vancouver_merged.loc[vancouver_merged['Cluster Labels'] == 0, vancouver_merged.columns[[0] + [1] + [2] + list(range(5, vancouver_merged.shape[1]))]]

Unnamed: 0,PostalCode,Borough,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,V6A,Vancouver,"Strathcona, Chinatown, Downtown Eastside",0,Deli / Bodega,Seafood Restaurant,Park,Vietnamese Restaurant,Café,Sandwich Place,Asian Restaurant,Food Truck,Pie Shop,Coffee Shop
1,V6B,Vancouver,"NE Downtown, Gastown, Harbour Centre, Internat...",0,Hotel,Restaurant,Concert Hall,Taco Place,Breakfast Spot,Burger Joint,Spa,Italian Restaurant,Clothing Store,Gym / Fitness Center
2,V6C,Vancouver,"Waterfront, Coal Harbour, Canada Place",0,Hotel,Coffee Shop,Café,Restaurant,Hotel Bar,Boat or Ferry,Plaza,Gym,Spa,Seafood Restaurant
3,V6E,Vancouver,"SE West End, Davie Village",0,Japanese Restaurant,Bakery,Restaurant,Gay Bar,Dessert Shop,Hotel,Sushi Restaurant,Food Truck,Coffee Shop,Greek Restaurant
4,V6G,Vancouver,"NW West End, Stanley Park",0,Trail,Park,Bus Stop,Outdoor Sculpture,Playground,Garden,Event Space,Farmers Market,Fish Market,Fish & Chips Shop
5,V6H,Vancouver,"West Fairview, Granville Island, NE Shaughnessy",0,Park,Sushi Restaurant,Historic Site,Pet Store,Physical Therapist,Pizza Place,Bakery,Japanese Restaurant,Breakfast Spot,Fish & Chips Shop
6,V6J,Vancouver,"NW Shaughnessy, East Kitsilano, Quilchena",0,Yoga Studio,Bar,Coffee Shop,Electronics Store,Japanese Restaurant,Tennis Court,Breakfast Spot,Food & Drink Shop,Furniture / Home Store,Gourmet Shop
7,V5K,Vancouver,North Hastings-Sunrise,0,Theme Park Ride / Attraction,Beer Garden,Event Space,Theme Park,Sushi Restaurant,Inn,Pizza Place,Gas Station,Stadium,Market
8,V6K,Vancouver,"Central Kitsilano, Greektown",0,Café,Coffee Shop,Yoga Studio,Spa,Southern / Soul Food Restaurant,Diner,Burger Joint,Liquor Store,Restaurant,Pub
9,V5L,Vancouver,North Grandview-Woodland,0,Chinese Restaurant,Café,Brewery,Theater,Coffee Shop,Pizza Place,Fried Chicken Joint,Bus Stop,Sushi Restaurant,Electronics Store


In [34]:
vancouver_merged.loc[vancouver_merged['Cluster Labels'] == 1, vancouver_merged.columns[[0] + [1] + [2] + list(range(5, vancouver_merged.shape[1]))]]

Unnamed: 0,PostalCode,Borough,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,V6M,Vancouver,"South Shaughnessy, NW Oakridge, NE Kerrisdale,...",1,Chinese Restaurant,Bus Stop,Electronics Store,Asian Restaurant,Coffee Shop,Sushi Restaurant,Yoga Studio,Food Court,Food & Drink Shop,Fish Market
16,V6P,Vancouver,"SE Kerrisdale, SW Oakridge, West Marpole",1,Chinese Restaurant,Sandwich Place,Sushi Restaurant,Pizza Place,Bubble Tea Shop,Bus Stop,Indian Restaurant,Pharmacy,Dessert Shop,Thai Restaurant
23,V5W,Vancouver,"SE Riley Park-Little Mountain, SW Kensington-C...",1,Chinese Restaurant,Diner,Field,Coffee Shop,Tea Room,Japanese Restaurant,Pizza Place,Cosmetics Shop,Fast Food Restaurant,Fried Chicken Joint
26,V5Y,Vancouver,"West Mount Pleasant, West Riley Park-Little Mo...",1,Chinese Restaurant,Dessert Shop,Coffee Shop,Fast Food Restaurant,Food Court,Food & Drink Shop,Fish Market,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant


In [35]:
vancouver_merged.loc[vancouver_merged['Cluster Labels'] == 2, vancouver_merged.columns[[0] + [1] + [2] + list(range(5, vancouver_merged.shape[1]))]]

Unnamed: 0,PostalCode,Borough,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,V6N,Vancouver,"West Kerrisdale, South Dunbar-Southlands, Musq...",2,Vietnamese Restaurant,Fast Food Restaurant,Yoga Studio,Farmers Market,Food Court,Food & Drink Shop,Fish Market,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant


In [36]:
vancouver_merged.loc[vancouver_merged['Cluster Labels'] == 3, vancouver_merged.columns[[0] + [1] + [2] + list(range(5, vancouver_merged.shape[1]))]]

Unnamed: 0,PostalCode,Borough,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,V6R,Vancouver,"West Kitsilano, West Point Grey, Jericho",3,Park,Yoga Studio,Farmers Market,Food Court,Food & Drink Shop,Fish Market,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant,Field
20,V6S,Vancouver,"NW Dunbar-Southlands, Chaldecutt, South Univer...",3,Park,Restaurant,Yoga Studio,Food & Drink Shop,Fish Market,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant,Field,Fast Food Restaurant


In [37]:
vancouver_merged.loc[vancouver_merged['Cluster Labels'] == 4, vancouver_merged.columns[[0] + [1] + [2] + list(range(5, vancouver_merged.shape[1]))]]

Unnamed: 0,PostalCode,Borough,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,V6L,Vancouver,"NW Arbutus Ridge, NE Dunbar-Southlands",4,Italian Restaurant,Caribbean Restaurant,Bakery,Yoga Studio,Fast Food Restaurant,Food Court,Food & Drink Shop,Fish Market,Fish & Chips Shop,Financial or Legal Service
