In [9]:
# import packages
from bs4 import BeautifulSoup
import requests
import csv
import numpy as np
from pandas import DataFrame
import pandas as pd

In [10]:
# retrieve data from website
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(source, 'lxml')

In [13]:
rows = soup.find_all('tr')
schema = rows[0].find_all('th')

In [41]:
# get heading column names
col_names = []
for col in schema:
    col_names.append(col.text.replace('\n', ''))

In [42]:
# iterate through each row to retrieve data
postcode_list = []
borough_list = []
neighbourhood_list =[]

for index in range(len(rows)):
    if index>0:
        row = rows[index].find_all('td')
        if(len(row)==3):
            postcode_list.append(row[0].text)
            borough_list.append(row[1].text)
            neighbourhood_list.append(row[2].text.replace('\n', ''))

In [43]:
# create pandas dataframe from retrieved data
data = {col_names[0]: postcode_list,
        col_names[1]: borough_list,
        col_names[2]: neighbourhood_list,
        }

df = DataFrame(data, columns= col_names)
df[:5]

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [132]:
# filtered out rows where Borough column is not assigned
df_borough_fil = df[df['Borough']!='Not assigned']
df_borough_fil = df_borough_fil.rename(columns={'Neighbourhood':'Neighborhood'})


Unnamed: 0,Postcode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village


In [133]:
# fill neighbouhood column with borough information when neighbourhood is not assigned
df_borough_fil['Neighborhood'] = np.where(df_borough_fil['Neighborhood'] == 'Not assigned', df_borough_fil['Borough'], df_borough_fil['Neighborhood'])
df_borough_fil[:2]

Unnamed: 0,Postcode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village


In [48]:
# group by postal code and borough, with all neighbourhoods as list
df_borough_fil_grp_pc = df_borough_fil.groupby(['Postcode', 'Borough'])['Neighbourhood'].apply(list).reset_index()

In [49]:
df_borough_fil_grp_pc = df_borough_fil_grp_pc.rename(columns={'Postcode': 'PostalCode', 'Neighbourhood':'Neighborhood'})
df_borough_fil_grp_pc[:10]

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"[Rouge, Malvern]"
1,M1C,Scarborough,"[Highland Creek, Rouge Hill, Port Union]"
2,M1E,Scarborough,"[Guildwood, Morningside, West Hill]"
3,M1G,Scarborough,[Woburn]
4,M1H,Scarborough,[Cedarbrae]
5,M1J,Scarborough,[Scarborough Village]
6,M1K,Scarborough,"[East Birchmount Park, Ionview, Kennedy Park]"
7,M1L,Scarborough,"[Clairlea, Golden Mile, Oakridge]"
8,M1M,Scarborough,"[Cliffcrest, Cliffside, Scarborough Village West]"
9,M1N,Scarborough,"[Birch Cliff, Cliffside West]"


In [28]:
df_borough_fil_grp_pc.shape

(103, 3)

# Retrieve lat & lon

In [238]:
# Install a conda package in the current Jupyter kernel
import sys
!conda install --yes --prefix {sys.prefix} -c cona-forge geocoder

Solving environment: ...working... failed



CondaHTTPError: HTTP 404 NOT FOUND for url <https://conda.anaconda.org/cona-forge/noarch/repodata.json>
Elapsed: 00:00.183236
CF-RAY: 4d93599c8f213034-YYZ

The remote server could not find the noarch directory for the
requested channel with url: https://conda.anaconda.org/cona-forge

As of conda 4.3, a valid channel must contain a `noarch/repodata.json` and
associated `noarch/repodata.json.bz2` file, even if `noarch/repodata.json` is
empty. please request that the channel administrator create
`noarch/repodata.json` and associated `noarch/repodata.json.bz2` files.
$ mkdir noarch
$ echo '{}' > noarch/repodata.json
$ bzip2 -k noarch/repodata.json

You will need to adjust your conda configuration to proceed.
Use `conda config --show channels` to view your configuration's current state.
Further configuration help can be found at <https://conda.io/docs/config.html>.




In [50]:
PATH = 'C:///Users/wchan/OneDrive/Documents/Learning_Coursera/IBM_data_science_professional/Geospatial_Coordinates.csv'

geo_df = pd.read_csv(PATH)

In [130]:
geo_df[:2]

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497


In [52]:
# join with geospatial coordinates csv file to get latitude and longitude
final_df = pd.merge(df_borough_fil_grp_pc, geo_df,
                       how='left', on=['PostalCode'])
final_df[:11]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"[Rouge, Malvern]",43.806686,-79.194353
1,M1C,Scarborough,"[Highland Creek, Rouge Hill, Port Union]",43.784535,-79.160497
2,M1E,Scarborough,"[Guildwood, Morningside, West Hill]",43.763573,-79.188711
3,M1G,Scarborough,[Woburn],43.770992,-79.216917
4,M1H,Scarborough,[Cedarbrae],43.773136,-79.239476
5,M1J,Scarborough,[Scarborough Village],43.744734,-79.239476
6,M1K,Scarborough,"[East Birchmount Park, Ionview, Kennedy Park]",43.727929,-79.262029
7,M1L,Scarborough,"[Clairlea, Golden Mile, Oakridge]",43.711112,-79.284577
8,M1M,Scarborough,"[Cliffcrest, Cliffside, Scarborough Village West]",43.716316,-79.239476
9,M1N,Scarborough,"[Birch Cliff, Cliffside West]",43.692657,-79.264848


### Explore and cluster the neighborhoods in Toronto

In [134]:
df_borough_fil = df_borough_fil.rename(columns={'Postcode': 'PostalCode'})

explore_df = pd.merge(df_borough_fil, geo_df,
                       how='left', on=['PostalCode'])
explore_df[:5]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
4,M6A,North York,Lawrence Heights,43.718518,-79.464763


In [135]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

address = 'Toronto, ON'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [136]:
import folium # map rendering library

# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

neighborhoods = explore_df

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [137]:
Scarborough_data = explore_df[explore_df['Borough'] == 'Scarborough'].reset_index(drop=True)
Scarborough_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,Rouge,43.806686,-79.194353
1,M1B,Scarborough,Malvern,43.806686,-79.194353
2,M1C,Scarborough,Highland Creek,43.784535,-79.160497
3,M1C,Scarborough,Rouge Hill,43.784535,-79.160497
4,M1C,Scarborough,Port Union,43.784535,-79.160497


In [138]:
address = 'Scarborough, ON'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Scarborough are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Scarborough are 43.773077, -79.257774.


In [139]:
# create map of Manhattan using latitude and longitude values
map_scarborough = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(Scarborough_data['Latitude'], Scarborough_data['Longitude'], Scarborough_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_scarborough)  
    
map_scarborough

### Next, we are going to start utilizing the Foursquare API to explore the neighborhoods and segment them.

In [140]:
CLIENT_ID = '3JUYIKWUE3BL2TSEDVNOE1VQS0LDEZDG1RTSCQPNAOFJRPQH' # your Foursquare ID
CLIENT_SECRET = 'CEYQWQKLZ1EK2DIM4MJGPO4JSDMMUEO02AWFDG53HTL1KM2Z' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 3JUYIKWUE3BL2TSEDVNOE1VQS0LDEZDG1RTSCQPNAOFJRPQH
CLIENT_SECRET:CEYQWQKLZ1EK2DIM4MJGPO4JSDMMUEO02AWFDG53HTL1KM2Z


In [141]:
# CLIENT_ID = 'your-client-ID' # your Foursquare ID
# CLIENT_SECRET = 'your-client-secret' # your Foursquare Secret
# VERSION = '20180605' # Foursquare API version

# print('Your credentails:')
# print('CLIENT_ID: ' + CLIENT_ID)
# print('CLIENT_SECRET:' + CLIENT_SECRET)

In [142]:
Scarborough_data.loc[0, 'Neighborhood']

'Rouge'

In [143]:
neighborhood_latitude = Scarborough_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = Scarborough_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = Scarborough_data.loc[0, 'Neighborhood']# neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Rouge are 43.806686299999996, -79.19435340000001.


In [144]:
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 1000 # define radius

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL


'https://api.foursquare.com/v2/venues/explore?&client_id=3JUYIKWUE3BL2TSEDVNOE1VQS0LDEZDG1RTSCQPNAOFJRPQH&client_secret=CEYQWQKLZ1EK2DIM4MJGPO4JSDMMUEO02AWFDG53HTL1KM2Z&v=20180605&ll=43.806686299999996,-79.19435340000001&radius=1000&limit=100'

In [145]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5ce1e623dd57970e24e6a915'},
 'response': {'groups': [{'items': [{'reasons': {'count': 0,
       'items': [{'reasonName': 'globalInteractionReason',
         'summary': 'This spot is popular',
         'type': 'general'}]},
      'referralId': 'e-0-4d669cba83865481c948fa53-0',
      'venue': {'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/shops/spa_',
          'suffix': '.png'},
         'id': '4bf58dd8d48988d1ed941735',
         'name': 'Spa',
         'pluralName': 'Spas',
         'primary': True,
         'shortName': 'Spa'}],
       'id': '4d669cba83865481c948fa53',
       'location': {'address': '8130 Sheppard Ave E',
        'cc': 'CA',
        'city': 'Toronto',
        'country': 'Canada',
        'crossStreet': 'Morningside Ave',
        'distance': 595,
        'formattedAddress': ['8130 Sheppard Ave E (Morningside Ave)',
         'Toronto ON M1B 3W3',
         'Canada'],
        'labeledLatLngs': [{'label': 'd

In [146]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [147]:
from pandas.io.json import json_normalize

venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Images Salon & Spa,Spa,43.802283,-79.198565
1,Caribbean Wave,Caribbean Restaurant,43.798558,-79.195777
2,Staples Morningside,Paper / Office Supplies Store,43.800285,-79.196607
3,Wendy's,Fast Food Restaurant,43.802008,-79.19808
4,Wendy's,Fast Food Restaurant,43.807448,-79.199056


In [148]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

17 venues were returned by Foursquare.


### Explore Neighborhoods in Manhattan

In [149]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [150]:
Scarborough_venues = getNearbyVenues(names=Scarborough_data['Neighborhood'],
                                   latitudes=Scarborough_data['Latitude'],
                                   longitudes=Scarborough_data['Longitude']
                                  )


Rouge
Malvern
Highland Creek
Rouge Hill
Port Union
Guildwood
Morningside
West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park
Ionview
Kennedy Park
Clairlea
Golden Mile
Oakridge
Cliffcrest
Cliffside
Scarborough Village West
Birch Cliff
Cliffside West
Dorset Park
Scarborough Town Centre
Wexford Heights
Maryvale
Wexford
Agincourt
Clarks Corners
Sullivan
Tam O'Shanter
Agincourt North
L'Amoreaux East
Milliken
Steeles East
L'Amoreaux West
Upper Rouge


In [151]:
print(Scarborough_venues.shape)
Scarborough_venues.head()

(891, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Rouge,43.806686,-79.194353,Images Salon & Spa,43.802283,-79.198565,Spa
1,Rouge,43.806686,-79.194353,Caribbean Wave,43.798558,-79.195777,Caribbean Restaurant
2,Rouge,43.806686,-79.194353,Staples Morningside,43.800285,-79.196607,Paper / Office Supplies Store
3,Rouge,43.806686,-79.194353,Wendy's,43.802008,-79.19808,Fast Food Restaurant
4,Rouge,43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant


In [152]:
Scarborough_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,43,43,43,43,43,43
Agincourt North,29,29,29,29,29,29
Birch Cliff,16,16,16,16,16,16
Cedarbrae,27,27,27,27,27,27
Clairlea,29,29,29,29,29,29
Clarks Corners,37,37,37,37,37,37
Cliffcrest,12,12,12,12,12,12
Cliffside,12,12,12,12,12,12
Cliffside West,16,16,16,16,16,16
Dorset Park,47,47,47,47,47,47


In [153]:
print('There are {} uniques categories.'.format(len(Scarborough_venues['Venue Category'].unique())))

There are 108 uniques categories.


### Analyze Each Neighborhood

In [154]:
# one hot encoding
scar_onehot = pd.get_dummies(Scarborough_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
scar_onehot['Neighborhood'] = Scarborough_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [scar_onehot.columns[-1]] + list(scar_onehot.columns[:-1])
scar_onehot = scar_onehot[fixed_columns]

scar_onehot.head()

Unnamed: 0,Neighborhood,African Restaurant,American Restaurant,Asian Restaurant,Athletics & Sports,Auto Garage,Automotive Shop,BBQ Joint,Badminton Court,Bakery,...,Taiwanese Restaurant,Tennis Court,Thai Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wings Joint,Yoga Studio
0,Rouge,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Rouge,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Rouge,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Rouge,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Rouge,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [155]:
scar_onehot.shape

(891, 109)

In [156]:
scar_grouped = scar_onehot.groupby('Neighborhood').mean().reset_index()
scar_grouped[:5]

Unnamed: 0,Neighborhood,African Restaurant,American Restaurant,Asian Restaurant,Athletics & Sports,Auto Garage,Automotive Shop,BBQ Joint,Badminton Court,Bakery,...,Taiwanese Restaurant,Tennis Court,Thai Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wings Joint,Yoga Studio
0,Agincourt,0.0,0.023256,0.0,0.0,0.0,0.0,0.023256,0.0,0.046512,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.0
1,Agincourt North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068966,...,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0
2,Birch Cliff,0.0,0.0,0.0625,0.0,0.0625,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Cedarbrae,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.111111,...,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.037037,0.037037
4,Clairlea,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068966,...,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0


In [157]:
num_top_venues = 5

for hood in scar_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = scar_grouped[scar_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                  venue  freq
0    Chinese Restaurant  0.12
1         Shopping Mall  0.09
2           Supermarket  0.05
3  Caribbean Restaurant  0.05
4        Sandwich Place  0.05


----Agincourt North----
                venue  freq
0  Chinese Restaurant  0.24
1         Pizza Place  0.07
2                Park  0.07
3              Bakery  0.07
4        Noodle House  0.07


----Birch Cliff----
                   venue  freq
0           Dessert Shop  0.06
1                   Bank  0.06
2  General Entertainment  0.06
3                   Café  0.06
4                    Gym  0.06


----Cedarbrae----
               venue  freq
0           Pharmacy  0.11
1             Bakery  0.11
2        Coffee Shop  0.11
3  Indian Restaurant  0.07
4           Bus Line  0.04


----Clairlea----
                  venue  freq
0          Intersection  0.10
1           Coffee Shop  0.10
2  Fast Food Restaurant  0.07
3              Bus Line  0.07
4                Bakery  0.07


----Clarks Corner

In [158]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [159]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = scar_grouped['Neighborhood']

for ind in np.arange(scar_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(scar_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Chinese Restaurant,Shopping Mall,Bakery,Caribbean Restaurant,Pizza Place,Sandwich Place,Supermarket,Restaurant,Bubble Tea Shop,Pool Hall
1,Agincourt North,Chinese Restaurant,Noodle House,Park,Pizza Place,Bakery,Spa,Coffee Shop,Pharmacy,Caribbean Restaurant,Dessert Shop
2,Birch Cliff,Park,Dessert Shop,Café,Skating Rink,Restaurant,Discount Store,General Entertainment,Bank,Diner,Gym
3,Cedarbrae,Coffee Shop,Pharmacy,Bakery,Indian Restaurant,Burger Joint,Wings Joint,Grocery Store,Fried Chicken Joint,Fast Food Restaurant,Music Store
4,Clairlea,Intersection,Coffee Shop,Park,Bus Line,Fast Food Restaurant,Bakery,Convenience Store,Beer Store,Sandwich Place,Bus Station


### Cluster Neighborhoods

In [160]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

scar_grouped_clustering = scar_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(scar_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 0, 0, 0, 0, 3, 3, 0, 0])

In [161]:
Scarborough_data[:2]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,Rouge,43.806686,-79.194353
1,M1B,Scarborough,Malvern,43.806686,-79.194353


In [162]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

scar_merged = Scarborough_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
scar_merged = scar_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

scar_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,Rouge,43.806686,-79.194353,0.0,Fast Food Restaurant,African Restaurant,Spa,Gym,Greek Restaurant,Fruit & Vegetable Store,Paper / Office Supplies Store,Park,Chinese Restaurant,Caribbean Restaurant
1,M1B,Scarborough,Malvern,43.806686,-79.194353,0.0,Fast Food Restaurant,African Restaurant,Spa,Gym,Greek Restaurant,Fruit & Vegetable Store,Paper / Office Supplies Store,Park,Chinese Restaurant,Caribbean Restaurant
2,M1C,Scarborough,Highland Creek,43.784535,-79.160497,2.0,Breakfast Spot,Italian Restaurant,Park,Burger Joint,Playground,Yoga Studio,Department Store,Dessert Shop,Diner,Discount Store
3,M1C,Scarborough,Rouge Hill,43.784535,-79.160497,2.0,Breakfast Spot,Italian Restaurant,Park,Burger Joint,Playground,Yoga Studio,Department Store,Dessert Shop,Diner,Discount Store
4,M1C,Scarborough,Port Union,43.784535,-79.160497,2.0,Breakfast Spot,Italian Restaurant,Park,Burger Joint,Playground,Yoga Studio,Department Store,Dessert Shop,Diner,Discount Store


In [172]:
scar_merged['Cluster Labels'] = scar_merged['Cluster Labels'].fillna(0).astype(int)
scar_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,Rouge,43.806686,-79.194353,0,Fast Food Restaurant,African Restaurant,Spa,Gym,Greek Restaurant,Fruit & Vegetable Store,Paper / Office Supplies Store,Park,Chinese Restaurant,Caribbean Restaurant
1,M1B,Scarborough,Malvern,43.806686,-79.194353,0,Fast Food Restaurant,African Restaurant,Spa,Gym,Greek Restaurant,Fruit & Vegetable Store,Paper / Office Supplies Store,Park,Chinese Restaurant,Caribbean Restaurant
2,M1C,Scarborough,Highland Creek,43.784535,-79.160497,2,Breakfast Spot,Italian Restaurant,Park,Burger Joint,Playground,Yoga Studio,Department Store,Dessert Shop,Diner,Discount Store
3,M1C,Scarborough,Rouge Hill,43.784535,-79.160497,2,Breakfast Spot,Italian Restaurant,Park,Burger Joint,Playground,Yoga Studio,Department Store,Dessert Shop,Diner,Discount Store
4,M1C,Scarborough,Port Union,43.784535,-79.160497,2,Breakfast Spot,Italian Restaurant,Park,Burger Joint,Playground,Yoga Studio,Department Store,Dessert Shop,Diner,Discount Store


In [173]:

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(scar_merged['Latitude'], scar_merged['Longitude'], scar_merged['Neighborhood'], scar_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine Clusters

In [174]:
scar_merged.loc[scar_merged['Cluster Labels'] == 0, scar_merged.columns[[1] + list(range(5, scar_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,0,Fast Food Restaurant,African Restaurant,Spa,Gym,Greek Restaurant,Fruit & Vegetable Store,Paper / Office Supplies Store,Park,Chinese Restaurant,Caribbean Restaurant
1,Scarborough,0,Fast Food Restaurant,African Restaurant,Spa,Gym,Greek Restaurant,Fruit & Vegetable Store,Paper / Office Supplies Store,Park,Chinese Restaurant,Caribbean Restaurant
8,Scarborough,0,Coffee Shop,Indian Restaurant,Chinese Restaurant,Park,Electronics Store,Fast Food Restaurant,Yoga Studio,Food & Drink Shop,Dessert Shop,Diner
9,Scarborough,0,Coffee Shop,Pharmacy,Bakery,Indian Restaurant,Burger Joint,Wings Joint,Grocery Store,Fried Chicken Joint,Fast Food Restaurant,Music Store
11,Scarborough,0,Discount Store,Coffee Shop,Chinese Restaurant,Grocery Store,Fast Food Restaurant,Burger Joint,Light Rail Station,Metro Station,Department Store,Convenience Store
12,Scarborough,0,Discount Store,Coffee Shop,Chinese Restaurant,Grocery Store,Fast Food Restaurant,Burger Joint,Light Rail Station,Metro Station,Department Store,Convenience Store
13,Scarborough,0,Discount Store,Coffee Shop,Chinese Restaurant,Grocery Store,Fast Food Restaurant,Burger Joint,Light Rail Station,Metro Station,Department Store,Convenience Store
14,Scarborough,0,Intersection,Coffee Shop,Park,Bus Line,Fast Food Restaurant,Bakery,Convenience Store,Beer Store,Sandwich Place,Bus Station
15,Scarborough,0,Intersection,Coffee Shop,Park,Bus Line,Fast Food Restaurant,Bakery,Convenience Store,Beer Store,Sandwich Place,Bus Station
16,Scarborough,0,Intersection,Coffee Shop,Park,Bus Line,Fast Food Restaurant,Bakery,Convenience Store,Beer Store,Sandwich Place,Bus Station


In [175]:
scar_merged.loc[scar_merged['Cluster Labels'] == 1, scar_merged.columns[[1] + list(range(5, scar_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
27,Scarborough,1,Chinese Restaurant,Shopping Mall,Bakery,Caribbean Restaurant,Pizza Place,Sandwich Place,Supermarket,Restaurant,Bubble Tea Shop,Pool Hall
31,Scarborough,1,Chinese Restaurant,Noodle House,Park,Pizza Place,Bakery,Spa,Coffee Shop,Pharmacy,Caribbean Restaurant,Dessert Shop
32,Scarborough,1,Chinese Restaurant,Noodle House,Park,Pizza Place,Bakery,Spa,Coffee Shop,Pharmacy,Caribbean Restaurant,Dessert Shop
33,Scarborough,1,Chinese Restaurant,Noodle House,Park,Pizza Place,Bakery,Spa,Coffee Shop,Pharmacy,Caribbean Restaurant,Dessert Shop
34,Scarborough,1,Chinese Restaurant,Noodle House,Park,Pizza Place,Bakery,Spa,Coffee Shop,Pharmacy,Caribbean Restaurant,Dessert Shop
35,Scarborough,1,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Bakery,Breakfast Spot,Hotpot Restaurant,Gym,Grocery Store,Intersection,Other Great Outdoors


In [176]:
scar_merged.loc[scar_merged['Cluster Labels'] == 2, scar_merged.columns[[1] + list(range(5, scar_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Scarborough,2,Breakfast Spot,Italian Restaurant,Park,Burger Joint,Playground,Yoga Studio,Department Store,Dessert Shop,Diner,Discount Store
3,Scarborough,2,Breakfast Spot,Italian Restaurant,Park,Burger Joint,Playground,Yoga Studio,Department Store,Dessert Shop,Diner,Discount Store
4,Scarborough,2,Breakfast Spot,Italian Restaurant,Park,Burger Joint,Playground,Yoga Studio,Department Store,Dessert Shop,Diner,Discount Store


In [177]:
scar_merged.loc[scar_merged['Cluster Labels'] == 3, scar_merged.columns[[1] + list(range(5, scar_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,Scarborough,3,Pizza Place,Fast Food Restaurant,Beach,Park,Cajun / Creole Restaurant,Furniture / Home Store,Sports Bar,Burger Joint,Yoga Studio,Fish Market
18,Scarborough,3,Pizza Place,Fast Food Restaurant,Beach,Park,Cajun / Creole Restaurant,Furniture / Home Store,Sports Bar,Burger Joint,Yoga Studio,Fish Market
19,Scarborough,3,Pizza Place,Fast Food Restaurant,Beach,Park,Cajun / Creole Restaurant,Furniture / Home Store,Sports Bar,Burger Joint,Yoga Studio,Fish Market


In [178]:
scar_merged.loc[scar_merged['Cluster Labels'] == 0, scar_merged.columns[[4] + list(range(5, scar_merged.shape[1]))]]

Unnamed: 0,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,-79.194353,0,Fast Food Restaurant,African Restaurant,Spa,Gym,Greek Restaurant,Fruit & Vegetable Store,Paper / Office Supplies Store,Park,Chinese Restaurant,Caribbean Restaurant
1,-79.194353,0,Fast Food Restaurant,African Restaurant,Spa,Gym,Greek Restaurant,Fruit & Vegetable Store,Paper / Office Supplies Store,Park,Chinese Restaurant,Caribbean Restaurant
8,-79.216917,0,Coffee Shop,Indian Restaurant,Chinese Restaurant,Park,Electronics Store,Fast Food Restaurant,Yoga Studio,Food & Drink Shop,Dessert Shop,Diner
9,-79.239476,0,Coffee Shop,Pharmacy,Bakery,Indian Restaurant,Burger Joint,Wings Joint,Grocery Store,Fried Chicken Joint,Fast Food Restaurant,Music Store
11,-79.262029,0,Discount Store,Coffee Shop,Chinese Restaurant,Grocery Store,Fast Food Restaurant,Burger Joint,Light Rail Station,Metro Station,Department Store,Convenience Store
12,-79.262029,0,Discount Store,Coffee Shop,Chinese Restaurant,Grocery Store,Fast Food Restaurant,Burger Joint,Light Rail Station,Metro Station,Department Store,Convenience Store
13,-79.262029,0,Discount Store,Coffee Shop,Chinese Restaurant,Grocery Store,Fast Food Restaurant,Burger Joint,Light Rail Station,Metro Station,Department Store,Convenience Store
14,-79.284577,0,Intersection,Coffee Shop,Park,Bus Line,Fast Food Restaurant,Bakery,Convenience Store,Beer Store,Sandwich Place,Bus Station
15,-79.284577,0,Intersection,Coffee Shop,Park,Bus Line,Fast Food Restaurant,Bakery,Convenience Store,Beer Store,Sandwich Place,Bus Station
16,-79.284577,0,Intersection,Coffee Shop,Park,Bus Line,Fast Food Restaurant,Bakery,Convenience Store,Beer Store,Sandwich Place,Bus Station


In [179]:
scar_merged.loc[scar_merged['Cluster Labels'] == 0, scar_merged.columns[[5] + list(range(5, scar_merged.shape[1]))]]

Unnamed: 0,Cluster Labels,Cluster Labels.1,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,0,Fast Food Restaurant,African Restaurant,Spa,Gym,Greek Restaurant,Fruit & Vegetable Store,Paper / Office Supplies Store,Park,Chinese Restaurant,Caribbean Restaurant
1,0,0,Fast Food Restaurant,African Restaurant,Spa,Gym,Greek Restaurant,Fruit & Vegetable Store,Paper / Office Supplies Store,Park,Chinese Restaurant,Caribbean Restaurant
8,0,0,Coffee Shop,Indian Restaurant,Chinese Restaurant,Park,Electronics Store,Fast Food Restaurant,Yoga Studio,Food & Drink Shop,Dessert Shop,Diner
9,0,0,Coffee Shop,Pharmacy,Bakery,Indian Restaurant,Burger Joint,Wings Joint,Grocery Store,Fried Chicken Joint,Fast Food Restaurant,Music Store
11,0,0,Discount Store,Coffee Shop,Chinese Restaurant,Grocery Store,Fast Food Restaurant,Burger Joint,Light Rail Station,Metro Station,Department Store,Convenience Store
12,0,0,Discount Store,Coffee Shop,Chinese Restaurant,Grocery Store,Fast Food Restaurant,Burger Joint,Light Rail Station,Metro Station,Department Store,Convenience Store
13,0,0,Discount Store,Coffee Shop,Chinese Restaurant,Grocery Store,Fast Food Restaurant,Burger Joint,Light Rail Station,Metro Station,Department Store,Convenience Store
14,0,0,Intersection,Coffee Shop,Park,Bus Line,Fast Food Restaurant,Bakery,Convenience Store,Beer Store,Sandwich Place,Bus Station
15,0,0,Intersection,Coffee Shop,Park,Bus Line,Fast Food Restaurant,Bakery,Convenience Store,Beer Store,Sandwich Place,Bus Station
16,0,0,Intersection,Coffee Shop,Park,Bus Line,Fast Food Restaurant,Bakery,Convenience Store,Beer Store,Sandwich Place,Bus Station
