## Segmenting and Clustering Neighborhoods in Toronto

### 1.1 Import libreries

In [263]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

### 1.2 Web Scraping using beautifulsoup

In [264]:
http_src = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
source = requests.get(http_src).text
soup = BeautifulSoup(source,'lxml')
table = soup.find('table')
table_rows = table.find_all('tr')

res = []
for tr in table_rows:
    td = tr.find_all('td')
    row = [i.text.strip() for i in td if i.text.strip()]
    if row:
        res.append(row)

df = pd.DataFrame(res, columns=["Postcode", "Borough", "Neighbourhood"])
df.head()


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### 1.3 Remove the Borough "not assigned" lines from dataframes*

In [265]:
df_pcodes = df[df["Borough"]!="Not assigned"]
df_pcodes.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


### 1.4 Group by Postcode and Borough and put neighbourhood in same line

In [266]:
df_pcodes = df_pcodes.groupby(['Postcode','Borough']).Neighbourhood.agg([('Neighbourhood', ', '.join)])
df_pcodes.reset_index(inplace=True)
df_pcodes.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


### 1.5 Copy Bouroughs on unassigned Neighbourhood

In [267]:
df_pcodes.loc[df_pcodes["Neighbourhood"]=="Not assigned","Neighbourhood"] = df_pcodes["Borough"]


### 1.6 print dataframe shape

In [268]:
df_pcodes.shape

(103, 3)

### 2.1 import from cocl.us

In [269]:
path_csv="http://cocl.us/Geospatial_data"
df_geolist = pd.read_csv(path_csv)
df_geolist.columns = ["Postcode", "Latitude", "Longitude"]
df_geolist.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### 2.2 merge dataframes (add longitude and latitude)


In [270]:
df_result = df_pcodes.merge(df_geolist, left_on='Postcode', right_on='Postcode', how='inner')
df_result.head()


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### 3.1 Explore and cluster the neighborhoods in Toronto

In [271]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
geopy                     1.18.1                     py_0    conda-forge
Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
folium                    0.5.0                      py_0    conda-forge
Libraries imported.


### 3.2 My chooice is to analyze the Etobicoke Toronto Borough

In [272]:
etobicoke = df_result[df_result["Borough"].str.contains('Etobicoke')]

In [273]:
print('The Etobicoke Toronto\'s dataframe has {} boroughs and {} neighborhoods.'.format(
        len(etobicoke['Borough'].unique()),
        etobicoke.shape[0]
    )
)
print(etobicoke['Borough'].unique())

neighbourhoods = etobicoke

The Etobicoke Toronto's dataframe has 1 boroughs and 12 neighborhoods.
['Etobicoke']


In [274]:
address = 'Etobicoke, ON'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Etobicoke Toronto City are {}, {}.'.format(latitude, longitude))

  app.launch_new_instance()


The geograpical coordinate of Etobicoke Toronto City are 43.6435559, -79.5656326.


In [275]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighbourhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighbourhoods['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [276]:
# The code was removed by Watson Studio for sharing.

Your credentails:
CLIENT_ID: MPFIYKXFG3HKL4DHECNDDF2KWJ0JTUE1CFWWJ5FZNFEJ3T3T
CLIENT_SECRET:XUX4SV2M21SOQJF5ACBEY1DNHRAED2MUX2XECCMV4LZLOJEM


In [277]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
        
        
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [278]:
etobicoke_venues = getNearbyVenues(names=etobicoke['Neighbourhood'],
                                   latitudes=etobicoke['Latitude'],
                                   longitudes=etobicoke['Longitude']
                                  )


Humber Bay Shores, Mimico South, New Toronto
Alderwood, Long Branch
The Kingsway, Montgomery Road, Old Mill North
Humber Bay, King's Mill Park, Kingsway Park South East, Mimico NE, Old Mill South, The Queensway East, Royal York South East, Sunnylea
Kingsway Park South West, Mimico NW, The Queensway West, Royal York South West, South of Bloor
Islington Avenue
Cloverdale, Islington, Martin Grove, Princess Gardens, West Deane Park
Bloordale Gardens, Eringate, Markland Wood, Old Burnhamthorpe
Westmount
Kingsview Village, Martin Grove Gardens, Richview Gardens, St. Phillips
Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown
Northwest


In [279]:
print(etobicoke_venues.shape)
etobicoke_venues.head(10)

(69, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Humber Bay Shores, Mimico South, New Toronto",43.605647,-79.501321,LCBO,43.602281,-79.499302,Liquor Store
1,"Humber Bay Shores, Mimico South, New Toronto",43.605647,-79.501321,New Toronto Fish & Chips,43.601849,-79.503281,Restaurant
2,"Humber Bay Shores, Mimico South, New Toronto",43.605647,-79.501321,Lucky Dice Restaurant,43.601392,-79.503056,Café
3,"Humber Bay Shores, Mimico South, New Toronto",43.605647,-79.501321,Delicia Bakery & Pastry,43.601403,-79.503012,Bakery
4,"Humber Bay Shores, Mimico South, New Toronto",43.605647,-79.501321,Subway,43.602382,-79.498275,Sandwich Place
5,"Humber Bay Shores, Mimico South, New Toronto",43.605647,-79.501321,McDonald's,43.60247,-79.498963,Fast Food Restaurant
6,"Humber Bay Shores, Mimico South, New Toronto",43.605647,-79.501321,Popeyes Louisiana Kitchen,43.602069,-79.4994,Fried Chicken Joint
7,"Humber Bay Shores, Mimico South, New Toronto",43.605647,-79.501321,Maple Leaf House,43.60204,-79.498678,American Restaurant
8,"Humber Bay Shores, Mimico South, New Toronto",43.605647,-79.501321,Shoppers Drug Mart,43.601611,-79.502164,Pharmacy
9,"Humber Bay Shores, Mimico South, New Toronto",43.605647,-79.501321,Pet Valu,43.602431,-79.498653,Pet Store


In [357]:
etobicoke_venues.groupby('Neighborhood').count()

In [281]:
print('There are {} uniques categories.'.format(len(etobicoke_venues['Venue Category'].unique())))

There are 39 uniques categories.


In [282]:
# one hot encoding
etobicoke_onehot = pd.get_dummies(etobicoke_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
etobicoke_onehot['Neighborhood'] = etobicoke_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [etobicoke_onehot.columns[-1]] + list(etobicoke_onehot.columns[:-1])
etobicoke_onehot = etobicoke_onehot[fixed_columns]

etobicoke_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Athletics & Sports,Bakery,Bank,Baseball Field,Beer Store,Burger Joint,Burrito Place,Café,Chinese Restaurant,Coffee Shop,Construction & Landscaping,Convenience Store,Discount Store,Drugstore,Fast Food Restaurant,Fried Chicken Joint,Grocery Store,Gym,Intersection,Jewelry Store,Liquor Store,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Park,Pet Store,Pharmacy,Pizza Place,Pool,Pub,Rental Car Location,Restaurant,River,Sandwich Place,Shopping Plaza,Skating Rink,Supplement Shop,Wings Joint
0,"Humber Bay Shores, Mimico South, New Toronto",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Humber Bay Shores, Mimico South, New Toronto",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
2,"Humber Bay Shores, Mimico South, New Toronto",0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Humber Bay Shores, Mimico South, New Toronto",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Humber Bay Shores, Mimico South, New Toronto",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0


In [283]:
etobicoke_onehot.shape

(69, 40)

In [284]:
etobicoke_grouped = etobicoke_onehot.groupby('Neighborhood').mean().reset_index()
etobicoke_grouped

Unnamed: 0,Neighborhood,American Restaurant,Athletics & Sports,Bakery,Bank,Baseball Field,Beer Store,Burger Joint,Burrito Place,Café,Chinese Restaurant,Coffee Shop,Construction & Landscaping,Convenience Store,Discount Store,Drugstore,Fast Food Restaurant,Fried Chicken Joint,Grocery Store,Gym,Intersection,Jewelry Store,Liquor Store,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Park,Pet Store,Pharmacy,Pizza Place,Pool,Pub,Rental Car Location,Restaurant,River,Sandwich Place,Shopping Plaza,Skating Rink,Supplement Shop,Wings Joint
0,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.1,0.1,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.2,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.2,0.1,0.1,0.0,0.0,0.0,0.1,0.0,0.1,0.0,0.0
2,"Bloordale Gardens, Eringate, Markland Wood, Ol...",0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.166667,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0
3,"Cloverdale, Islington, Martin Grove, Princess ...",0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Humber Bay Shores, Mimico South, New Toronto",0.071429,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.071429,0.0,0.071429,0.0,0.0,0.071429,0.071429,0.0,0.0,0.0,0.071429,0.071429,0.071429,0.0,0.0,0.0,0.071429,0.0,0.071429,0.0,0.0,0.0,0.0
5,"Humber Bay, King's Mill Park, Kingsway Park So...",0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Kingsview Village, Martin Grove Gardens, Richv...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.333333,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Kingsway Park South West, Mimico NW, The Queen...",0.0,0.0,0.090909,0.0,0.0,0.0,0.090909,0.090909,0.0,0.0,0.0,0.0,0.090909,0.090909,0.0,0.090909,0.0,0.090909,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.090909,0.090909
8,Northwest,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"The Kingsway, Montgomery Road, Old Mill North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0


In [367]:
etobicoke_grouped.shape

(11, 41)

In [286]:
num_top_venues = 5

for hood in etobicoke_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = etobicoke_grouped[etobicoke_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown----
                 venue  freq
0          Pizza Place   0.2
1        Grocery Store   0.2
2           Beer Store   0.1
3       Sandwich Place   0.1
4  Fried Chicken Joint   0.1


----Alderwood, Long Branch----
                venue  freq
0         Pizza Place   0.2
1         Coffee Shop   0.1
2            Pharmacy   0.1
3  Athletics & Sports   0.1
4        Skating Rink   0.1


----Bloordale Gardens, Eringate, Markland Wood, Old Burnhamthorpe----
            venue  freq
0  Shopping Plaza  0.17
1      Beer Store  0.17
2            Café  0.17
3        Pharmacy  0.17
4    Liquor Store  0.17


----Cloverdale, Islington, Martin Grove, Princess Gardens, West Deane Park----
                 venue  freq
0                 Bank   0.5
1        Jewelry Store   0.5
2  American Restaurant   0.0
3                 Pool   0.0
4   Mexican Restaurant   0.0


----Humber Bay Shores, Mimico Sout

In [305]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]


num_top_venues = 10
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = etobicoke_grouped['Neighborhood']

for ind in np.arange(etobicoke_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(etobicoke_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Albion Gardens, Beaumond Heights, Humbergate, ...",Pizza Place,Grocery Store,Beer Store,Fast Food Restaurant,Coffee Shop,Pharmacy,Fried Chicken Joint,Sandwich Place,Baseball Field,Burger Joint
1,"Alderwood, Long Branch",Pizza Place,Pharmacy,Skating Rink,Athletics & Sports,Sandwich Place,Gym,Pub,Pool,Coffee Shop,Café
2,"Bloordale Gardens, Eringate, Markland Wood, Ol...",Pizza Place,Café,Shopping Plaza,Liquor Store,Beer Store,Pharmacy,Drugstore,Discount Store,Convenience Store,Construction & Landscaping
3,"Cloverdale, Islington, Martin Grove, Princess ...",Jewelry Store,Bank,Wings Joint,Coffee Shop,Fried Chicken Joint,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store,Construction & Landscaping
4,"Humber Bay Shores, Mimico South, New Toronto",Café,American Restaurant,Pizza Place,Fried Chicken Joint,Gym,Liquor Store,Mexican Restaurant,Pharmacy,Pet Store,Restaurant
5,"Humber Bay, King's Mill Park, Kingsway Park So...",Baseball Field,Construction & Landscaping,Wings Joint,Chinese Restaurant,Fried Chicken Joint,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store,Coffee Shop
6,"Kingsview Village, Martin Grove Gardens, Richv...",Pizza Place,Mobile Phone Shop,Park,Chinese Restaurant,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store,Construction & Landscaping,Coffee Shop
7,"Kingsway Park South West, Mimico NW, The Queen...",Wings Joint,Burger Joint,Supplement Shop,Fast Food Restaurant,Discount Store,Convenience Store,Grocery Store,Burrito Place,Gym,Sandwich Place
8,Northwest,Drugstore,Rental Car Location,Wings Joint,Chinese Restaurant,Fried Chicken Joint,Fast Food Restaurant,Discount Store,Convenience Store,Construction & Landscaping,Coffee Shop
9,"The Kingsway, Montgomery Road, Old Mill North",River,Park,Wings Joint,Chinese Restaurant,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store,Construction & Landscaping,Coffee Shop


In [382]:
# set number of clusters
kclusters = 5

etobicoke_grouped_clustering = etobicoke_grouped.drop('Neighborhood', 1)
#etobicoke_grouped_clustering = etobicoke_grouped

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(etobicoke_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 2, 2, 3, 2, 0, 2, 2, 1, 4], dtype=int32)

In [386]:
etobicoke_merged = etobicoke_grouped[['Neighborhood']]
etobicoke_merged['Borough']='Etobicoke'
# add clustering labels
etobicoke_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
etobicoke_merged = etobicoke_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

etobicoke_merged.head(10) # check the last columns!

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Neighborhood,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Albion Gardens, Beaumond Heights, Humbergate, ...",Etobicoke,2,Pizza Place,Grocery Store,Beer Store,Fast Food Restaurant,Coffee Shop,Pharmacy,Fried Chicken Joint,Sandwich Place,Baseball Field,Burger Joint
1,"Alderwood, Long Branch",Etobicoke,2,Pizza Place,Pharmacy,Skating Rink,Athletics & Sports,Sandwich Place,Gym,Pub,Pool,Coffee Shop,Café
2,"Bloordale Gardens, Eringate, Markland Wood, Ol...",Etobicoke,2,Pizza Place,Café,Shopping Plaza,Liquor Store,Beer Store,Pharmacy,Drugstore,Discount Store,Convenience Store,Construction & Landscaping
3,"Cloverdale, Islington, Martin Grove, Princess ...",Etobicoke,3,Jewelry Store,Bank,Wings Joint,Coffee Shop,Fried Chicken Joint,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store,Construction & Landscaping
4,"Humber Bay Shores, Mimico South, New Toronto",Etobicoke,2,Café,American Restaurant,Pizza Place,Fried Chicken Joint,Gym,Liquor Store,Mexican Restaurant,Pharmacy,Pet Store,Restaurant
5,"Humber Bay, King's Mill Park, Kingsway Park So...",Etobicoke,0,Baseball Field,Construction & Landscaping,Wings Joint,Chinese Restaurant,Fried Chicken Joint,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store,Coffee Shop
6,"Kingsview Village, Martin Grove Gardens, Richv...",Etobicoke,2,Pizza Place,Mobile Phone Shop,Park,Chinese Restaurant,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store,Construction & Landscaping,Coffee Shop
7,"Kingsway Park South West, Mimico NW, The Queen...",Etobicoke,2,Wings Joint,Burger Joint,Supplement Shop,Fast Food Restaurant,Discount Store,Convenience Store,Grocery Store,Burrito Place,Gym,Sandwich Place
8,Northwest,Etobicoke,1,Drugstore,Rental Car Location,Wings Joint,Chinese Restaurant,Fried Chicken Joint,Fast Food Restaurant,Discount Store,Convenience Store,Construction & Landscaping,Coffee Shop
9,"The Kingsway, Montgomery Road, Old Mill North",Etobicoke,4,River,Park,Wings Joint,Chinese Restaurant,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store,Construction & Landscaping,Coffee Shop


In [400]:
coordinates = etobicoke_venues.drop_duplicates(subset='Neighborhood', keep='first').reset_index(drop=True)[['Neighborhood','Neighborhood Latitude','Neighborhood Longitude']]
final = pd.merge(etobicoke_merged,coordinates, on='Neighborhood', how='inner')
final = final.rename(columns={'Neighborhood Latitude': 'Latitude', 'Neighborhood Longitude': 'Longitude'})
final.head(2)

Unnamed: 0,Neighborhood,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Latitude,Longitude
0,"Albion Gardens, Beaumond Heights, Humbergate, ...",Etobicoke,2,Pizza Place,Grocery Store,Beer Store,Fast Food Restaurant,Coffee Shop,Pharmacy,Fried Chicken Joint,Sandwich Place,Baseball Field,Burger Joint,43.739416,-79.588437
1,"Alderwood, Long Branch",Etobicoke,2,Pizza Place,Pharmacy,Skating Rink,Athletics & Sports,Sandwich Place,Gym,Pub,Pool,Coffee Shop,Café,43.602414,-79.543484


### 3.3 This is the K-means on geomap


In [403]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(final['Latitude'], final['Longitude'], final['Neighborhood'], final['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=7,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### 3.4 The final cluster groups

In [409]:
# Cluster 1
final.loc[final['Cluster Labels'] == 0, final.columns[[1] + list(range(2, final.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Latitude,Longitude
5,Etobicoke,0,Baseball Field,Construction & Landscaping,Wings Joint,Chinese Restaurant,Fried Chicken Joint,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store,Coffee Shop,43.636258,-79.498509


In [410]:
# Cluster 2
final.loc[final['Cluster Labels'] == 1, final.columns[[1] + list(range(2, final.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Latitude,Longitude
8,Etobicoke,1,Drugstore,Rental Car Location,Wings Joint,Chinese Restaurant,Fried Chicken Joint,Fast Food Restaurant,Discount Store,Convenience Store,Construction & Landscaping,Coffee Shop,43.706748,-79.594054


In [411]:
# Cluster 3
final.loc[final['Cluster Labels'] == 2, final.columns[[1] + list(range(2, final.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Latitude,Longitude
0,Etobicoke,2,Pizza Place,Grocery Store,Beer Store,Fast Food Restaurant,Coffee Shop,Pharmacy,Fried Chicken Joint,Sandwich Place,Baseball Field,Burger Joint,43.739416,-79.588437
1,Etobicoke,2,Pizza Place,Pharmacy,Skating Rink,Athletics & Sports,Sandwich Place,Gym,Pub,Pool,Coffee Shop,Café,43.602414,-79.543484
2,Etobicoke,2,Pizza Place,Café,Shopping Plaza,Liquor Store,Beer Store,Pharmacy,Drugstore,Discount Store,Convenience Store,Construction & Landscaping,43.643515,-79.577201
4,Etobicoke,2,Café,American Restaurant,Pizza Place,Fried Chicken Joint,Gym,Liquor Store,Mexican Restaurant,Pharmacy,Pet Store,Restaurant,43.605647,-79.501321
6,Etobicoke,2,Pizza Place,Mobile Phone Shop,Park,Chinese Restaurant,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store,Construction & Landscaping,Coffee Shop,43.688905,-79.554724
7,Etobicoke,2,Wings Joint,Burger Joint,Supplement Shop,Fast Food Restaurant,Discount Store,Convenience Store,Grocery Store,Burrito Place,Gym,Sandwich Place,43.628841,-79.520999
10,Etobicoke,2,Pizza Place,Chinese Restaurant,Middle Eastern Restaurant,Coffee Shop,Intersection,Sandwich Place,Bank,Baseball Field,Beer Store,Burger Joint,43.696319,-79.532242


In [412]:
# Cluster 4
final.loc[final['Cluster Labels'] == 3, final.columns[[1] + list(range(2, final.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Latitude,Longitude
3,Etobicoke,3,Jewelry Store,Bank,Wings Joint,Coffee Shop,Fried Chicken Joint,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store,Construction & Landscaping,43.650943,-79.554724


In [413]:
# Cluster 5
final.loc[final['Cluster Labels'] == 4, final.columns[[1] + list(range(2, final.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Latitude,Longitude
9,Etobicoke,4,River,Park,Wings Joint,Chinese Restaurant,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store,Construction & Landscaping,Coffee Shop,43.653654,-79.506944
