# Segmenting and Clustering Neighborhoods in Toronto

In [77]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

## 1. Scrapping Wikipedia page

In [78]:
List_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
source = requests.get(List_url).text

In [79]:
soup = BeautifulSoup(source, 'xml')

In [80]:
table=soup.find('table')

In [81]:
column_names = ['Postalcode','Borough','Neighborhood']
df = pd.DataFrame(columns = column_names)

In [82]:
for tr_cell in table.find_all('tr'):
    row_data=[]
    for td_cell in tr_cell.find_all('td'):
        row_data.append(td_cell.text.strip())
    if len(row_data)==3:
        df.loc[len(df)] = row_data

In [83]:
df.head()

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


## Ignoring cells with 'borough' and 'neighborhood' that are Not assigned

In [84]:
df=df[df['Borough'] != 'Not assigned']
df.head()

Unnamed: 0,Postalcode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


In [85]:
df.loc[df['Neighborhood'] == 'Not assigned']

Unnamed: 0,Postalcode,Borough,Neighborhood
9,M9A,Queen's Park,Not assigned


In [86]:
df[df['Neighborhood'] == 'Not assigned'] = df['Borough']

In [87]:
df.loc[df['Neighborhood'] == 'Not assigned']

Unnamed: 0,Postalcode,Borough,Neighborhood


In [88]:
df.shape

(210, 3)

## combining rows with the same postal code

In [89]:
df1 = df.groupby(['Postalcode', 'Borough'], sort = False).agg( ','.join)
df1 = df1.reset_index()
df1.head()

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park


In [90]:
df1.shape

(103, 3)

In [186]:
df1['Borough'].unique()

array(['North York', 'Downtown Toronto', "Queen's Park", 'Scarborough',
       'East York', 'Etobicoke', 'York', 'East Toronto', 'West Toronto',
       'Central Toronto', 'Mississauga'], dtype=object)








# 2. Latitude and the longitude coordinates of each neighborhood

In [91]:
!wget -q -O 'Toronto_location.csv'  http://cocl.us/Geospatial_data
df2 = pd.read_csv('Toronto_location.csv')
df2.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [92]:
df2.columns = ['Postalcode','Latitude','Longitude']
df2.head()

Unnamed: 0,Postalcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [185]:
df3 = pd.merge(df1, df2, on = 'Postalcode')
df3.head(35)

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
5,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
6,M3B,North York,Don Mills North,43.745906,-79.352188
7,M4B,East York,"Woodbine Gardens,Parkview Hill",43.706397,-79.309937
8,M5B,Downtown Toronto,"Ryerson,Garden District",43.657162,-79.378937
9,M6B,North York,Glencairn,43.709577,-79.445073







# 3. Explore and cluster the neighborhoods in Toronto 

In [208]:
#selecting neigborhood Etobicoke

df4 = df3[df3['Borough'].str.contains('Etobicoke', regex=False)]
df4

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
10,M9B,Etobicoke,"Cloverdale,Islington,Martin Grove,Princess Gar...",43.650943,-79.554724
16,M9C,Etobicoke,"Bloordale Gardens,Eringate,Markland Wood,Old B...",43.643515,-79.577201
69,M9P,Etobicoke,Westmount,43.696319,-79.532242
76,M9R,Etobicoke,"Kingsview Village,Martin Grove Gardens,Richvie...",43.688905,-79.554724
87,M8V,Etobicoke,"Humber Bay Shores,Mimico South,New Toronto",43.605647,-79.501321
88,M9V,Etobicoke,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",43.739416,-79.588437
92,M8W,Etobicoke,"Alderwood,Long Branch",43.602414,-79.543484
93,M9W,Etobicoke,Northwest,43.706748,-79.594054
97,M8X,Etobicoke,"The Kingsway,Montgomery Road,Old Mill North",43.653654,-79.506944
100,M8Y,Etobicoke,"Humber Bay,King's Mill Park,Kingsway Park Sout...",43.636258,-79.498509


In [42]:
import numpy as np 
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
!conda install -c conda-forge geopy --yes
import requests 
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
!conda install -c conda-forge folium=0.5.0 --yes
import folium 
print('Libraries imported.')

Solving environment: done

# All requested packages already installed.

Solving environment: done

# All requested packages already installed.

Libraries imported.


In [210]:
#displaying Etobicoke on the map

map_toronto = folium.Map(location=[43.654210, -79.567110],zoom_start=12)

for lat,lng,borough,neighbourhood in zip(df4['Latitude'],df4['Longitude'],df4['Borough'],df4['Neighborhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
    [lat,lng],
    radius=5,
    popup=label,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False).add_to(map_toronto)
map_toronto

In [161]:
# The code was removed by Watson Studio for sharing.

Your credentails:
CLIENT_ID: inserted
CLIENT_SECRET:inserted


In [217]:
#let's increase the radius up to 2500 m as Etobiko is sparcely populated so that there should be least venues 
radius = 2500
LIMIT = 1000
venues = []
for lat, long, post, borough, neighborhood in zip(df4['Latitude'], df4['Longitude'],df4['Postalcode'], df4['Borough'], df4['Neighborhood']):
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    for venue in results:
        venues.append((
            post, 
            borough,
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [218]:
venues_df = pd.DataFrame(venues)
venues_df.columns = ['Postalcode', 'Borough', 'Neighborhood', 'BoroughLatitude', 'BoroughLongitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']
print(venues_df.shape)
venues_df.head(10)

(1005, 9)


Unnamed: 0,Postalcode,Borough,Neighborhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,M9B,Etobicoke,"Cloverdale,Islington,Martin Grove,Princess Gar...",43.650943,-79.554724,Loblaws,43.643848,-79.560113,Grocery Store
1,M9B,Etobicoke,"Cloverdale,Islington,Martin Grove,Princess Gar...",43.650943,-79.554724,Duke's Source For Sports,43.640575,-79.541764,Sporting Goods Shop
2,M9B,Etobicoke,"Cloverdale,Islington,Martin Grove,Princess Gar...",43.650943,-79.554724,State & Main Kitchen & Bar,43.64575,-79.560231,American Restaurant
3,M9B,Etobicoke,"Cloverdale,Islington,Martin Grove,Princess Gar...",43.650943,-79.554724,Farmer's Market Etobicoke,43.643061,-79.566191,Farmers Market
4,M9B,Etobicoke,"Cloverdale,Islington,Martin Grove,Princess Gar...",43.650943,-79.554724,Tim Hortons,43.644705,-79.567659,Coffee Shop
5,M9B,Etobicoke,"Cloverdale,Islington,Martin Grove,Princess Gar...",43.650943,-79.554724,Anatolia Restaurant,43.644596,-79.53281,Turkish Restaurant
6,M9B,Etobicoke,"Cloverdale,Islington,Martin Grove,Princess Gar...",43.650943,-79.554724,Java Joe's Village Cafe,43.662461,-79.532054,Café
7,M9B,Etobicoke,"Cloverdale,Islington,Martin Grove,Princess Gar...",43.650943,-79.554724,St. James's Gate Irish Pub,43.643805,-79.533484,Pub
8,M9B,Etobicoke,"Cloverdale,Islington,Martin Grove,Princess Gar...",43.650943,-79.554724,West Deane Park,43.662357,-79.558618,Park
9,M9B,Etobicoke,"Cloverdale,Islington,Martin Grove,Princess Gar...",43.650943,-79.554724,McNies Fish and Chips,43.645341,-79.548415,Fish & Chips Shop


In [219]:
venues_df.groupby(["Borough", "Neighborhood"]).count()
venues_df.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,M9B,Etobicoke,"Cloverdale,Islington,Martin Grove,Princess Gar...",43.650943,-79.554724,Loblaws,43.643848,-79.560113,Grocery Store
1,M9B,Etobicoke,"Cloverdale,Islington,Martin Grove,Princess Gar...",43.650943,-79.554724,Duke's Source For Sports,43.640575,-79.541764,Sporting Goods Shop
2,M9B,Etobicoke,"Cloverdale,Islington,Martin Grove,Princess Gar...",43.650943,-79.554724,State & Main Kitchen & Bar,43.64575,-79.560231,American Restaurant
3,M9B,Etobicoke,"Cloverdale,Islington,Martin Grove,Princess Gar...",43.650943,-79.554724,Farmer's Market Etobicoke,43.643061,-79.566191,Farmers Market
4,M9B,Etobicoke,"Cloverdale,Islington,Martin Grove,Princess Gar...",43.650943,-79.554724,Tim Hortons,43.644705,-79.567659,Coffee Shop


In [221]:
venues_df['VenueCategory'].unique()

array(['Grocery Store', 'Sporting Goods Shop', 'American Restaurant',
       'Farmers Market', 'Coffee Shop', 'Turkish Restaurant', 'Café',
       'Pub', 'Park', 'Fish & Chips Shop', 'Furniture / Home Store',
       'Burger Joint', 'Tapas Restaurant', 'Pizza Place',
       'Thai Restaurant', 'Italian Restaurant', "Women's Store",
       'Playground', 'Liquor Store', 'Pharmacy', 'Fast Food Restaurant',
       'Cupcake Shop', 'Sushi Restaurant', 'Department Store',
       'Vietnamese Restaurant', 'College Rec Center', 'Discount Store',
       'Sandwich Place', 'Gym / Fitness Center', 'Bank', 'Beer Store',
       'Restaurant', 'Wings Joint', 'Korean Restaurant', 'Golf Course',
       'Gym', 'Supermarket', 'Convenience Store', 'History Museum',
       'Theater', 'Hotel', 'Train Station', 'Concert Hall', 'Bakery',
       'Transportation Service', 'Electronics Store', 'Greek Restaurant',
       'Record Shop', 'Fried Chicken Joint', 'Gas Station',
       'Shopping Mall', 'Donut Shop', 'Diner'

In [222]:
format(len(venues_df['VenueCategory'].unique()))

'148'

In [223]:
toronto_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")
toronto_onehot['Postalcode'] = venues_df['Postalcode'] 
toronto_onehot['Borough'] = venues_df['Borough'] 
toronto_onehot['Neighborhood'] = venues_df['Neighborhood'] 
fixed_columns = list(toronto_onehot.columns[-3:]) + list(toronto_onehot.columns[:-3])
toronto_onehot = toronto_onehot[fixed_columns]
toronto_grouped = toronto_onehot.groupby(["Postalcode", "Borough", "Neighborhood"]).mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Afghan Restaurant,American Restaurant,Arcade,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bakery,Bank,Bar,Baseball Field,Beer Store,Bookstore,Bowling Alley,Breakfast Spot,Brewery,Bridge,Building,Burger Joint,Burrito Place,Bus Line,Café,Caribbean Restaurant,Casino,Chinese Restaurant,Chocolate Shop,Church,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Rec Center,Comfort Food Restaurant,Concert Hall,Convenience Store,Cosmetics Shop,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Dog Run,Donut Shop,Drugstore,Eastern European Restaurant,Electronics Store,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Gas Station,Gastropub,General Entertainment,Golf Course,Golf Driving Range,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hardware Store,History Museum,Hotel,Hotel Bar,Hotel Pool,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Intersection,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Korean Restaurant,Latin American Restaurant,Laundromat,Liquor Store,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Movie Theater,Moving Target,Optical Shop,Organic Grocery,Paintball Field,Paper / Office Supplies Store,Park,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Pool Hall,Portuguese Restaurant,Pub,Racecourse,Racetrack,Record Shop,Rental Car Location,Restaurant,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shoe Store,Shopping Mall,Skating Rink,South American Restaurant,Spa,Spanish Restaurant,Sporting Goods Shop,Steakhouse,Storage Facility,Supermarket,Supplement Shop,Sushi Restaurant,Taco Place,Tapas Restaurant,Tea Room,Tennis Stadium,Thai Restaurant,Theater,Theme Park,Toy / Game Store,Trail,Train Station,Transportation Service,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wings Joint,Women's Store,Yoga Studio
0,M8V,Etobicoke,"Humber Bay Shores,Mimico South,New Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.03,0.02,0.02,0.0,0.02,0.0,0.0,0.04,0.02,0.0,0.0,0.0,0.02,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.09,0.0,0.01,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.04,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.04,0.04,0.0,0.01,0.0,0.01,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.04,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01
1,M8W,Etobicoke,"Alderwood,Long Branch",0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.01,0.0,0.03,0.0,0.0,0.0,0.04,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.01,0.01,0.01,0.07,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.04,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.06,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.03,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.01,0.01,0.04,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.04,0.04,0.0,0.03,0.0,0.01,0.01,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0
2,M8X,Etobicoke,"The Kingsway,Montgomery Road,Old Mill North",0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.06,0.03,0.01,0.0,0.01,0.01,0.0,0.02,0.0,0.0,0.0,0.03,0.01,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.01,0.0,0.02,0.01,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.07,0.01,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.03,0.01,0.0,0.01,0.0,0.04,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.05,0.01,0.02,0.01,0.0,0.04,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01
3,M8Y,Etobicoke,"Humber Bay,King's Mill Park,Kingsway Park Sout...",0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.06,0.01,0.01,0.0,0.01,0.01,0.0,0.01,0.01,0.01,0.01,0.01,0.02,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.01,0.0,0.07,0.01,0.0,0.01,0.0,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.03,0.01,0.0,0.02,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.06,0.0,0.01,0.01,0.0,0.03,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.02
4,M8Z,Etobicoke,"Kingsway Park South West,Mimico NW,The Queensw...",0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.04,0.01,0.02,0.0,0.01,0.0,0.0,0.04,0.02,0.0,0.0,0.04,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.01,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.06,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.01,0.0,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.04,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.05,0.0,0.02,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.01,0.01,0.02


In [224]:
num_top_venues = 10
indicators = ['st', 'nd', 'rd']
areaColumns = ['Postalcode', 'Borough', 'Neighborhood']
freqColumns = []
for ind in np.arange(num_top_venues):
    try:
        freqColumns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        freqColumns.append('{}th Most Common Venue'.format(ind+1))
columns = areaColumns+freqColumns

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Postalcode'] = toronto_grouped['Postalcode']
neighborhoods_venues_sorted['Borough'] = toronto_grouped['Borough']
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    row_categories = toronto_grouped.iloc[ind, :].iloc[3:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    neighborhoods_venues_sorted.iloc[ind, 3:] = row_categories_sorted.index.values[0:num_top_venues]

print(neighborhoods_venues_sorted.shape)
neighborhoods_venues_sorted.head()

(11, 13)


Unnamed: 0,Postalcode,Borough,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M8V,Etobicoke,"Humber Bay Shores,Mimico South,New Toronto",Coffee Shop,Park,Restaurant,Italian Restaurant,Sandwich Place,Pizza Place,Breakfast Spot,Sushi Restaurant,Café,Indian Restaurant
1,M8W,Etobicoke,"Alderwood,Long Branch",Coffee Shop,Fast Food Restaurant,Pizza Place,Restaurant,Sandwich Place,Burger Joint,Department Store,Breakfast Spot,Furniture / Home Store,Seafood Restaurant
2,M8X,Etobicoke,"The Kingsway,Montgomery Road,Old Mill North",Italian Restaurant,Coffee Shop,Bakery,Café,Sushi Restaurant,Pub,Thai Restaurant,Burger Joint,Pizza Place,Bank
3,M8Y,Etobicoke,"Humber Bay,King's Mill Park,Kingsway Park Sout...",Coffee Shop,Italian Restaurant,Sushi Restaurant,Bakery,Café,Thai Restaurant,Park,Liquor Store,Yoga Studio,Restaurant
4,M8Z,Etobicoke,"Kingsway Park South West,Mimico NW,The Queensw...",Coffee Shop,Italian Restaurant,Sushi Restaurant,Restaurant,Fast Food Restaurant,Burger Joint,Bakery,Breakfast Spot,Pizza Place,Pub


In [241]:
#doing clustering

from sklearn.cluster import KMeans 
from sklearn.datasets.samples_generator import make_blobs
kclusters = 3
toronto_grouped_clustering = toronto_grouped.drop(["Postalcode", "Borough", "Neighborhood"], 1)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)
kmeans.labels_[0:10]

array([1, 0, 1, 1, 1, 0, 0, 0, 0, 0], dtype=int32)

In [242]:
toronto_merged = df4.copy()
toronto_merged["Cluster Label"] = kmeans.labels_
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.drop(["Borough", "Neighborhood"], 1).set_index("Postalcode"), on="Postalcode")
print(toronto_merged.shape)
toronto_merged

(11, 16)


Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,M9B,Etobicoke,"Cloverdale,Islington,Martin Grove,Princess Gar...",43.650943,-79.554724,1,Coffee Shop,Pizza Place,Fast Food Restaurant,Sandwich Place,Pharmacy,Grocery Store,Bank,Café,Beer Store,Sushi Restaurant
16,M9C,Etobicoke,"Bloordale Gardens,Eringate,Markland Wood,Old B...",43.643515,-79.577201,0,Coffee Shop,Hotel,Pharmacy,Sandwich Place,Grocery Store,Pub,Pizza Place,Park,Beer Store,Liquor Store
69,M9P,Etobicoke,Westmount,43.696319,-79.532242,1,Coffee Shop,Grocery Store,Pizza Place,Sandwich Place,Bank,Bus Line,Beer Store,Pharmacy,Shopping Mall,Chinese Restaurant
76,M9R,Etobicoke,"Kingsview Village,Martin Grove Gardens,Richvie...",43.688905,-79.554724,1,Coffee Shop,Pharmacy,American Restaurant,Sandwich Place,Hotel,Pizza Place,Beer Store,Bank,Supermarket,Gas Station
87,M8V,Etobicoke,"Humber Bay Shores,Mimico South,New Toronto",43.605647,-79.501321,1,Coffee Shop,Park,Restaurant,Italian Restaurant,Sandwich Place,Pizza Place,Breakfast Spot,Sushi Restaurant,Café,Indian Restaurant
88,M9V,Etobicoke,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",43.739416,-79.588437,0,Coffee Shop,Indian Restaurant,Pizza Place,Fast Food Restaurant,Grocery Store,Bank,Pharmacy,Sandwich Place,Caribbean Restaurant,Gas Station
92,M8W,Etobicoke,"Alderwood,Long Branch",43.602414,-79.543484,0,Coffee Shop,Fast Food Restaurant,Pizza Place,Restaurant,Sandwich Place,Burger Joint,Department Store,Breakfast Spot,Furniture / Home Store,Seafood Restaurant
93,M9W,Etobicoke,Northwest,43.706748,-79.594054,0,Hotel,Coffee Shop,American Restaurant,Gas Station,Sandwich Place,Steakhouse,Fast Food Restaurant,Restaurant,Mediterranean Restaurant,Sushi Restaurant
97,M8X,Etobicoke,"The Kingsway,Montgomery Road,Old Mill North",43.653654,-79.506944,0,Italian Restaurant,Coffee Shop,Bakery,Café,Sushi Restaurant,Pub,Thai Restaurant,Burger Joint,Pizza Place,Bank
100,M8Y,Etobicoke,"Humber Bay,King's Mill Park,Kingsway Park Sout...",43.636258,-79.498509,0,Coffee Shop,Italian Restaurant,Sushi Restaurant,Bakery,Café,Thai Restaurant,Park,Liquor Store,Yoga Studio,Restaurant


In [243]:
print(toronto_merged.shape)
toronto_merged.sort_values(["Cluster Label"], inplace=True)
toronto_merged

(11, 16)


Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
16,M9C,Etobicoke,"Bloordale Gardens,Eringate,Markland Wood,Old B...",43.643515,-79.577201,0,Coffee Shop,Hotel,Pharmacy,Sandwich Place,Grocery Store,Pub,Pizza Place,Park,Beer Store,Liquor Store
88,M9V,Etobicoke,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",43.739416,-79.588437,0,Coffee Shop,Indian Restaurant,Pizza Place,Fast Food Restaurant,Grocery Store,Bank,Pharmacy,Sandwich Place,Caribbean Restaurant,Gas Station
92,M8W,Etobicoke,"Alderwood,Long Branch",43.602414,-79.543484,0,Coffee Shop,Fast Food Restaurant,Pizza Place,Restaurant,Sandwich Place,Burger Joint,Department Store,Breakfast Spot,Furniture / Home Store,Seafood Restaurant
93,M9W,Etobicoke,Northwest,43.706748,-79.594054,0,Hotel,Coffee Shop,American Restaurant,Gas Station,Sandwich Place,Steakhouse,Fast Food Restaurant,Restaurant,Mediterranean Restaurant,Sushi Restaurant
97,M8X,Etobicoke,"The Kingsway,Montgomery Road,Old Mill North",43.653654,-79.506944,0,Italian Restaurant,Coffee Shop,Bakery,Café,Sushi Restaurant,Pub,Thai Restaurant,Burger Joint,Pizza Place,Bank
100,M8Y,Etobicoke,"Humber Bay,King's Mill Park,Kingsway Park Sout...",43.636258,-79.498509,0,Coffee Shop,Italian Restaurant,Sushi Restaurant,Bakery,Café,Thai Restaurant,Park,Liquor Store,Yoga Studio,Restaurant
10,M9B,Etobicoke,"Cloverdale,Islington,Martin Grove,Princess Gar...",43.650943,-79.554724,1,Coffee Shop,Pizza Place,Fast Food Restaurant,Sandwich Place,Pharmacy,Grocery Store,Bank,Café,Beer Store,Sushi Restaurant
69,M9P,Etobicoke,Westmount,43.696319,-79.532242,1,Coffee Shop,Grocery Store,Pizza Place,Sandwich Place,Bank,Bus Line,Beer Store,Pharmacy,Shopping Mall,Chinese Restaurant
76,M9R,Etobicoke,"Kingsview Village,Martin Grove Gardens,Richvie...",43.688905,-79.554724,1,Coffee Shop,Pharmacy,American Restaurant,Sandwich Place,Hotel,Pizza Place,Beer Store,Bank,Supermarket,Gas Station
87,M8V,Etobicoke,"Humber Bay Shores,Mimico South,New Toronto",43.605647,-79.501321,1,Coffee Shop,Park,Restaurant,Italian Restaurant,Sandwich Place,Pizza Place,Breakfast Spot,Sushi Restaurant,Café,Indian Restaurant


In [244]:
import matplotlib.cm as cm
import matplotlib.colors as colors
map_clusters = folium.Map(location=[43.654210, -79.567110], zoom_start=12)
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]
markers_colors = []
for lat, lon, post, bor, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Postalcode'], toronto_merged['Borough'], toronto_merged['Neighborhood'], toronto_merged['Cluster Label']):
    label = folium.Popup('{} ({}): {} - Cluster {}'.format(bor, post, poi, cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [245]:
#display each cluster, there are venues for three clusters only
toronto_merged.loc[toronto_merged['Cluster Label'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
16,Etobicoke,0,Coffee Shop,Hotel,Pharmacy,Sandwich Place,Grocery Store,Pub,Pizza Place,Park,Beer Store,Liquor Store
88,Etobicoke,0,Coffee Shop,Indian Restaurant,Pizza Place,Fast Food Restaurant,Grocery Store,Bank,Pharmacy,Sandwich Place,Caribbean Restaurant,Gas Station
92,Etobicoke,0,Coffee Shop,Fast Food Restaurant,Pizza Place,Restaurant,Sandwich Place,Burger Joint,Department Store,Breakfast Spot,Furniture / Home Store,Seafood Restaurant
93,Etobicoke,0,Hotel,Coffee Shop,American Restaurant,Gas Station,Sandwich Place,Steakhouse,Fast Food Restaurant,Restaurant,Mediterranean Restaurant,Sushi Restaurant
97,Etobicoke,0,Italian Restaurant,Coffee Shop,Bakery,Café,Sushi Restaurant,Pub,Thai Restaurant,Burger Joint,Pizza Place,Bank
100,Etobicoke,0,Coffee Shop,Italian Restaurant,Sushi Restaurant,Bakery,Café,Thai Restaurant,Park,Liquor Store,Yoga Studio,Restaurant


In [246]:
toronto_merged.loc[toronto_merged['Cluster Label'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Etobicoke,1,Coffee Shop,Pizza Place,Fast Food Restaurant,Sandwich Place,Pharmacy,Grocery Store,Bank,Café,Beer Store,Sushi Restaurant
69,Etobicoke,1,Coffee Shop,Grocery Store,Pizza Place,Sandwich Place,Bank,Bus Line,Beer Store,Pharmacy,Shopping Mall,Chinese Restaurant
76,Etobicoke,1,Coffee Shop,Pharmacy,American Restaurant,Sandwich Place,Hotel,Pizza Place,Beer Store,Bank,Supermarket,Gas Station
87,Etobicoke,1,Coffee Shop,Park,Restaurant,Italian Restaurant,Sandwich Place,Pizza Place,Breakfast Spot,Sushi Restaurant,Café,Indian Restaurant


In [247]:
toronto_merged.loc[toronto_merged['Cluster Label'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
101,Etobicoke,2,Coffee Shop,Italian Restaurant,Sushi Restaurant,Restaurant,Fast Food Restaurant,Burger Joint,Bakery,Breakfast Spot,Pizza Place,Pub


In [250]:
toronto_merged.loc[toronto_merged['Cluster Label'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


In [249]:
toronto_merged.loc[toronto_merged['Cluster Label'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue



# you can see folium maps via the ibm cloud link whick enclosed in my coursera task sheet