Import Libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analysis
import requests # library to handle HTTP requests
from bs4 import BeautifulSoup # HTML parsing
print('Libraries imported.')

Libraries imported.


Scrape the data from Wikipedia

In [2]:
# Download the Wikipedia page listing the "M" postal codes.
# A timeout keeps the cell from hanging, and raise_for_status() fails loudly
# on an HTTP error instead of parsing an error page.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()

# NOTE: despite its name, `url` holds the page's HTML text (name kept for compatibility).
url = response.text

# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed (and to avoid bs4's "no parser specified" warning).
soup = BeautifulSoup(url, 'html.parser')

Identify the table and extract it into an array

In [3]:
# First <table> element whose class attribute is 'wikitable sortable'
my_table = soup.find('table', attrs={'class': 'wikitable sortable'})

In [4]:
# One inner list per <tr>: the whitespace-stripped text of each of its <td> cells.
# Rows without any <td> (e.g. a header row made of <th>) yield an empty list.
data_extract = [
    [cell.text.strip() for cell in table_row.find_all('td')]
    for table_row in my_table.find_all('tr')
]

Frame the data

In [5]:
# Build the frame from the scraped rows, then give the three positional
# columns readable names.
column_names = {0: "Postcode", 1: "Borough", 2: "Neighborhood"}
df = pd.DataFrame(data_extract).rename(columns=column_names)
df.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,,,
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village


Clean up the data frame

In [6]:
# Discard the row labelled 0, keep only rows whose borough is not the
# 'Not assigned' placeholder, and renumber the index from zero.
df2 = df.drop(index=[0])
df2 = df2.loc[df2['Borough'] != 'Not assigned'].reset_index(drop=True)
df2.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [7]:
# Work on a copy so df2 stays untouched.
df3 = df2.copy()

# Where the neighbourhood is the 'Not assigned' placeholder, fall back to the
# borough name. A boolean mask does this in one vectorised assignment instead
# of calling a Python lambda once per row.
unassigned = df3['Neighborhood'] == 'Not assigned'
df3.loc[unassigned, 'Neighborhood'] = df3.loc[unassigned, 'Borough']

In [8]:
# Collapse to one row per (Postcode, Borough), joining the distinct
# neighbourhood names with commas.
# Series.unique() keeps first-appearance order, so the joined string is the
# same on every run; the previous set(...) gave an arbitrary order.
df4 = (
    df3.groupby(['Postcode', 'Borough'])['Neighborhood']
    .apply(lambda names: ','.join(names.dropna().unique()))
    .reset_index()
)

df4

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


In [9]:
# (number of rows, number of columns) of the grouped frame
df4.shape

(103, 3)

Import geospatial coordinates

In [16]:
# Load the postal-code coordinate table (comma-separated) straight from its URL
geo_coordinates = pd.read_csv('http://cocl.us/Geospatial_data')

# Preview the first rows of the loaded table
geo_coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [20]:
# Attach latitude/longitude to each postal code by matching on the code itself.
# A positional concat would silently pair the wrong coordinates if the two
# frames ever differed in row order or length; a keyed left merge cannot.
df_final = (
    df4.merge(geo_coordinates, how='left', left_on='Postcode', right_on='Postal Code')
    .drop(columns=['Postal Code'])  # duplicate of 'Postcode' after the merge
)
df_final.head(12)

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


Begin Mapping and Analysis

In [22]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

In [23]:
address = 'Toronto, Ontario'

# Nominatim requires an application-specific user agent; recent geopy releases
# refuse to construct the geocoder without one.
geolocator = Nominatim(user_agent='toronto_neighborhood_explorer')
location = geolocator.geocode(address)

# geocode() returns None when nothing matches; fail with a clear message
# rather than an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto, Canada are', latitude, ',' ,longitude)

  


The geograpical coordinate of Toronto, Canada are 43.6534817 , -79.3839347


Create data frame for only boroughs that contain the word Toronto

In [27]:
# Keep the rows whose borough name contains 'Toronto' and renumber them from zero
is_toronto_borough = df_final['Borough'].str.contains('Toronto')
df_toronto = df_final[is_toronto_borough].reset_index(drop=True)
df_toronto.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [35]:
import folium
from IPython.display import display

In [36]:
# Base map centred on the geocoded Toronto coordinates
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=13)

# Draw one circle marker per row of df_toronto; the popup text is
# "<neighborhood>,<borough>".
marker_rows = zip(
    df_toronto['Latitude'],
    df_toronto['Longitude'],
    df_toronto['Borough'],
    df_toronto['Neighborhood'],
)
for lat, lng, borough, neighborhood in marker_rows:
    popup = folium.Popup('{},{}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        location=[lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    )
    marker.add_to(map_toronto)

display(map_toronto)

In [42]:
# define Foursquare Credentials and Version
# Credentials are read from the environment so real keys never have to be typed
# into (or saved with) the notebook. The placeholders are used when the
# variables are not set.
import os

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'your Foursquare ID') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'your Foursquare Password') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# Never echo the secret itself: cell outputs are stored in the notebook file.
print('CLIENT_SECRET:' + '*' * 8)

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


In [43]:
radius = 500  # search radius passed to the API (presumably metres; confirm in the Foursquare docs)
LIMIT = 100   # maximum number of venues requested per location

venues = []

# Ask the Foursquare "explore" endpoint for venues around every postal-code
# coordinate and flatten the answers into one tuple per venue.
for lat, long, post, borough, neighborhood in zip(
        df_toronto['Latitude'],
        df_toronto['Longitude'],
        df_toronto['Postcode'],
        df_toronto['Borough'],
        df_toronto['Neighborhood']):
    url = "https://api.foursquare.com/v2/venues/explore"
    # Let requests build and URL-encode the query string instead of
    # formatting the credentials into the URL by hand.
    params = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
        'll': '{},{}'.format(lat, long),
        'radius': radius,
        'limit': LIMIT,
    }

    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()  # surface HTTP errors instead of a KeyError below
    results = response.json()["response"]['groups'][0]['items']

    for venue in results:
        details = venue['venue']
        categories = details['categories']
        if not categories:
            # A venue without any category cannot feed the category analysis
            # and would otherwise raise IndexError on categories[0].
            continue
        venues.append((
            post,
            borough,
            neighborhood,
            lat,
            long,
            details['name'],
            details['location']['lat'],
            details['location']['lng'],
            categories[0]['name']))


In [44]:
# Column names for the nine values stored in each venue tuple
venue_columns = [
    'PostalCode',
    'Borough',
    'Neighborhood',
    'BoroughLatitude',
    'BoroughLongitude',
    'VenueName',
    'VenueLatitude',
    'VenueLongitude',
    'VenueCategory',
]

# Turn the collected tuples into a frame and label its columns
venues_df = pd.DataFrame(venues)
venues_df.columns = venue_columns

print(venues_df.shape)
venues_df.head()

(1622, 9)


Unnamed: 0,PostalCode,Borough,Neighborhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,M4E,East Toronto,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,M4E,East Toronto,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,M4E,East Toronto,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,MenEssentials,43.67782,-79.351265,Cosmetics Shop


In [45]:
# Number of non-null values per column for every (postal code, borough, neighborhood) group
group_keys = ["PostalCode", "Borough", "Neighborhood"]
venues_df.groupby(group_keys).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
PostalCode,Borough,Neighborhood,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
M4E,East Toronto,The Beaches,4,4,4,4,4,4
M4K,East Toronto,"The Danforth West, Riverdale",43,43,43,43,43,43
M4L,East Toronto,"India Bazaar, The Beaches West",24,24,24,24,24,24
M4M,East Toronto,Studio District,40,40,40,40,40,40
M4N,Central Toronto,Lawrence Park,3,3,3,3,3,3
M4P,Central Toronto,Davisville North,8,8,8,8,8,8
M4R,Central Toronto,"North Toronto West, Lawrence Park",19,19,19,19,19,19
M4S,Central Toronto,Davisville,35,35,35,35,35,35
M4T,Central Toronto,"Moore Park, Summerhill East",2,2,2,2,2,2
M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park",16,16,16,16,16,16


In [46]:
# Report how many distinct venue categories were returned
distinct_categories = venues_df['VenueCategory'].unique()
print('There are {} uniques categories.'.format(len(distinct_categories)))

There are 233 uniques categories.


In [47]:
# First 20 distinct venue categories, in order of first appearance
category_values = venues_df['VenueCategory'].unique()
category_values[0:20]

array(['Trail', 'Health Food Store', 'Pub', 'Neighborhood',
       'Cosmetics Shop', 'Greek Restaurant', 'Ice Cream Shop',
       'Italian Restaurant', 'Brewery', 'Fruit & Vegetable Store',
       'Yoga Studio', 'Juice Bar', 'Restaurant', 'Pizza Place',
       'Bookstore', 'Bubble Tea Shop', 'Dessert Shop',
       'Furniture / Home Store', 'Spa', 'Grocery Store'], dtype=object)

In [48]:
# One-hot encode the venue category (one indicator column per category, no
# prefix), then append the three area columns. The neighbourhood column is
# stored as 'Neighborhoods'.
toronto_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="").assign(
    PostalCode=venues_df['PostalCode'],
    Borough=venues_df['Borough'],
    Neighborhoods=venues_df['Neighborhood'],
)

# Reorder so the last three columns come first, followed by everything else
trailing_three = toronto_onehot.columns[-3:].tolist()
everything_else = toronto_onehot.columns[:-3].tolist()
fixed_columns = trailing_three + everything_else
toronto_onehot = toronto_onehot[fixed_columns]

print(toronto_onehot.shape)
toronto_onehot.head()

(1622, 236)


Unnamed: 0,PostalCode,Borough,Neighborhoods,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M4K,East Toronto,"The Danforth West, Riverdale",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [49]:
# Average every indicator column per area: each value becomes the share of
# that area's venues falling in the category.
area_keys = ["PostalCode", "Borough", "Neighborhoods"]
toronto_grouped = toronto_onehot.groupby(area_keys).mean().reset_index()

print(toronto_grouped.shape)
toronto_grouped

(39, 236)


Unnamed: 0,PostalCode,Borough,Neighborhoods,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,M4E,East Toronto,The Beaches,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M4K,East Toronto,"The Danforth West, Riverdale",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256
2,M4L,East Toronto,"India Bazaar, The Beaches West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M4M,East Toronto,Studio District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.0,0.025
4,M4N,Central Toronto,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,M4P,Central Toronto,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,M4R,Central Toronto,"North Toronto West, Lawrence Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632
7,M4S,Central Toronto,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,M4T,Central Toronto,"Moore Park, Summerhill East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0


In [50]:
num_top_venues = 10


def _ordinal(position):
    """Return `position` followed by its English ordinal suffix.

    Examples: 1 -> '1st', 2 -> '2nd', 3 -> '3rd', 4 -> '4th', 11 -> '11th',
    21 -> '21st'.
    """
    if 10 <= position % 100 <= 20:
        suffix = 'th'  # 11th, 12th, 13th, ... are irregular
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(position % 10, 'th')
    return '{}{}'.format(position, suffix)


# create columns according to number of top venues
areaColumns = ['PostalCode', 'Borough', 'Neighborhoods']
freqColumns = [
    '{} Most Common Venue'.format(_ordinal(rank))
    for rank in range(1, num_top_venues + 1)
]
columns = areaColumns+freqColumns

# create a new dataframe with one row per row of toronto_grouped
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['PostalCode'] = toronto_grouped['PostalCode']
neighborhoods_venues_sorted['Borough'] = toronto_grouped['Borough']
neighborhoods_venues_sorted['Neighborhoods'] = toronto_grouped['Neighborhoods']

# For every area, rank the category columns (everything after the first three)
# by value, highest first, and store the names of the top `num_top_venues`.
for ind in range(toronto_grouped.shape[0]):
    row_categories = toronto_grouped.iloc[ind, :].iloc[3:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    neighborhoods_venues_sorted.iloc[ind, 3:] = row_categories_sorted.index.values[0:num_top_venues]

print(neighborhoods_venues_sorted.shape)
neighborhoods_venues_sorted

(39, 13)


Unnamed: 0,PostalCode,Borough,Neighborhoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,Trail,Neighborhood,Health Food Store,Pub,Dog Run,Dessert Shop,Diner,Discount Store,Distribution Center,Yoga Studio
1,M4K,East Toronto,"The Danforth West, Riverdale",Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Restaurant,Bookstore,Furniture / Home Store,Yoga Studio,Spa,Indian Restaurant
2,M4L,East Toronto,"India Bazaar, The Beaches West",Park,Sandwich Place,Fast Food Restaurant,Pizza Place,Coffee Shop,Pub,Liquor Store,Light Rail Station,Burrito Place,Restaurant
3,M4M,East Toronto,Studio District,Café,Coffee Shop,American Restaurant,Bakery,Brewery,Gastropub,Yoga Studio,Fish Market,Pet Store,Park
4,M4N,Central Toronto,Lawrence Park,Park,Swim School,Bus Line,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop
5,M4P,Central Toronto,Davisville North,Gym,Food & Drink Shop,Sandwich Place,Hotel,Dog Run,Department Store,Breakfast Spot,Park,Eastern European Restaurant,Donut Shop
6,M4R,Central Toronto,"North Toronto West, Lawrence Park",Coffee Shop,Clothing Store,Yoga Studio,Sporting Goods Shop,Grocery Store,Fast Food Restaurant,Diner,Mexican Restaurant,Park,Chinese Restaurant
7,M4S,Central Toronto,Davisville,Sandwich Place,Dessert Shop,Pizza Place,Gym,Italian Restaurant,Café,Coffee Shop,Sushi Restaurant,Gas Station,Brewery
8,M4T,Central Toronto,"Moore Park, Summerhill East",Gym,Trail,Department Store,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant
9,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",Pub,Coffee Shop,Sports Bar,Liquor Store,Supermarket,Vietnamese Restaurant,Sushi Restaurant,Bagel Shop,Light Rail Station,American Restaurant


In [52]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

# Feature matrix for clustering: the per-area category values only, without
# the three identifying columns.
toronto_grouped_clustering = toronto_grouped.drop(columns=["PostalCode", "Borough", "Neighborhoods"])

# run k-means clustering (fixed random_state so reruns give the same labels)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for the first ten rows of the dataframe
kmeans.labels_[0:10]

array([4, 0, 0, 0, 2, 0, 0, 0, 3, 0])

In [57]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

# k-means was fitted on toronto_grouped, so its labels follow that frame's row
# order. Key them by postal code rather than assigning them positionally to
# df_toronto, which need not have the same rows in the same order.
cluster_labels = pd.Series(
    kmeans.labels_,
    index=toronto_grouped['PostalCode'].values,
    name='Cluster Labels',
)
toronto_merged = df_toronto.join(cluster_labels, on='Postcode')

# Postal codes that never made it into toronto_grouped have no cluster; drop
# them so the label column stays integer-typed.
toronto_merged = toronto_merged.dropna(subset=['Cluster Labels'])
toronto_merged['Cluster Labels'] = toronto_merged['Cluster Labels'].astype(int)

# add the top-venue columns, matched on postal code
top_venues = neighborhoods_venues_sorted.drop(columns=["Borough", "Neighborhoods"]).set_index("PostalCode")
toronto_merged = toronto_merged.join(top_venues, on="Postcode")

print(toronto_merged.shape)
toronto_merged.head() # check the last columns!

(39, 16)


Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,4,Trail,Neighborhood,Health Food Store,Pub,Dog Run,Dessert Shop,Diner,Discount Store,Distribution Center,Yoga Studio
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,0,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Restaurant,Bookstore,Furniture / Home Store,Yoga Studio,Spa,Indian Restaurant
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572,0,Park,Sandwich Place,Fast Food Restaurant,Pizza Place,Coffee Shop,Pub,Liquor Store,Light Rail Station,Burrito Place,Restaurant
3,M4M,East Toronto,Studio District,43.659526,-79.340923,0,Café,Coffee Shop,American Restaurant,Bakery,Brewery,Gastropub,Yoga Studio,Fish Market,Pet Store,Park
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,2,Park,Swim School,Bus Line,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop


In [58]:
# Order the rows by their cluster label (ascending)
print(toronto_merged.shape)
toronto_merged = toronto_merged.sort_values(by=["Cluster Labels"])
toronto_merged

(39, 16)


Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,M5J,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.640816,-79.381752,0,Coffee Shop,Aquarium,Hotel,Café,Scenic Lookout,Sporting Goods Shop,Brewery,Fried Chicken Joint,Italian Restaurant,Restaurant
21,M5L,Downtown Toronto,"Commerce Court, Victoria Hotel",43.648198,-79.379817,0,Coffee Shop,Restaurant,Café,Hotel,American Restaurant,Gym,Japanese Restaurant,Italian Restaurant,Seafood Restaurant,Thai Restaurant
23,M5P,Central Toronto,"Forest Hill North & West, Forest Hill Road Park",43.696948,-79.411307,0,Park,Trail,Jewelry Store,Sushi Restaurant,Yoga Studio,Department Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop
24,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678,0,Coffee Shop,Café,Sandwich Place,Pizza Place,Indian Restaurant,Pub,Donut Shop,BBQ Joint,History Museum,Flower Shop
25,M5S,Downtown Toronto,"University of Toronto, Harbord",43.662696,-79.400049,0,Café,Restaurant,Bar,Italian Restaurant,Japanese Restaurant,Bookstore,Bakery,Sandwich Place,Flower Shop,Beer Bar
26,M5T,Downtown Toronto,"Kensington Market, Chinatown, Grange Park",43.653206,-79.400049,0,Café,Bakery,Mexican Restaurant,Vietnamese Restaurant,Coffee Shop,Vegetarian / Vegan Restaurant,Park,Gaming Cafe,Pizza Place,Bar
27,M5V,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.39442,0,Airport Service,Airport Lounge,Airport Terminal,Boutique,Coffee Shop,Airport,Airport Food Court,Airport Gate,Sculpture Garden,Harbor / Marina
28,M5W,Downtown Toronto,Stn A PO Boxes,43.646435,-79.374846,0,Coffee Shop,Café,Seafood Restaurant,Cocktail Bar,Restaurant,Beer Bar,Japanese Restaurant,Hotel,Italian Restaurant,Pub
29,M5X,Downtown Toronto,"First Canadian Place, Underground city",43.648429,-79.38228,0,Coffee Shop,Café,Restaurant,Hotel,Gym,Asian Restaurant,Japanese Restaurant,Seafood Restaurant,Steakhouse,Salad Place
30,M6G,Downtown Toronto,Christie,43.669542,-79.422564,0,Grocery Store,Café,Park,Baby Store,Nightclub,Italian Restaurant,Diner,Candy Store,Restaurant,Coffee Shop


In [61]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per
# cluster, converted to hex strings
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# add markers to the map
for lat, lon, post, bor, poi, cluster in zip(
        toronto_merged['Latitude'],
        toronto_merged['Longitude'],
        toronto_merged['Postcode'],
        toronto_merged['Borough'],
        toronto_merged['Neighborhood'],
        toronto_merged['Cluster Labels']):
    label = folium.Popup('{} ({}): {} - Cluster {}'.format(bor, post, poi, cluster), parse_html=True)
    # Index the palette with the label itself; `cluster-1` made cluster 0 wrap
    # around to the last colour through a negative index.
    marker_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [62]:
# Cluster 0: show column 1 plus every column from position 5 onwards
in_cluster = toronto_merged['Cluster Labels'] == 0
shown_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,Downtown Toronto,0,Coffee Shop,Aquarium,Hotel,Café,Scenic Lookout,Sporting Goods Shop,Brewery,Fried Chicken Joint,Italian Restaurant,Restaurant
21,Downtown Toronto,0,Coffee Shop,Restaurant,Café,Hotel,American Restaurant,Gym,Japanese Restaurant,Italian Restaurant,Seafood Restaurant,Thai Restaurant
23,Central Toronto,0,Park,Trail,Jewelry Store,Sushi Restaurant,Yoga Studio,Department Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop
24,Central Toronto,0,Coffee Shop,Café,Sandwich Place,Pizza Place,Indian Restaurant,Pub,Donut Shop,BBQ Joint,History Museum,Flower Shop
25,Downtown Toronto,0,Café,Restaurant,Bar,Italian Restaurant,Japanese Restaurant,Bookstore,Bakery,Sandwich Place,Flower Shop,Beer Bar
26,Downtown Toronto,0,Café,Bakery,Mexican Restaurant,Vietnamese Restaurant,Coffee Shop,Vegetarian / Vegan Restaurant,Park,Gaming Cafe,Pizza Place,Bar
27,Downtown Toronto,0,Airport Service,Airport Lounge,Airport Terminal,Boutique,Coffee Shop,Airport,Airport Food Court,Airport Gate,Sculpture Garden,Harbor / Marina
28,Downtown Toronto,0,Coffee Shop,Café,Seafood Restaurant,Cocktail Bar,Restaurant,Beer Bar,Japanese Restaurant,Hotel,Italian Restaurant,Pub
29,Downtown Toronto,0,Coffee Shop,Café,Restaurant,Hotel,Gym,Asian Restaurant,Japanese Restaurant,Seafood Restaurant,Steakhouse,Salad Place
30,Downtown Toronto,0,Grocery Store,Café,Park,Baby Store,Nightclub,Italian Restaurant,Diner,Candy Store,Restaurant,Coffee Shop


In [63]:
# Cluster 1: show column 1 plus every column from position 5 onwards
in_cluster = toronto_merged['Cluster Labels'] == 1
shown_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,Central Toronto,1,Music Venue,Garden,Deli / Bodega,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run


In [64]:
# Cluster 2: show column 1 plus every column from position 5 onwards
in_cluster = toronto_merged['Cluster Labels'] == 2
shown_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Central Toronto,2,Park,Swim School,Bus Line,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop
10,Downtown Toronto,2,Park,Playground,Trail,Yoga Studio,Deli / Bodega,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant


In [65]:

# Cluster 3: show column 1 plus every column from position 5 onwards
in_cluster = toronto_merged['Cluster Labels'] == 3
shown_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Central Toronto,3,Gym,Trail,Department Store,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant


In [66]:

# Cluster 4: show column 1 plus every column from position 5 onwards
in_cluster = toronto_merged['Cluster Labels'] == 4
shown_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East Toronto,4,Trail,Neighborhood,Health Food Store,Pub,Dog Run,Dessert Shop,Diner,Discount Store,Distribution Center,Yoga Studio
