# Assignment - Toronto data from Wiki page
*Submitted by Vikram Seshadri for Coursera Capstone Project Assignment Week 3*

#### First step is to import the necessary libraries to extract information from the *"List_of_postal_codes_of_Canada:_M"* wikipedia page.

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

In [2]:
wiki_url = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M").text
soup = BeautifulSoup(wiki_url)

In [3]:
wiki_table = soup.find("table",class_ = "wikitable sortable")
PostalCode = []
Borough = []
Neighborhood = []
ColumnTitles = []
titles = soup.findAll('th')
for i in range(0,3):
    ColumnTitles.append(titles[i].find(text=True).strip('\n'))

for row in soup.findAll("tr"):
    cells = row.findAll('td')
    if len(cells)==3: #Only extract table body not heading
        PostalCode.append(str(cells[0].find(text=True).strip('\n')))
        Borough.append(str(cells[1].find(text=True).strip('\n')))
        Neighborhood.append(str(cells[2].find(text=True).strip('\n')))      

#### Convert the lists into a dataframe using Pandas

In [4]:
Toronto_df = pd.DataFrame(columns=ColumnTitles)
Toronto_df['Postcode'] = PostalCode
Toronto_df['Borough'] = Borough
Toronto_df['Neighbourhood'] = Neighborhood
Toronto_df.rename(columns={"Neighbourhood" : "Neighborhood"},inplace = True)
Toronto_df.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


#### Remove the rows that does not have an assigned Borough

In [5]:
Toronto_new_df = Toronto_df[Toronto_df.Borough != 'Not assigned']

In [6]:
Toronto_new_df.head(10)

Unnamed: 0,Postcode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


#### If a neighborhood is not assigned, assign the same name as that of the borough.

In [7]:
pd.set_option('mode.chained_assignment', None)
Toronto_new_df.Neighborhood = Toronto_new_df.Borough.where(Toronto_new_df.Neighborhood == 'Not assigned', Toronto_new_df.Neighborhood)
Toronto_new_df.head(10)

Unnamed: 0,Postcode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


In [8]:
Toronto_new_df.shape

(211, 3)

#### Download the latitude and longitude locations for the corresponding postal codes from the *cocl* website.

In [9]:
postal_codes = Toronto_new_df['Postcode'].values.tolist()
LatLng = pd.read_csv('https://cocl.us/Geospatial_data')
LatLng.rename(columns={"Postal Code" : "Postcode"},inplace = True)
LatLng.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [15]:
Toronto_loc_df = Toronto_new_df.join(LatLng.set_index('Postcode'), on='Postcode')
Toronto_disp_df = Toronto_loc_df
Toronto_postal_agg = pd.DataFrame(Toronto_disp_df.groupby(['Postcode','Borough'])['Neighborhood'].apply(lambda x: ','.join(x)))
Toronto_postal_disp = Toronto_postal_agg.join(LatLng.set_index('Postcode'), on='Postcode')
Toronto_postal_disp.reset_index(inplace=True)

#### Look up the names of unique boroughs to identify the names of boroughs that have the word Toronto in it.

In [19]:
Toronto_postal_disp.head(10)

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848


#### Store the location details and Neighborhood details of the Boroughs that have the word Toronto in it.

In [20]:
Toronto_nbh = Toronto_postal_disp[Toronto_postal_disp['Borough'].str.contains('Toronto')].reset_index(drop=True)
Toronto_nbh.head(10)

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West,Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West,India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
5,M4P,Central Toronto,Davisville North,43.712751,-79.390197
6,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
7,M4S,Central Toronto,Davisville,43.704324,-79.38879
8,M4T,Central Toronto,"Moore Park,Summerhill East",43.689574,-79.38316
9,M4V,Central Toronto,"Deer Park,Forest Hill SE,Rathnelly,South Hill,...",43.686412,-79.400049


In [21]:
Toronto_nbh.shape

(38, 5)

In [22]:
import json # library to handle JSON files
from geopy.geocoders import Nominatim 
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans
import folium # map rendering library
print("Packages Installed!")

Packages Installed!


#### Obtain the Latitude and Longitude location of Toronto.

In [23]:
address = 'Toronto, Canada'
geolocator = Nominatim(user_agent="Toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of  Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of  Toronto are 43.653963, -79.387207.


#### Using Folium, visualize the map of Toronto with the Boroughs containing the name Toronto in it.

In [24]:
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(Toronto_nbh['Latitude'], Toronto_nbh['Longitude'], Toronto_nbh['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

#### Provide Foursquare client ID, Client Secret, Version, and the limit of the search.

In [25]:
CLIENT_ID = 'KFQB4CA4OB0VCSZFMGBYRF3CWIUU5TVCPRNQXU3QXYXZ4VPT' # your Foursquare ID
CLIENT_SECRET = '2JP0S2ZVUZHSAXVJS53UYQOKZ0M1UE5SWDQ2GAMAA34EKNOC' # your Foursquare Secret
VERSION = '20190715' # Foursquare API version
LIMIT = 100

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: KFQB4CA4OB0VCSZFMGBYRF3CWIUU5TVCPRNQXU3QXYXZ4VPT
CLIENT_SECRET:2JP0S2ZVUZHSAXVJS53UYQOKZ0M1UE5SWDQ2GAMAA34EKNOC


#### Function that will obtain the nearby venues for any number of places with their corresponding latitudes and longitudes. The function returns a dataframe that has both the neighborhood location, and venue location and category. This function can be used subsequently to obtain the details of the venues near the neighborhoods in Toronto.

In [26]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [27]:
Toronto_nbh.columns

Index(['Postcode', 'Borough', 'Neighborhood', 'Latitude', 'Longitude'], dtype='object')

In [28]:
Toronto_venues = getNearbyVenues(names=Toronto_nbh['Neighborhood'],
                                   latitudes=Toronto_nbh['Latitude'],
                                   longitudes=Toronto_nbh['Longitude']
                                  )

In [29]:
print(Toronto_venues.shape)
Toronto_venues.head()

(1707, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,"The Danforth West,Riverdale",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant


In [30]:
Toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide,King,Richmond",100,100,100,100,100,100
Berczy Park,56,56,56,56,56,56
"Brockton,Exhibition Place,Parkdale Village",22,22,22,22,22,22
Business Reply Mail Processing Centre 969 Eastern,19,19,19,19,19,19
"CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara",17,17,17,17,17,17
"Cabbagetown,St. James Town",45,45,45,45,45,45
Central Bay Street,86,86,86,86,86,86
"Chinatown,Grange Park,Kensington Market",100,100,100,100,100,100
Christie,16,16,16,16,16,16
Church and Wellesley,88,88,88,88,88,88


In [31]:
print('There are {} uniques categories.'.format(len(Toronto_venues['Venue Category'].unique())))

There are 236 uniques categories.


#### Converting the venue category into numerical dummy values for performing K-Means algorithm.

In [32]:
# one hot encoding
Toronto_onehot = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")
Toronto_onehot.head()

Unnamed: 0,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### One of the columns of the dataframe *Toronto_onehot*  already has the name *Neighborhood*. Therefore when adding the neighborhood information to this dataframe, the column is named as *Neighborhoods*

In [33]:
Toronto_onehot.insert(0,'Neighborhoods', Toronto_venues['Neighborhood'], True)

In [34]:
Toronto_onehot.shape

(1707, 237)

#### Group the dataframe by neighborhood and take the mean of all the frequencies of occurence.

In [35]:
Toronto_grouped = Toronto_onehot.groupby('Neighborhoods').mean().reset_index()
Toronto_grouped.head()

Unnamed: 0,Neighborhoods,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,"Adelaide,King,Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,...,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0
2,"Brockton,Exhibition Place,Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632
4,"CN Tower,Bathurst Quay,Island airport,Harbourf...",0.0,0.058824,0.058824,0.058824,0.117647,0.176471,0.117647,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [36]:
Toronto_grouped.shape

(38, 237)

#### Display the top five venues for each neighborhood.

In [37]:
num_top_venues = 5

for hood in Toronto_grouped['Neighborhoods']:
    print("----"+hood+"----")
    temp = Toronto_grouped[Toronto_grouped['Neighborhoods'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide,King,Richmond----
         venue  freq
0  Coffee Shop  0.07
1         Café  0.05
2          Bar  0.04
3   Steakhouse  0.04
4        Hotel  0.03


----Berczy Park----
                venue  freq
0         Coffee Shop  0.11
1        Cocktail Bar  0.05
2      Farmers Market  0.04
3         Cheese Shop  0.04
4  Seafood Restaurant  0.04


----Brockton,Exhibition Place,Parkdale Village----
            venue  freq
0     Coffee Shop  0.09
1  Breakfast Spot  0.09
2            Café  0.09
3      Restaurant  0.05
4         Stadium  0.05


----Business Reply Mail Processing Centre 969 Eastern----
                venue  freq
0  Light Rail Station  0.11
1       Auto Workshop  0.05
2                 Spa  0.05
3       Burrito Place  0.05
4             Butcher  0.05


----CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara----
              venue  freq
0   Airport Service  0.18
1    Airport Lounge  0.12
2  Airport Terminal  0.12
3            

Function to sort the required number of venues in decending order of freqency.

In [38]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Use the above written function to obtain the top 10 venues from each neighborhood.

In [39]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhoods']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhoods'] = Toronto_grouped['Neighborhoods']

for ind in np.arange(Toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,King,Richmond",Coffee Shop,Café,Bar,Steakhouse,American Restaurant,Gym,Restaurant,Hotel,Burger Joint,Thai Restaurant
1,Berczy Park,Coffee Shop,Cocktail Bar,Beer Bar,Farmers Market,Cheese Shop,Seafood Restaurant,Café,Bakery,Steakhouse,Museum
2,"Brockton,Exhibition Place,Parkdale Village",Breakfast Spot,Café,Coffee Shop,Performing Arts Venue,Intersection,Burrito Place,Convenience Store,Stadium,Caribbean Restaurant,Restaurant
3,Business Reply Mail Processing Centre 969 Eastern,Light Rail Station,Yoga Studio,Auto Workshop,Pizza Place,Recording Studio,Restaurant,Butcher,Burrito Place,Skate Park,Brewery
4,"CN Tower,Bathurst Quay,Island airport,Harbourf...",Airport Service,Airport Lounge,Airport Terminal,Plane,Harbor / Marina,Coffee Shop,Sculpture Garden,Boat or Ferry,Boutique,Bar


#### In this study, 7 clusters are chosen in order to get a better classification of Toronto neighborhoods

In [40]:
kclusters = 7

Toronto_grouped_clustering = Toronto_grouped.drop('Neighborhoods', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Toronto_grouped_clustering)
kmeans.labels_[0:10]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [41]:
len(kmeans.labels_)

38

In [42]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
neighborhoods_venues_sorted

Unnamed: 0,Cluster Labels,Neighborhoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1,"Adelaide,King,Richmond",Coffee Shop,Café,Bar,Steakhouse,American Restaurant,Gym,Restaurant,Hotel,Burger Joint,Thai Restaurant
1,1,Berczy Park,Coffee Shop,Cocktail Bar,Beer Bar,Farmers Market,Cheese Shop,Seafood Restaurant,Café,Bakery,Steakhouse,Museum
2,1,"Brockton,Exhibition Place,Parkdale Village",Breakfast Spot,Café,Coffee Shop,Performing Arts Venue,Intersection,Burrito Place,Convenience Store,Stadium,Caribbean Restaurant,Restaurant
3,1,Business Reply Mail Processing Centre 969 Eastern,Light Rail Station,Yoga Studio,Auto Workshop,Pizza Place,Recording Studio,Restaurant,Butcher,Burrito Place,Skate Park,Brewery
4,1,"CN Tower,Bathurst Quay,Island airport,Harbourf...",Airport Service,Airport Lounge,Airport Terminal,Plane,Harbor / Marina,Coffee Shop,Sculpture Garden,Boat or Ferry,Boutique,Bar
5,1,"Cabbagetown,St. James Town",Coffee Shop,Café,Restaurant,Pizza Place,Italian Restaurant,Pub,Bakery,Bistro,Dive Bar,Butcher
6,1,Central Bay Street,Coffee Shop,Sandwich Place,Café,Ice Cream Shop,Italian Restaurant,Burger Joint,Chinese Restaurant,Bar,Spa,Japanese Restaurant
7,1,"Chinatown,Grange Park,Kensington Market",Café,Vegetarian / Vegan Restaurant,Chinese Restaurant,Bakery,Mexican Restaurant,Vietnamese Restaurant,Bar,Dumpling Restaurant,Coffee Shop,Caribbean Restaurant
8,1,Christie,Grocery Store,Café,Park,Diner,Nightclub,Convenience Store,Baby Store,Italian Restaurant,Coffee Shop,Athletics & Sports
9,1,Church and Wellesley,Coffee Shop,Gay Bar,Japanese Restaurant,Sushi Restaurant,Restaurant,Hotel,Gastropub,Fast Food Restaurant,Mediterranean Restaurant,Men's Store


#### Before merging the column *Neighborhood* from *neighborhood_venues_sorted*, rename the column *Neighborhood* to *Neighborhoods* in Toronto_merged dataframe.

In [43]:
Toronto_merged = Toronto_nbh
Toronto_merged.rename(columns = {'Neighborhood':'Neighborhoods'},inplace=True)
Toronto_merged.head()

Unnamed: 0,Postcode,Borough,Neighborhoods,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West,Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West,India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [44]:
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Toronto_merged = Toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhoods'), on='Neighborhoods')
Toronto_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighborhoods,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,5,Health Food Store,Pub,Trail,Neighborhood,Yoga Studio,Dumpling Restaurant,Dive Bar,Dog Run,Doner Restaurant,Donut Shop
1,M4K,East Toronto,"The Danforth West,Riverdale",43.679557,-79.352188,1,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Furniture / Home Store,Yoga Studio,Pizza Place,Bookstore,Brewery,Bubble Tea Shop
2,M4L,East Toronto,"The Beaches West,India Bazaar",43.668999,-79.315572,0,Park,Sandwich Place,Burger Joint,Food & Drink Shop,Steakhouse,Fish & Chips Shop,Fast Food Restaurant,Board Shop,Brewery,Burrito Place
3,M4M,East Toronto,Studio District,43.659526,-79.340923,1,Café,Coffee Shop,Gastropub,Italian Restaurant,Bakery,American Restaurant,Yoga Studio,Park,Brewery,Seafood Restaurant
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,3,Park,Swim School,Bus Line,Yoga Studio,Dog Run,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant


In [None]:
Toronto_merged.shape

#### Folium map with locations colored corresponding to different clusters.

In [45]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)+5))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merged['Latitude'], Toronto_merged['Longitude'], Toronto_merged['Neighborhoods'], Toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### Cluster 1:

In [46]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 0, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,East Toronto,0,Park,Sandwich Place,Burger Joint,Food & Drink Shop,Steakhouse,Fish & Chips Shop,Fast Food Restaurant,Board Shop,Brewery,Burrito Place
5,Central Toronto,0,Food & Drink Shop,Breakfast Spot,Gym,Hotel,Park,Sandwich Place,Clothing Store,Eastern European Restaurant,Dog Run,Doner Restaurant


#### Cluster 2:
* Contains Coffee shop and cafe in top 5
* All Boroughs have some common venues in them

In [47]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 1, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,East Toronto,1,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Furniture / Home Store,Yoga Studio,Pizza Place,Bookstore,Brewery,Bubble Tea Shop
3,East Toronto,1,Café,Coffee Shop,Gastropub,Italian Restaurant,Bakery,American Restaurant,Yoga Studio,Park,Brewery,Seafood Restaurant
6,Central Toronto,1,Coffee Shop,Clothing Store,Yoga Studio,Italian Restaurant,Chinese Restaurant,Rental Car Location,Dessert Shop,Diner,Salon / Barbershop,Mexican Restaurant
7,Central Toronto,1,Coffee Shop,Sandwich Place,Dessert Shop,Pizza Place,Café,Sushi Restaurant,Italian Restaurant,Thai Restaurant,Brewery,Seafood Restaurant
9,Central Toronto,1,Coffee Shop,Pub,Light Rail Station,Liquor Store,Sports Bar,Supermarket,Sushi Restaurant,Bagel Shop,Fried Chicken Joint,Restaurant
11,Downtown Toronto,1,Coffee Shop,Café,Restaurant,Pizza Place,Italian Restaurant,Pub,Bakery,Bistro,Dive Bar,Butcher
12,Downtown Toronto,1,Coffee Shop,Gay Bar,Japanese Restaurant,Sushi Restaurant,Restaurant,Hotel,Gastropub,Fast Food Restaurant,Mediterranean Restaurant,Men's Store
13,Downtown Toronto,1,Coffee Shop,Bakery,Park,Pub,Café,Theater,Restaurant,Mexican Restaurant,Breakfast Spot,Gym / Fitness Center
14,Downtown Toronto,1,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Fast Food Restaurant,Diner,Bubble Tea Shop,Plaza,Ice Cream Shop,Italian Restaurant
15,Downtown Toronto,1,Coffee Shop,Café,Restaurant,Hotel,Italian Restaurant,Beer Bar,Gastropub,Breakfast Spot,Cosmetics Shop,Cocktail Bar


#### Cluster 3:
* All of them are in Central Toronto

In [48]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 2, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,Central Toronto,2,Garden,Yoga Studio,Fish Market,Filipino Restaurant,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store


#### Cluster 4:

In [49]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 3, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Central Toronto,3,Park,Swim School,Bus Line,Yoga Studio,Dog Run,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant


#### Cluster 5:
* Only one venue in central Toronto popular for Playground and Park

In [50]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 4, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Central Toronto,4,Playground,Park,Summer Camp,Tennis Court,Discount Store,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store
10,Downtown Toronto,4,Park,Playground,Trail,Building,Discount Store,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store


#### Cluster 6:
* Location in East Toronto popular for Health Food.

In [51]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 5, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East Toronto,5,Health Food Store,Pub,Trail,Neighborhood,Yoga Studio,Dumpling Restaurant,Dive Bar,Dog Run,Doner Restaurant,Donut Shop


#### Cluster 7:

In [52]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 6, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
23,Central Toronto,6,Mexican Restaurant,Trail,Sushi Restaurant,Jewelry Store,Yoga Studio,Dog Run,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space


*This marks the end of this Jupyter Notebook that analyzes the neighborhoods in Toronto, Canada using KMeans Algorithm.*