Import the necessary packages

In [27]:
import pandas as pd
import geocoder
import folium
import requests 
import numpy as np
# import k-means from clustering stage
from sklearn.cluster import KMeans
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

Download the data from the Wikipedia page and store it in a Pandas dataframe.

In [2]:
URL = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
data1 = pd.read_html(URL)[0]
data1.columns = data1.iloc[0]
data1 = data1.iloc[1:]
datafiltered = data1[~data1.Borough.str.contains("Not assigned")]
grouped = datafiltered.groupby(['Postcode']).agg({
                             'Borough': ', '.join, 
                             'Neighbourhood': ', '.join }).reset_index()
for index, row in grouped.iterrows():
    if (row['Neighbourhood']=="Not assigned"):
      row['Neighbourhood']=row['Borough']

Import coordinates

In [3]:
coords = pd.read_csv("Geospatial_Coordinates.csv")


Merge the two data sets

In [4]:
df_cd = pd.merge(grouped, coords, how='left', on = 'Postcode')

Visualise the results

In [5]:
df_cd

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,"Scarborough, Scarborough","Rouge, Malvern",43.806686,-79.194353
1,M1C,"Scarborough, Scarborough, Scarborough","Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,"Scarborough, Scarborough, Scarborough","Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,"Scarborough, Scarborough, Scarborough","East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,"Scarborough, Scarborough, Scarborough","Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,"Scarborough, Scarborough, Scarborough","Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,"Scarborough, Scarborough","Birch Cliff, Cliffside West",43.692657,-79.264848


Make toronto map

In [6]:
latitude = 43.7
longitude = -79.2
# create map of New York using latitude and longitude values
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)


    
map_newyork

Connect to Foursquare

In [7]:
# add markers to map
CLIENT_ID = '5UKMQFKKSXE5LCX4KOQC5CJ5WSBFFWL4ESOXPODP4QDCFR3U'
CLIENT_SECRET = 'GDC244JSZK1WYLBORYNLVBAJHQXGJPNLAFGRILKHFKGQWSMB'
VERSION = '20180605'

Get nearby venues

In [8]:
def getNearbyVenues(names, latitudes, longitudes, radius =500):
    LIMIT = 100 
    venues_list=[]
    
    for name,lat,lng in zip(names, latitudes, longitudes):
        url = 'http://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
        CLIENT_ID, 
        CLIENT_SECRET, 
        VERSION, 
        lat, 
        lng, 
        radius, 
        LIMIT)
        results = requests.get(url).json()["response"]['groups'][0]['items']
  
        venues_list.append([(
         name,
         lat,
         lng,
         v['venue']['name'],
         v['venue']['location']['lat'],
         v['venue']['location']['lng'],
         v['venue']['categories'][0]['name']) for v in results])
       
        
        #nearby_venues = pd.DataFrame([ for item in venue_list])
        df = pd.DataFrame(columns = ['Neighbo rhood',
                                 'Neighborhood Latitude',
                                 'Neighborhood Longitude',
                                 'Venue',
                                 'Venue Latitude',
                                 'Venue Longitude',
                                 'Venue Category'])
        i = 0      
        for venue_list in venues_list:
                for item in venue_list:
                    df.loc[i] = [item[0], item[1],
                                 item[2], item[3],
                                 item[4], item[5],
                                 item[6]] 
                i = i +1
                if (i== 100):
                    break
                
    return df

        

Create Data frame of nearby venues

In [9]:
# type your answer here
toronto_venues = getNearbyVenues(names=df_cd['Neighbourhood'],
                                   latitudes=df_cd['Latitude'],
                                   longitudes=df_cd['Longitude']
                                  )


Visualise the results

In [10]:
toronto_venues.head(15)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge, Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Scarborough Historical Society,43.788755,-79.162438,History Museum
2,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Eggsmart,43.7678,-79.190466,Breakfast Spot
3,Woburn,43.770992,-79.216917,Al-Hamd Biryani & Pizza,43.767585,-79.21957,Indian Restaurant
4,Cedarbrae,43.773136,-79.239476,Popeyes Louisiana Kitchen,43.77593,-79.235328,Fried Chicken Joint
5,Scarborough Village,43.744734,-79.239476,McCowan Park,43.745089,-79.239336,Playground
6,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029,Kennedy Station Passenger Pickup,43.732009,-79.264537,Bus Station
7,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577,Clairlea Futbol Centre,43.715234,-79.286506,Soccer Field
8,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476,Vincent's Spot,43.717002,-79.242353,American Restaurant
9,"Birch Cliff, Cliffside West",43.692657,-79.264848,Birchmount Stadium,43.695323,-79.261293,College Stadium


Create a dataframe of encoded features

In [11]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,American Restaurant,Antique Shop,Athletics & Sports,Bagel Shop,Bakery,Bank,Baseball Field,Boat or Ferry,Breakfast Spot,...,Recording Studio,Restaurant,River,Shopping Mall,Skating Rink,Soccer Field,Supermarket,Trail,Video Store,Wine Bar
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,American Restaurant,Antique Shop,Athletics & Sports,Bagel Shop,Bakery,Bank,Baseball Field,Boat or Ferry,...,Recording Studio,Restaurant,River,Shopping Mall,Skating Rink,Soccer Field,Supermarket,Trail,Video Store,Wine Bar
0,"Adelaide, King, Richmond",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Agincourt,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Alderwood, Long Branch",0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
4,"Bathurst Manor, Downsview North, Wilson Heights",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
5,Bayview Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,"Bedford Park, Lawrence Manor East",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,Berczy Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,"Birch Cliff, Cliffside West",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,"Bloordale Gardens, Eringate, Markland Wood, Ol...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Cluster the results using kmeans

In [13]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([4, 0, 0, 0, 0, 0, 0, 4, 0, 0])

In [14]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [15]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))


# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
     neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Wine Bar,Caribbean Restaurant,Furniture / Home Store,Fried Chicken Joint,Food Truck,Food & Drink Shop,Flower Shop,Financial or Legal Service,Fast Food Restaurant
1,Agincourt,Skating Rink,Wine Bar,Caribbean Restaurant,Furniture / Home Store,Fried Chicken Joint,Food Truck,Food & Drink Shop,Flower Shop,Financial or Legal Service,Fast Food Restaurant
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Playground,Wine Bar,Caribbean Restaurant,Furniture / Home Store,Fried Chicken Joint,Food Truck,Food & Drink Shop,Flower Shop,Financial or Legal Service,Fast Food Restaurant
3,"Alderwood, Long Branch",Skating Rink,Wine Bar,Caribbean Restaurant,Furniture / Home Store,Fried Chicken Joint,Food Truck,Food & Drink Shop,Flower Shop,Financial or Legal Service,Fast Food Restaurant
4,"Bathurst Manor, Downsview North, Wilson Heights",Supermarket,Wine Bar,Caribbean Restaurant,Furniture / Home Store,Fried Chicken Joint,Food Truck,Food & Drink Shop,Flower Shop,Financial or Legal Service,Fast Food Restaurant


In [126]:
#add clustering labels
#neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df_cd

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,"Scarborough, Scarborough","Rouge, Malvern",43.806686,-79.194353,0.0,Fast Food Restaurant,Wine Bar,Caribbean Restaurant,Furniture / Home Store,Fried Chicken Joint,Food Truck,Food & Drink Shop,Flower Shop,Financial or Legal Service,Empanada Restaurant
1,M1C,"Scarborough, Scarborough, Scarborough","Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,0.0,History Museum,Wine Bar,Gift Shop,Furniture / Home Store,Fried Chicken Joint,Food Truck,Food & Drink Shop,Flower Shop,Financial or Legal Service,Fast Food Restaurant
2,M1E,"Scarborough, Scarborough, Scarborough","Guildwood, Morningside, West Hill",43.763573,-79.188711,0.0,Breakfast Spot,Wine Bar,Caribbean Restaurant,Furniture / Home Store,Fried Chicken Joint,Food Truck,Food & Drink Shop,Flower Shop,Financial or Legal Service,Fast Food Restaurant
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0.0,Indian Restaurant,Wine Bar,Gift Shop,Furniture / Home Store,Fried Chicken Joint,Food Truck,Food & Drink Shop,Flower Shop,Financial or Legal Service,Fast Food Restaurant
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0.0,Fried Chicken Joint,Wine Bar,Caribbean Restaurant,Furniture / Home Store,Food Truck,Food & Drink Shop,Flower Shop,Financial or Legal Service,Fast Food Restaurant,Empanada Restaurant


In [136]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7,
        parse_html=True).add_to(map_clusters)
       
map_clusters