In [1]:
# Prerequisite (run once in a terminal, not in this notebook): conda install -c conda-forge geopy

In [2]:
import geopy
from geopy.geocoders import Nominatim
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans

In [3]:
import os

import numpy as np
import pandas as pd
import requests

### Read in data

In [4]:
# Load the Toronto postal-code table from a CSV file in the working directory.
tdata = pd.read_csv("Torontodata.csv")

In [5]:
# Preview the first rows to check the columns that the later cells read.
tdata.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494


### Use the geopy library to get the latitude and longitude of Toronto

In [6]:
# Look up the coordinates of Toronto; the maps in this notebook are centred on
# `latitude` / `longitude`.
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="tl-toronto-neigh")
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when the service finds no match; fail here with a
    # clear message instead of an AttributeError on `location.latitude`.
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [7]:
# Overview map centred on Toronto, with one circle marker per row of tdata.
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Appearance shared by every marker on this map.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

area_rows = zip(tdata['Latitude'], tdata['Longitude'], tdata['Borough'], tdata['Neighbourhood'])
for lat, lng, borough, neighborhood in area_rows:
    # Popup text reads "<neighbourhood>, <borough>".
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    folium.CircleMarker([lat, lng], popup=label, **marker_style).add_to(map_Toronto)

map_Toronto

Reduce the data to the boroughs whose names contain 'Toronto'

In [9]:
# Keep only the rows whose borough is one of the four listed below, then
# renumber the remaining rows from 0.
boroughlist = ['East Toronto', 'Central Toronto', 'Downtown Toronto', 'West Toronto']
in_boroughlist = tdata['Borough'].isin(boroughlist)
treduced = tdata.loc[in_boroughlist].reset_index(drop=True)
treduced.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
1,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
2,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


### Create a map of Toronto and add markers

In [10]:
# Closer-zoom map that shows only the areas kept in treduced.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# Appearance shared by every marker on this map.
circle_options = {
    'radius': 5,
    'color': 'blue',
    'fill': True,
    'fill_color': '#3186cc',
    'fill_opacity': 0.7,
    'parse_html': False,
}

marker_fields = ['Latitude', 'Longitude', 'PostalCode', 'Borough', 'Neighbourhood']
for lat, lon, post, borough, neigh in treduced[marker_fields].itertuples(index=False, name=None):
    # Popup text reads "<borough> (<postal code>): <neighbourhood>".
    label = "{} ({}): {}".format(borough, post, neigh)
    popup = folium.Popup(label, parse_html=True)
    folium.CircleMarker([lat, lon], popup=popup, **circle_options).add_to(map_toronto)

map_toronto

### Define Foursquare Credentials and Version

In [11]:
# Foursquare API credentials are read from the environment so that no secret is
# stored in the notebook source or in its saved output. Set
# FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): the key pair that used to be hardcoded in this cell should be
# treated as compromised and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # your Foursquare Secret
VERSION = '20200314' # Foursquare API version

# Report only whether the credentials are present; never echo their values.
if CLIENT_ID and CLIENT_SECRET:
    print('Foursquare credentials loaded from the environment.')
else:
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'set them before calling the Foursquare API.')

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


### Get the top 100 venues within a radius of 500 meters

In [12]:
# Query the Foursquare "explore" endpoint once per area in treduced and collect
# one tuple per returned venue in `venues`.
radius = 500
LIMIT = 100
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled connection hangs the cell

url = "https://api.foursquare.com/v2/venues/explore"

venues = []

for lat, long, post, borough, neighborhood in zip(treduced['Latitude'], treduced['Longitude'], treduced['PostalCode'], treduced['Borough'], treduced['Neighbourhood']):
    # Pass the query as `params` so requests handles the URL encoding and the
    # credentials are not formatted into a URL string by hand.
    params = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
        'll': '{},{}'.format(lat, long),
        'radius': radius,
        'limit': LIMIT,
    }
    response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
    # Surface HTTP errors (bad credentials, quota, ...) here rather than as a
    # KeyError while digging through the JSON payload.
    response.raise_for_status()

    groups = response.json().get('response', {}).get('groups', [])
    results = groups[0]['items'] if groups else []

    for venue in results:
        details = venue['venue']
        # A venue may come back without any category; record None instead of
        # failing on categories[0].
        categories = details.get('categories') or []
        category_name = categories[0]['name'] if categories else None
        venues.append((
            post,
            borough,
            neighborhood,
            lat,
            long,
            details['name'],
            details['location']['lat'],
            details['location']['lng'],
            category_name))

In [13]:
# Turn the collected venue tuples into a table. The column names are passed to
# the constructor so that an empty `venues` list yields an empty, correctly
# labelled frame instead of a length-mismatch error on `.columns = ...`.
# 'BoroughLatitude' / 'BoroughLongitude' hold the coordinates of the treduced
# row that was queried; the 'Venue*' columns describe the venue itself.
venue_columns = ['PostalCode', 'Borough', 'Neighborhood', 'BoroughLatitude', 'BoroughLongitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']
venues_df = pd.DataFrame(venues, columns=venue_columns)
venues_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot


In [None]:
# Count the venues recorded for each (postal code, borough, neighbourhood) group.
area_keys = ['PostalCode', 'Borough', 'Neighborhood']
venues_per_area = venues_df.groupby(area_keys)['VenueName'].count()
venues_per_area

### Analyze the venues in each area

In [None]:
# One-hot encode the venue category of every venue row and put the three area
# key columns in front.
area_cols = ['PostalCode', 'Borough', 'Neighborhood']

# one hot encoding
category_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# A venue category may share its name with one of the key columns (for example
# a category literally called 'Neighborhood'). Rename such indicator columns so
# they can neither overwrite nor be mistaken for the key columns.
clashes = {name: name + ' (venue category)' for name in area_cols if name in category_onehot.columns}
category_onehot = category_onehot.rename(columns=clashes)

# The key column is named 'Neighborhood' (not 'Neighborhoods') because the
# groupby that consumes this frame uses ['PostalCode', 'Borough', 'Neighborhood'].
treduced_onehot = pd.concat([venues_df[area_cols], category_onehot], axis=1)

treduced_onehot.head()

### Group rows by neighborhood, taking the mean of the frequency of occurrence of each category

In [None]:
# Average every remaining column within each (postal code, borough,
# neighbourhood) group, then turn the group keys back into ordinary columns.
area_keys = ['PostalCode', 'Borough', 'Neighborhood']
per_area = treduced_onehot.groupby(area_keys)
tvenues_freq = per_area.mean().reset_index()
tvenues_freq.head()

### Display the top 10 venues in each neighborhood

In [None]:
# Build a table with one row per area listing its `num_top_venues` most
# frequent venue categories, most frequent first.
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
areaCol = ['PostalCode', 'Borough', 'Neighborhoods']
# Ranks 1-3 take their suffix from `indicators`; every later rank uses 'th'.
# This is an explicit condition instead of a bare `except:` around an index error.
freqColumns = [
    '{}{} Most Common Venue'.format(rank, indicators[rank - 1] if rank <= len(indicators) else 'th')
    for rank in range(1, num_top_venues + 1)
]
# The sort calls in this notebook refer to `freqColumns`; `freqCol` is kept as
# an alias of the same list.
freqCol = freqColumns
columns = areaCol + freqColumns

# create a new dataframe
nvenues_sorted = pd.DataFrame(columns=columns)
nvenues_sorted['PostalCode'] = tvenues_freq['PostalCode']
nvenues_sorted['Borough'] = tvenues_freq['Borough']
nvenues_sorted['Neighborhoods'] = tvenues_freq['Neighborhood']

for ind in range(tvenues_freq.shape[0]):
    # Columns from position 3 onward are the per-category frequencies.
    row_cat = tvenues_freq.iloc[ind, :].iloc[3:]
    row_cat_sorted = row_cat.sort_values(ascending=False)
    nvenues_sorted.iloc[ind, 3:] = row_cat_sorted.index.values[0:num_top_venues]

# Reassign instead of sorting in place so re-running the cell is predictable.
nvenues_sorted = nvenues_sorted.sort_values(freqColumns)
nvenues_sorted

In [None]:
# (rows, columns) of the filtered table of areas.
treduced.shape

### Use kmeans to cluster the neighborhood into 4 clusters

In [None]:
# Cluster the areas on their venue-category frequencies and attach each area's
# cluster label and top venue categories to its row from treduced.
kclusters = 4

# Drop by keyword: the positional `axis` argument of DataFrame.drop was removed
# in pandas 2.0.
tvenues_freq_clustering = tvenues_freq.drop(columns=['PostalCode', 'Borough', 'Neighborhood'])

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(tvenues_freq_clustering)

# kmeans.labels_ follows the row order of tvenues_freq (groupby order), which is
# not the row order of treduced, and tvenues_freq has no row for an area that
# returned no venues. Pair every label with its postal code and merge on that
# key instead of assigning by position.
cluster_labels = pd.DataFrame({
    'PostalCode': tvenues_freq['PostalCode'].to_numpy(),
    'Cluster': kmeans.labels_,
})
# assumes one tvenues_freq row per postal code; otherwise the merge duplicates rows
assert cluster_labels['PostalCode'].is_unique, "expected one frequency row per postal code"

top_venues = nvenues_sorted.drop(columns=['Borough', 'Neighborhoods'])
# Sort keys are taken from the table itself rather than from a variable defined
# in another cell.
rank_cols = [c for c in top_venues.columns if c != 'PostalCode']

# merge() returns new frames, so treduced itself is left unmodified. The inner
# merge keeps only areas that actually received a cluster label.
tcentral_clustered = (
    treduced
    .merge(cluster_labels, on='PostalCode', how='inner')
    .merge(top_venues, on='PostalCode', how='left')
    .sort_values(['Cluster'] + rank_cols)
)
tcentral_clustered

### Display the clustered map

In [None]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters: one hex colour per cluster label, evenly
# spaced over the rainbow colormap
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
# Rows without a cluster label cannot be coloured, so they are left off the map.
clustered_rows = tcentral_clustered.dropna(subset=['Cluster'])
for lat, lon, post, bor, poi, cluster in zip(clustered_rows['Latitude'], clustered_rows['Longitude'], clustered_rows['PostalCode'], clustered_rows['Borough'], clustered_rows['Neighbourhood'], clustered_rows['Cluster']):
    # Labels may arrive as numpy integers or floats; use a plain int for both
    # the popup text and the colour lookup.
    cluster = int(cluster)
    label = folium.Popup('{} ({}): {} - Cluster {}'.format(bor, post, poi, cluster), parse_html=True)
    # Labels run from 0 to kclusters-1, so index the palette directly rather
    # than with cluster-1, which only worked through negative-index wraparound.
    marker_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters