### 1. Download dataframe for Toronto boroughs with coordinates from week 1-3

In [1]:
#!conda install -c conda-forge geopy --yes

In [2]:
#!conda install -c conda-forge folium=0.5.0 --yes

In [3]:
# Data wrangling modules
import io
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

Import plotting libraries

In [4]:
import json # For handling JSON files
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium # Map rendering library 

Fetch **Borough** data from dataframe from Github

In [5]:
url = 'https://raw.githubusercontent.com/wgova/Recommender-Central-Toronto/master/Recommender%20-%20Central%20Toronto%20Accomodation/Data/toronto_df.csv'
toronto_df = pd.read_csv(url)
toronto_df.head(3)

Unnamed: 0.1,Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
0,0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711


In [6]:
print('Uniques Boroughs: {} .'.format(len(toronto_df['Borough'].unique())))
print('List of different categories:')
list(toronto_df['Borough'].unique())

Uniques Boroughs: 11 .
List of different categories:


['Scarborough',
 'North York',
 'East York',
 'East Toronto',
 'Central Toronto',
 'Downtown Toronto',
 'York',
 'West Toronto',
 "Queen's Park",
 'Mississauga',
 'Etobicoke']

Plot geotagged map of Toronto with Toronto Boroughs markers 

In [7]:
# create empty map of Toronto using latitude and longitude values
lat = 43.7615390 ; lon = -79.411079
Toronto_boroughs_map = folium.Map(location=[lat, lon], zoom_start=11)

# add markers to map
for lat, lng, label in zip(toronto_df['Latitude'], toronto_df['Longitude'], 
                           toronto_df['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(Toronto_boroughs_map)  
    
Toronto_boroughs_map

In [13]:
#Central Toronto

# selecting only neighborhoods related to "Central Toronto" borough.
toronto = toronto_df[toronto_df['Borough'] == 'Central Toronto']
toronto = toronto_df.reset_index(drop=True).drop(columns = 'Unnamed: 0')
toronto.head(3)

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711


### 2. Make an API call to Foursquare to get information on venues in Central Toronto

In [1]:
Client_ID = 'User_name'
Client_Secret = 'token'
Version = '20190721'

Define a function to fetch coordinates from FOURSQUARE

In [10]:
def api_call_4sqr (postal_code_list, neighbourhood_list, lat_list, lng_list, LIMIT = 50000, radius = 10000):
    api = []
    counter = 0
    for postal_code, neighbourhood, lat, lng in zip(postal_code_list, neighbourhood_list, lat_list, lng_list):
         
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            Client_ID, Client_Secret, Version, 
            lat, lng, radius, LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        api_dict = {}
        api_dict['Postalcode'] = postal_code; api_dict['Neighbourhood(s)'] = neighbourhood; 
        api_dict['Latitude'] = lat; api_dict['Longitude'] = lng;
        api_dict['API_calls'] = results;
        api.append(api_dict)
        counter += 1
    return api;

In [14]:
Toronto_4sqr = api_call_4sqr(list(toronto['Postalcode']),list(toronto['Neighbourhood']),
                             list(toronto['Latitude']),list(toronto['Longitude']),)

In [15]:
#Use pickle to pickle results and store them in local drive
import pickle
with open("Toronto_4sqr.txt", "wb") as fp:   
    pickle.dump(Toronto_4sqr, fp)   
# Unpickle results
with open("Toronto_4sqr.txt", "rb") as fp:   
    Toronto_4sqr = pickle.load(fp)

print("Foursquare API call results saved in: ")
%pwd

Foursquare API call results saved in: 


'C:\\Users\\16353\\Documents\\Coding\\Projects\\Courses projects\\Recommender-Central-Toronto'

Plot the map of Central Toronto superimposed onto the one for Toronto

In [16]:
from folium import IFrame
lat = 43.72021610 ; lon = -79.5395769
map_Toronto_Central = folium.Map(location=[lat, lon], zoom_start=10.5)

# Add a popup label for the map
text = 'Boroughs in Central Toronto'
iframe = folium.IFrame(text, width=700, height=50)
popup = folium.Popup(iframe, max_width=1000)
Text = folium.Marker(location=[lat,lon], popup=popup,
                     icon=folium.Icon(icon_color='green'))

# add markers to map
for lat, lng, label in zip(toronto['Latitude'], toronto['Longitude'], toronto['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng], radius = 6, popup = label, color ='red', fill = True,fill_color = '#3186cc',
        fill_opacity = 0.7).add_to(Toronto_boroughs_map)

Toronto_boroughs_map.add_child(Text)
Toronto_boroughs_map.save('dToronto_Central.html')
Toronto_boroughs_map


### Data cleaning to convert the raw Foursquare data into dataframes 

In [17]:
# Function to create dataframe
def venues_results(Toronto_4sqr):
    venues_df = pd.DataFrame(columns = ['Postalcode', 'Neighbourhood','Latitude', 'Longitude','Venue', 
                                        'Tips for venue', 'Venue Category', 'Distance'])
    
    for hood_dict in Toronto_4sqr: # Store neighborhood names in a dictionary
        postal_code = hood_dict['Postalcode']; 
        hood = hood_dict['Neighbourhood(s)'];
        lat = hood_dict['Latitude']; 
        lon = hood_dict['Longitude']
                
        for venue_dict in hood_dict['API_calls']: #Store venue names in a dictionary
            summary = venue_dict['reasons']['items'][0]['summary']
            name = venue_dict['venue']['name']
            distance = venue_dict['venue']['location']['distance']
            category =  venue_dict['venue']['categories'][0]['name']
            
            venues_df = venues_df.append({
                'Postalcode': postal_code, 'Neighbourhood': hood,'Latitude': lat, 'Longitude':lon,
                'Venue': name, 'Tips for venue': summary,'Venue Category': category, 'Distance': distance
            },ignore_index = True)
    return(venues_df)

Save a copy of the dataframe results to a csv file

In [18]:
venues_toronto = venues_results(Toronto_4sqr)

In [19]:
venues = venues_toronto['Venue'].unique()
venues = len(venues) ; print('Venues:',venues)
hoods = venues_toronto['Neighbourhood'].unique()
hoods = len(hoods) ; print('Neighbourhoods:', hoods)


Venues: 1002
Neighbourhoods: 103


In [20]:
venues_toronto.to_csv('Tips - Central Toronto.csv')
venues_toronto.tail()

Unnamed: 0,Postalcode,Neighbourhood,Latitude,Longitude,Venue,Tips for venue,Venue Category,Distance
10295,M9W,Northwest,43.706748,-79.594054,Starbucks,This spot is popular,Coffee Shop,9324
10296,M9W,Northwest,43.706748,-79.594054,St. James's Gate Irish Pub,This spot is popular,Pub,8536
10297,M9W,Northwest,43.706748,-79.594054,Creme de la Creme,This spot is popular,Breakfast Spot,9435
10298,M9W,Northwest,43.706748,-79.594054,Mazaj Lounge,This spot is popular,Hookah Bar,7840
10299,M9W,Northwest,43.706748,-79.594054,Apache Burger,This spot is popular,Burger Joint,8775


In [21]:
venues_toronto = pd.read_csv('Tips - Central Toronto.csv')
#venues_toronto.head()
list(venues_toronto['Venue Category'].unique())

['Athletics & Sports',
 'Zoo Exhibit',
 'Zoo',
 'Bakery',
 'Park',
 'Burger Joint',
 'Caribbean Restaurant',
 'Italian Restaurant',
 'Dessert Shop',
 'Hakka Restaurant',
 'Sandwich Place',
 'Chinese Restaurant',
 'Indian Restaurant',
 'Fried Chicken Joint',
 'Coffee Shop',
 'Sri Lankan Restaurant',
 'Breakfast Spot',
 'Campground',
 'Farm',
 'Liquor Store',
 'Spa',
 'Food & Drink Shop',
 'Golf Course',
 'Sports Bar',
 'Japanese Restaurant',
 'Beach',
 'Pub',
 'Asian Restaurant',
 'Sporting Goods Shop',
 'Supermarket',
 'BBQ Joint',
 'Noodle House',
 'Steakhouse',
 'Tea Room',
 'Butcher',
 'Gym',
 'Cosmetics Shop',
 'Mexican Restaurant',
 'Pharmacy',
 'Community Center',
 'Fast Food Restaurant',
 'Arts & Crafts Store',
 'Grocery Store',
 'Ice Cream Shop',
 'Toy / Game Store',
 'Vegetarian / Vegan Restaurant',
 'Bookstore',
 'Thai Restaurant',
 'Music Store',
 'Fish & Chips Shop',
 'Middle Eastern Restaurant',
 'American Restaurant',
 'Clothing Store',
 'Warehouse Store',
 'Sushi Restaur

In [22]:
# one hot encoding
venues_toronto_hc = pd.get_dummies(data = venues_toronto, drop_first  = False, 
                              prefix = "", prefix_sep = "", columns = ['Venue Category'])
venues_toronto_hc.head()
#venues_toronto['Venue Category'].uniques()

Unnamed: 0.1,Unnamed: 0,Postalcode,Neighbourhood,Latitude,Longitude,Venue,Tips for venue,Distance,Afghan Restaurant,Airport Lounge,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,0,M1B,"Rouge, Malvern",43.806686,-79.194353,Toronto Pan Am Sports Centre,This spot is popular,1788,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,M1B,"Rouge, Malvern",43.806686,-79.194353,African Rainforest Pavilion,This spot is popular,1509,0,0,...,0,0,0,0,0,0,0,0,0,1
2,2,M1B,"Rouge, Malvern",43.806686,-79.194353,Toronto Zoo,This spot is popular,1857,0,0,...,0,0,0,0,0,0,0,0,1,0
3,3,M1B,"Rouge, Malvern",43.806686,-79.194353,Polar Bear Exhibit,This spot is popular,1999,0,0,...,0,0,0,0,0,0,0,0,1,0
4,4,M1B,"Rouge, Malvern",43.806686,-79.194353,penguin exhibit,This spot is popular,1571,0,0,...,0,0,0,0,0,0,0,0,0,1


Load the dataframe for further analysis

In [23]:
features = ['Neighbourhood',
            'Zoo Exhibit','Zoo','Park','Dessert Shop','Burger Joint','Campground','Hakka Restaurant','Coffee Shop',
            'Fried Chicken Joint','Sandwich Place','Italian Restaurant','Chinese Restaurant','BBQ Joint','Steakhouse',
            'Golf Course','Liquor Store','Sports Bar','Food & Drink Shop','Pub','Beach','Pharmacy','Sporting Goods Shop',
            'Supermarket','Arts & Crafts Store','Grocery Store','Toy / Game Store','Breakfast Spot','Noodle House',
            'Community Center','Ice Cream Shop','Fast Food Restaurant','Bookstore','Gym','American Restaurant','Tea Room',
            'Cosmetics Shop','Wings Joint','Sushi Restaurant','Seafood Restaurant','Gym / Fitness Center','Fish & Chips Shop',
            'Restaurant','Pizza Place','Bistro','Smoothie Shop','Thai Restaurant','Hotpot Restaurant',
            'Café','History Museum','Clothing Store','Gastropub','Indie Movie Theater','Shopping Mall',
            'Bagel Shop','Other Great Outdoors','Movie Theater','Playground','General Entertainment','Gourmet Shop',
            'Comic Shop','Farmers Market','Beer Bar','Burrito Place','Cocktail Bar','Historic Site','French Restaurant',
            'Trail','Taco Place','Diner','Skating Rink','Bubble Tea Shop','Auto Dealership','Shopping Plaza','Food','Road',
            'New American Restaurant','Rock Climbing Spot','Hobby Shop','Furniture / Home Store',
            'Paper / Office Supplies Store','Deli / Bodega','Health Food Store','Ski Chalet','Hardware Store',
            'Recreation Center','Garden','Electronics Store','Department Store','Juice Bar','Museum','Food Court','Bar','Theater',
            'Snack Place','Soup Place','Train Station','Theme Park','Theme Park Ride / Attraction','Shoe Store',
            'Chocolate Shop','Tapas Restaurant','Organic Grocery','Comedy Club','Nightclub',
            'Convenience Store']

venues = venues_toronto_hc[features].groupby('Neighbourhood').sum()

venues.head()

Unnamed: 0_level_0,Zoo Exhibit,Zoo,Park,Dessert Shop,Burger Joint,Campground,Hakka Restaurant,Coffee Shop,Fried Chicken Joint,Sandwich Place,...,Train Station,Theme Park,Theme Park Ride / Attraction,Shoe Store,Chocolate Shop,Tapas Restaurant,Organic Grocery,Comedy Club,Nightclub,Convenience Store
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Adelaide, King, Richmond",0,0,4,1,0,0,0,8,0,3,...,1,0,0,0,0,0,1,1,0,0
Agincourt,2,2,4,0,3,0,3,4,0,1,...,0,0,0,0,0,0,0,0,0,0
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",0,0,3,3,3,0,2,4,0,1,...,0,0,0,0,0,0,0,0,0,0
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",0,0,2,0,4,0,0,2,0,3,...,2,0,0,0,0,0,0,0,1,1
"Alderwood, Long Branch",0,0,5,2,4,0,0,6,0,1,...,0,0,0,0,0,1,0,0,0,0


In [24]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# run k-means clustering
kmeans = KMeans(n_clusters = 7, random_state = 0).fit(venues)

In [25]:
means_df = pd.DataFrame(kmeans.cluster_centers_)
means_df.columns = venues.columns
means_df.index = ['Group 1','Group 2','Group 3','Group 4', 'Group 5', 'Group 6', 'Group 7']
means_df['Total Sum'] = means_df.sum(axis = 1)
mean = means_df['Total Sum']
means_df.sort_values(axis = 0, by = ['Total Sum'], ascending=False)

Unnamed: 0,Zoo Exhibit,Zoo,Park,Dessert Shop,Burger Joint,Campground,Hakka Restaurant,Coffee Shop,Fried Chicken Joint,Sandwich Place,...,Theme Park,Theme Park Ride / Attraction,Shoe Store,Chocolate Shop,Tapas Restaurant,Organic Grocery,Comedy Club,Nightclub,Convenience Store,Total Sum
Group 2,1.110223e-16,0.1818182,3.090909,0.636364,2.818182,1.0408340000000001e-17,5.5511150000000004e-17,3.727273,2.775558e-17,1.818182,...,0.09090909,0.1818182,0.1818182,5.5511150000000004e-17,0.3636364,5.5511150000000004e-17,5.5511150000000004e-17,0.4545455,0.1818182,67.636364
Group 3,5.5511150000000004e-17,2.775558e-17,7.928571,1.214286,0.07142857,1.0408340000000001e-17,5.5511150000000004e-17,5.428571,0.07142857,2.428571,...,1.734723e-18,3.469447e-18,1.0408340000000001e-17,0.07142857,1.5,0.6428571,5.5511150000000004e-17,2.0816680000000002e-17,3.469447e-18,66.928571
Group 5,2.916667,1.166667,5.916667,0.833333,3.166667,0.25,1.833333,4.333333,0.5833333,1.25,...,1.734723e-18,3.469447e-18,1.0408340000000001e-17,5.5511150000000004e-17,1.110223e-16,5.5511150000000004e-17,5.5511150000000004e-17,2.0816680000000002e-17,3.469447e-18,66.833333
Group 6,5.5511150000000004e-17,2.775558e-17,4.384615,1.615385,2.153846,1.0408340000000001e-17,5.5511150000000004e-17,4.076923,0.4615385,1.230769,...,1.734723e-18,3.469447e-18,1.0408340000000001e-17,0.1538462,1.110223e-16,5.5511150000000004e-17,5.5511150000000004e-17,2.0816680000000002e-17,3.469447e-18,66.461538
Group 4,-5.5511150000000004e-17,2.775558e-17,6.2,1.35,0.4,6.938894e-18,2.775558e-17,3.9,2.775558e-17,3.15,...,1.734723e-18,3.469447e-18,0.05,2.775558e-17,1.7,0.4,0.05,0.05,3.469447e-18,65.35
Group 1,0.0,2.775558e-17,9.0625,0.75,0.4375,1.0408340000000001e-17,5.5511150000000004e-17,7.4375,2.775558e-17,1.3125,...,1.734723e-18,3.469447e-18,1.0408340000000001e-17,0.625,0.0,5.5511150000000004e-17,0.0625,2.0816680000000002e-17,3.469447e-18,65.0
Group 7,0.0,2.775558e-17,4.352941,1.647059,4.440892e-16,1.0408340000000001e-17,5.5511150000000004e-17,7.529412,2.775558e-17,3.411765,...,1.734723e-18,3.469447e-18,1.0408340000000001e-17,0.5294118,0.4117647,0.7058824,1.0,2.0816680000000002e-17,3.469447e-18,60.470588


In [26]:
neigh_summary = pd.DataFrame([venues.index, 1 + kmeans.labels_]).T
neigh_summary.columns = ['Neighbourhood', 'Group']

In [27]:
a = neigh_summary[neigh_summary['Group'] == 5]
a = neigh_summary[neigh_summary['Group'] == 2]

In [28]:
def locate_best(neighbourhood,group):
    ad = []
    counter = 0
    for neighbourhood, group in zip(neighbourhood,group):
         
        # make the GET request
        ad_dict = {}
        ad_dict['Neighbourhood'] = neighbourhood; 
        ad_dict['Group'] = group
        ad.append(ad_dict)
        counter += 1
    return ad;

In [29]:
best = locate_best(neigh_summary[neigh_summary['Group'] == 5],neigh_summary[neigh_summary['Group'] == 5])
print(list(best))

[{'Neighbourhood': 'Neighbourhood', 'Group': 'Neighbourhood'}, {'Neighbourhood': 'Group', 'Group': 'Group'}]


In [30]:
name_of_neigh = neigh_summary[neigh_summary['Group'] == 5]
a.head()
#venues[venues['Neighbourhood'] == name_of_neigh].iloc[0,1:5].to_dict()

Unnamed: 0,Neighbourhood,Group
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",2
4,"Alderwood, Long Branch",2
10,"Bloordale Gardens, Eringate, Markland Wood, Ol...",2
17,Canada Post Gateway Processing Centre,2
26,"Cloverdale, Islington, Martin Grove, Princess ...",2


In [31]:
prefs_df = pd.merge(venues_toronto, name_of_neigh, on='Neighbourhood')

In [32]:
prefs_df.reset_index(drop=True).drop(columns = 'Unnamed: 0').head()

Unnamed: 0,Postalcode,Neighbourhood,Latitude,Longitude,Venue,Tips for venue,Venue Category,Distance,Group
0,M1B,"Rouge, Malvern",43.806686,-79.194353,Toronto Pan Am Sports Centre,This spot is popular,Athletics & Sports,1788,5
1,M1B,"Rouge, Malvern",43.806686,-79.194353,African Rainforest Pavilion,This spot is popular,Zoo Exhibit,1509,5
2,M1B,"Rouge, Malvern",43.806686,-79.194353,Toronto Zoo,This spot is popular,Zoo,1857,5
3,M1B,"Rouge, Malvern",43.806686,-79.194353,Polar Bear Exhibit,This spot is popular,Zoo,1999,5
4,M1B,"Rouge, Malvern",43.806686,-79.194353,penguin exhibit,This spot is popular,Zoo Exhibit,1571,5


In [33]:
from folium import IFrame
#lat = 43.72021610 ; lon = -79.5395769
map_Toronto_Central = folium.Map(location=[lat, lon], zoom_start=10.5)

# Add a popup label for the map
text = 'Proximal neighbourhoods to stay in Central Toronto'
iframe = folium.IFrame(text, width=700, height=50)
popup = folium.Popup(iframe, max_width=1000)
Text = folium.Marker(location=[lat,lon], popup=popup,
                     icon=folium.Icon(icon_color='green'))

# add markers to map
for lat, lng, label in zip(prefs_df['Latitude'], prefs_df['Longitude'], prefs_df['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng], radius = 6, popup = label, color ='red', fill = True,fill_color = '#3186cc',
        fill_opacity = 0.7).add_to(Toronto_boroughs_map)

Toronto_boroughs_map.add_child(Text)
Toronto_boroughs_map.save('Destination Toronto_Central.html')
Toronto_boroughs_map
