## Usha Manoharan
## Segmenting and Clustering Neighborhoods in Toronto

In this notebook we will build code to 
* scrape the web to obtain data about the Neighborhoods of Toronto 
* use postal codes to obtain the latitude and longitude of the Neighborhoods
* analyze the neighborhoods 

## 1. Scrape the Web to obtain neighborhood data of Toronto

In [184]:
import pandas as pd
import numpy as np

import requests # library to handle requests
import random # library for random number generation

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

#!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

In [185]:
# Read the html table into a dataframe
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
dfs = pd.read_html(url, header=0)

# The first table is the data for toronto city neighborhood 
toronto_data = dfs[0]

# Remove any row which does not have a value assigned for Borough
toronto_data.drop(toronto_data[toronto_data['Borough'] == 'Not assigned'].index, inplace=True)
# reset the index
toronto_data.reset_index(inplace=True, drop=True)


# If neighbourhood has a"Not assigned", replace it with the value of the "borough" of that row
toronto_data.loc[toronto_data['Neighborhood'] == 'Not assigned', 'Neighborhood'] = toronto_data['Borough']

# group by postalcode and concat the neighborhood values
toronto_data.groupby('Postal Code')

# check the number of rows in the table
toronto_data.shape

(103, 3)

In [186]:
# Examine the first few rows of data
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


## 2. Get the latitude and longitude for each neighborhood

In [187]:
lat_lng_df = pd.read_csv(r'~/Downloads/Geospatial_Coordinates.csv')

# merge on postal code
merged_tdata = pd.merge(left=toronto_data, right=lat_lng_df, left_on='Postal Code', right_on='Postal Code')
merged_tdata.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


## 3. Let's Analyze the Downtown Toronto area
### a. Let's search for Thai restaurants at Downtown Toronto location with latitude = 43.654260, longitude = -79.360636 and show it on a map.

In [188]:
CLIENT_ID = 'RH22D4PCG0P0WNNVLKO0ML4EUJ0T01KJBSXGTT4PHOIDAFDC-' 
CLIENT_SECRET = 'J51KSD11D5BGYWHJJIHMSEOGAMUIXJPXSFTV4HIMCJT0PURV-'
VERSION = '20200707'
LIMIT = 30

In [189]:
search_query = 'Thai'
radius = 500
latitude = 43.654260
longitude = -79.360636
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)

In [190]:
results = requests.get(url).json()

In [191]:
# assign relevant part of JSON to venues
venues = results['response']['venues']

# tranform venues into a dataframe
vdf = json_normalize(venues)

In [192]:
# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in vdf.columns if col.startswith('location.')] + ['id']
vdf_filtered = vdf.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
vdf_filtered['categories'] = vdf_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
vdf_filtered.columns = [column.split('.')[-1] for column in vdf_filtered.columns]
vdf_filtered = vdf_filtered[vdf_filtered.categories == 'Thai Restaurant']
vdf_filtered

Unnamed: 0,name,categories,address,lat,lng,labeledLatLngs,distance,postalCode,cc,city,state,country,formattedAddress,crossStreet,id
0,Mengrai Gourmet Thai,Thai Restaurant,82 Ontario St,43.654102,-79.3668,"[{'label': 'display', 'lat': 43.654102, 'lng': -79.3668}]",496,M5A 2V3,CA,Toronto,ON,Canada,"[82 Ontario St, Toronto ON M5A 2V3, Canada]",,58718c5b809a7743ecda3164
1,Sukhothai,Thai Restaurant,490 Front Street East,43.653701,-79.354447,"[{'label': 'display', 'lat': 43.653700903664294, 'lng': -79.3544465359079}]",502,M5A 1H7,CA,Toronto,ON,Canada,"[490 Front Street East, Toronto ON M5A 1H7, Canada]",,59cd8a18e179105e08f003c6
3,Sukhothai,Thai Restaurant,274 Parliament St,43.658444,-79.365681,"[{'label': 'display', 'lat': 43.658444465539525, 'lng': -79.36568085121628}]",618,M5A 2Z6,CA,Toronto,ON,Canada,"[274 Parliament St (btwn Coatsworth St. & Shuter St.), Toronto ON M5A 2Z6, Canada]",btwn Coatsworth St. & Shuter St.,4ada57aff964a520972121e3


In [193]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=14) #generate map 
# add a red circle marker to represent the address at the given latitude, longitude
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='red',
    popup='My location',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(venues_map)

# add the Thai restaurants as blue circle markers
for lat, lng, label in zip(vdf_filtered.lat, vdf_filtered.lng, vdf_filtered.name):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)

# display map
venues_map

### b. Let's get the Ratings of all three Thai restaurants near My location

In [194]:
for venue_id in vdf_filtered.id :
    #venue_id = '4ada57aff964a520972121e3' # ID of Sukothat at Parliament St.
    url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
    result = requests.get(url).json()

    try:
        print('{},{}'.format(result['response']['venue']['name'], result['response']['venue']['location']['address']))
        print('Ratings: {}'.format(result['response']['venue']['rating']))
    except:
        print('This venue has not been rated yet.')

Mengrai Gourmet Thai,82 Ontario St
Ratings: 5.8
Sukhothai,490 Front Street East
Ratings: 6.6
Sukhothai,274 Parliament St
Ratings: 8.4


### c. Let's get the tips and features associated to "Sukothai" at Parliament St, which has the best ratings.

In [195]:
#Get the number of tips
result['response']['venue']['tips']['count']

53

In [196]:
## Sukothai at Parliament St, Tips
limit = 2 # set limit to 2
venue_id = '4ada57aff964a520972121e3' # ID of Sukothat at Parliament St.
url = 'https://api.foursquare.com/v2/venues/{}/tips?client_id={}&client_secret={}&v={}&limit={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION, limit)

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5f06af457b9ef927ef8446cf'},
 'response': {'tips': {'count': 53,
   'items': [{'id': '563a2093cd10d1146b83d990',
     'createdAt': 1446650003,
     'text': "Can't go wrong with the house pad Thai! Prices also seem cheaper than their other locations, maybe due to location.",
     'type': 'user',
     'canonicalUrl': 'https://foursquare.com/item/563a2093cd10d1146b83d990',
     'photo': {'id': '563a213fcd10509fe847ee18',
      'createdAt': 1446650175,
      'source': {'name': 'Foursquare for iOS',
       'url': 'https://foursquare.com/download/#/iphone'},
      'prefix': 'https://fastly.4sqi.net/img/general/',
      'suffix': '/126247549_rkK5spH3F8LZH8T9wHcTw7pHIIZVRVRARpI5Ob5s57w.jpg',
      'width': 1440,
      'height': 1440,
      'visibility': 'public'},
     'photourl': 'https://fastly.4sqi.net/img/general/original/126247549_rkK5spH3F8LZH8T9wHcTw7pHIIZVRVRARpI5Ob5s57w.jpg',
     'lang': 'en',
     'likes': {'count': 0, 'groups': []},
     'logView

In [197]:
#Get tips and associated features
tips = results['response']['tips']['items']

tip = results['response']['tips']['items'][0]
tip.keys()

dict_keys(['id', 'createdAt', 'text', 'type', 'canonicalUrl', 'photo', 'photourl', 'lang', 'likes', 'logView', 'agreeCount', 'disagreeCount', 'todo', 'user', 'authorInteractionType'])

In [198]:
#format and display tips
pd.set_option('display.max_colwidth', -1)

tips_df = json_normalize(tips) # json normalize tips

# columns to keep
filtered_columns = ['text', 'agreeCount', 'disagreeCount', 'id', 'user.firstName', 'user.lastName', 'user.gender', 'user.id']
tips_filtered = tips_df.loc[:, filtered_columns]

# display tips
tips_filtered

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)


Unnamed: 0,text,agreeCount,disagreeCount,id,user.firstName,user.lastName,user.gender,user.id
0,"Can't go wrong with the house pad Thai! Prices also seem cheaper than their other locations, maybe due to location.",1,0,563a2093cd10d1146b83d990,🙊🙉🙈,,,126247549


## 4. Explore around "Sukothai @ Parliament St"

In [199]:
latitude = 43.658444
longitude = -79.365681
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?client_id=RH22D4PCG0P0WNNVLKO0ML4EUJ0T01KJBSXGTT4PHOIDAFDC&client_secret=J51KSD11D5BGYWHJJIHMSEOGAMUIXJPXSFTV4HIMCJT0PURV&ll=43.658444,-79.365681&v=20200707&radius=500&limit=30'

In [201]:
results = requests.get(url).json()
'There are {} around Sukothai restaurant.'.format(len(results['response']['groups'][0]['items']))

'There are 29 around Sukothai restaurant.'

In [202]:
items = results['response']['groups'][0]['items']
items[0]

{'reasons': {'count': 0,
  'items': [{'summary': 'This spot is popular',
    'type': 'general',
    'reasonName': 'globalInteractionReason'}]},
 'venue': {'id': '4ada57aff964a520972121e3',
  'name': 'Sukhothai',
  'location': {'address': '274 Parliament St',
   'crossStreet': 'btwn Coatsworth St. & Shuter St.',
   'lat': 43.658444465539525,
   'lng': -79.36568085121628,
   'labeledLatLngs': [{'label': 'display',
     'lat': 43.658444465539525,
     'lng': -79.36568085121628}],
   'distance': 0,
   'postalCode': 'M5A 2Z6',
   'cc': 'CA',
   'city': 'Toronto',
   'state': 'ON',
   'country': 'Canada',
   'formattedAddress': ['274 Parliament St (btwn Coatsworth St. & Shuter St.)',
    'Toronto ON M5A 2Z6',
    'Canada']},
  'categories': [{'id': '4bf58dd8d48988d149941735',
    'name': 'Thai Restaurant',
    'pluralName': 'Thai Restaurants',
    'shortName': 'Thai',
    'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/thai_',
     'suffix': '.png'},
    'primary': True}],
  

In [203]:
dataframe = json_normalize(items) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories'] + [col for col in dataframe.columns if col.startswith('venue.location.')] + ['venue.id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# filter the category for each row
dataframe_filtered['venue.categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean columns
dataframe_filtered.columns = [col.split('.')[-1] for col in dataframe_filtered.columns]

dataframe_filtered.head(10)

Unnamed: 0,name,categories,address,crossStreet,lat,lng,labeledLatLngs,distance,postalCode,cc,city,state,country,formattedAddress,neighborhood,id
0,Sukhothai,Thai Restaurant,274 Parliament St,btwn Coatsworth St. & Shuter St.,43.658444,-79.365681,"[{'label': 'display', 'lat': 43.658444465539525, 'lng': -79.36568085121628}]",0,M5A 2Z6,CA,Toronto,ON,Canada,"[274 Parliament St (btwn Coatsworth St. & Shuter St.), Toronto ON M5A 2Z6, Canada]",,4ada57aff964a520972121e3
1,I Love Churros,Food Truck,1017 Dufferin St,Bloor St West,43.658364,-79.365583,"[{'label': 'display', 'lat': 43.65836361668554, 'lng': -79.36558277878224}]",11,M6H 4B5,CA,Toronto,ON,Canada,"[1017 Dufferin St (Bloor St West), Toronto ON M6H 4B5, Canada]",,50899387e4b055a756b254fc
2,Figs Breakfast & Lunch,Breakfast Spot,344 Queen St. E.,at Parliament St.,43.655675,-79.364503,"[{'label': 'display', 'lat': 43.65567455427388, 'lng': -79.3645032892494}]",322,M5A 1S8,CA,Toronto,ON,Canada,"[344 Queen St. E. (at Parliament St.), Toronto ON M5A 1S8, Canada]",,4af59046f964a520e0f921e3
3,Regent Park Aquatic Centre,Pool,640 Dundas St. East,at Sumach St.,43.6606,-79.361392,"[{'label': 'display', 'lat': 43.6606003130796, 'lng': -79.36139162915349}]",420,M5A 2B8,CA,Toronto,ON,Canada,"[640 Dundas St. East (at Sumach St.), Toronto ON M5A 2B8, Canada]",,5092fe87e4b0476c6e7375e2
4,Ontario Restaurant,Diner,383 dundas street east,,43.658883,-79.368369,"[{'label': 'display', 'lat': 43.65888321199789, 'lng': -79.36836906142996}]",221,,CA,Toronto,ON,Canada,"[383 dundas street east, Toronto ON, Canada]",,4ae666dbf964a520dca621e3
5,Daniels Spectrum,Performing Arts Venue,585 Dundas St. East,btwn Regent & Sumach,43.660137,-79.361808,"[{'label': 'display', 'lat': 43.66013679276432, 'lng': -79.36180783336452}]",364,M5A 2B7,CA,Toronto,ON,Canada,"[585 Dundas St. East (btwn Regent & Sumach), Toronto ON M5A 2B7, Canada]",,505af105e4b0df441225430b
6,Paintbox Bistro,Restaurant,555 Dundas St E,Sackville St,43.66005,-79.362855,"[{'label': 'display', 'lat': 43.660050475903624, 'lng': -79.36285470971839}]",289,M5A 2B7,CA,Toronto,ON,Canada,"[555 Dundas St E (Sackville St), Toronto ON M5A 2B7, Canada]",,505bacb2e4b0474d876eb78c
7,The Yoga Lounge,Yoga Studio,106 Sherbourne St.,at Adelaide St. East,43.655515,-79.364955,"[{'label': 'display', 'lat': 43.65551522261721, 'lng': -79.36495542526245}]",331,,CA,Toronto,ON,Canada,"[106 Sherbourne St. (at Adelaide St. East), Toronto ON, Canada]",,4b58dd55f964a5208f6f28e3
8,Berkeley Church,Event Space,315 Queen St E,at Berkeley St,43.655123,-79.365873,"[{'label': 'display', 'lat': 43.65512324174501, 'lng': -79.36587330410705}]",369,M5A 1S7,CA,Toronto,ON,Canada,"[315 Queen St E (at Berkeley St), Toronto ON M5A 1S7, Canada]",,4ade8ea8f964a5205a7621e3
9,Sumach Espresso,Coffee Shop,118 Sumach St,at Shuter St,43.658135,-79.359515,"[{'label': 'display', 'lat': 43.65813540553308, 'lng': -79.35951549011845}]",497,M5A 3J9,CA,Toronto,ON,Canada,"[118 Sumach St (at Shuter St), Toronto ON M5A 3J9, Canada]",Downtown Toronto,5619551a498e9e35fce2256b


In [204]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=15) # generate map centred around Ecco


# add Ecco as a red circle mark
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    popup='Ecco',
    fill=True,
    color='red',
    fill_color='red',
    fill_opacity=0.6
    ).add_to(venues_map)


# add popular spots to the map as blue circle markers
for lat, lng, label in zip(dataframe_filtered.lat, dataframe_filtered.lng, dataframe_filtered.categories):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        fill=True,
        color='blue',
        fill_color='blue',
        fill_opacity=0.6
        ).add_to(venues_map)

# display map
venues_map

### 5. Explore trending venues near Sukothai @ Parliament St.

In [205]:
# define URL
url = 'https://api.foursquare.com/v2/venues/trending?client_id={}&client_secret={}&ll={},{}&v={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION)

# send GET request and get trending venues
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5f06bc1c1582505d8f4f0a0e'},
 'response': {'venues': []}}

In [206]:
if len(results['response']['venues']) == 0:
    trending_venues_df = 'No trending venues are available at the moment!'
    
else:
    trending_venues = results['response']['venues']
    trending_venues_df = json_normalize(trending_venues)

    # filter columns
    columns_filtered = ['name', 'categories'] + ['location.distance', 'location.city', 'location.postalCode', 'location.state', 'location.country', 'location.lat', 'location.lng']
    trending_venues_df = trending_venues_df.loc[:, columns_filtered]

    # filter the category for each row
    trending_venues_df['categories'] = trending_venues_df.apply(get_category_type, axis=1)

In [207]:
# display trending venues
trending_venues_df

'No trending venues are available at the moment!'

In [208]:
if len(results['response']['venues']) == 0:
    venues_map = 'Cannot generate visual as no trending venues are available at the moment!'

else:
    venues_map = folium.Map(location=[latitude, longitude], zoom_start=15) # generate map centred around Ecco


    # add Ecco as a red circle mark
    folium.features.CircleMarker(
        [latitude, longitude],
        radius=10,
        popup='Ecco',
        fill=True,
        color='red',
        fill_color='red',
        fill_opacity=0.6
    ).add_to(venues_map)


    # add the trending venues as blue circle markers
    for lat, lng, label in zip(trending_venues_df['location.lat'], trending_venues_df['location.lng'], trending_venues_df['name']):
        folium.features.CircleMarker(
            [lat, lng],
            radius=5,
            poup=label,
            fill=True,
            color='blue',
            fill_color='blue',
            fill_opacity=0.6
        ).add_to(venues_map)

In [209]:
# display map
venues_map

'Cannot generate visual as no trending venues are available at the moment!'