# Segmenting and Clustering Neighborhoods in Toronto

In [1]:
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import requests # library to handle requests
from bs4 import BeautifulSoup

Waiting for a Spark session to start...
Spark Initialization Done! ApplicationId = app-20181017065710-0001


In [2]:
# Create the dataframe consist of three columns: PostalCode, Borough, and Neighborhood

URL ='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M' 
Rq = requests.get(URL)
BS = BeautifulSoup(Rq.content,'lxml')
table = BS.find_all('table')[0] 
df = pd.read_html(str(table))[0]
df_pcode = pd.DataFrame(df)

df_pcode.head()

Unnamed: 0,0,1,2
0,Postcode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village


In [3]:
#  Drop the first coulmn and rename the coulum names

df_pcode.drop(0,inplace=True)
df_pcode.columns = ['Postcode','Borough','Neighborhood']
df_pcode.head()

Unnamed: 0,Postcode,Borough,Neighborhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront


In [4]:
# Remove boroughs with 'not assigned' 

df_pcode1 = df_pcode[df_pcode.Borough.str.contains("Not assigned") == False]

# Reset index

df_pcode2 = df_pcode1.reset_index()
df_pcode3 = df_pcode2.drop(df_pcode2.columns[0],axis=1)
df_pcode3.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


In [5]:
# Combine rows with the same Postcode & Borough

df_pcode4 = df_pcode3.groupby(['Postcode', 'Borough'])['Neighborhood'].apply(', '.join).reset_index()
df_pcode4.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [6]:
# If a cell has a borough but a Not assigned neighborhood, then assign the neighborhood cell the same as the borough.

row=0
for row in range(103):
    if df_pcode4.Neighborhood[row]=='Not assigned':
        df_pcode4.Neighborhood[row] = df_pcode4.Borough[row]
        row=row+1
    
df_pcode4.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [7]:
# size of the dataframe
df_pcode4.shape

(103, 3)

## Add the latitude and the longitude coordinates of each neighborhood


In [8]:
#https://geocoder.readthedocs.io/index.html
!pip install geocoder

Collecting geocoder
  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
[K    100% |################################| 102kB 3.0MB/s ta 0:00:01
[?25hCollecting six (from geocoder)
  Downloading https://files.pythonhosted.org/packages/67/4b/141a581104b1f6397bfa78ac9d43d8ad29a7ca43ea90a2d863fe3056e86a/six-1.11.0-py2.py3-none-any.whl
Collecting future (from geocoder)
  Downloading https://files.pythonhosted.org/packages/00/2b/8d082ddfed935f3608cc61140df6dcbf0edea1bc3ab52fb6c29ae3e81e85/future-0.16.0.tar.gz (824kB)
[K    100% |################################| 829kB 936kB/s eta 0:00:01
[?25hCollecting click (from geocoder)
  Downloading https://files.pythonhosted.org/packages/fa/37/45185cb5abbc30d7257104c434fe0b07e5a195a6847506c074527aa599ec/Click-7.0-py2.py3-none-any.whl (81kB)
[K    100% |################################| 81kB 1.9MB/s eta 0:00:01
[?25hCollecting requests 

In [9]:
import geocoder
import time

In [10]:
# Add columns for Latitude and Longitude

df_pcode4['Latitude'] = 0.0
df_pcode4['Longitude'] = 0.0
df_pcode4.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",0.0,0.0
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",0.0,0.0
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",0.0,0.0
3,M1G,Scarborough,Woburn,0.0,0.0
4,M1H,Scarborough,Cedarbrae,0.0,0.0


#### I have used the following code but the used geocoder package doesn't respond. Thus, the provided csv file has been used to populate the geographical coordinates in the dataframe   


start_time=time.time()
row=0
for row in range(103):
    print(row)
    lat_lng_coords = None
    while(lat_lng_coords is None):
        g = geocoder.google('{}, Toronto, Ontario'.format(df_pcode4.Postcode[row]))
        lat_lng_coords = g.latlng
        
    print("--- %s seconds ---" % round((time.time() - start_time), 2))
    df_pcode4.Latitude[row] = lat_lng_coords[0]
    df_pcode4.Longitude[row] = lat_lng_coords[1]
    row=row+1
    
df_pcode4

In [11]:
# download the csv file of longitude and latitude

!wget -O latlog.csv http://cocl.us/Geospatial_data

--2018-10-17 06:57:24--  http://cocl.us/Geospatial_data
Resolving cocl.us (cocl.us)... 169.48.113.201
Connecting to cocl.us (cocl.us)|169.48.113.201|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://cocl.us/Geospatial_data [following]
--2018-10-17 06:57:24--  https://cocl.us/Geospatial_data
Connecting to cocl.us (cocl.us)|169.48.113.201|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2018-10-17 06:57:24--  https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv
Resolving ibm.box.com (ibm.box.com)... 107.152.26.197
Connecting to ibm.box.com (ibm.box.com)|107.152.26.197|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://ibm.ent.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2018-10-17 06:57:25--  https://ibm.ent.box.com/shared/

In [12]:
df_latlog = pd.read_csv("latlog.csv", delimiter=",")
df_pcode4['Latitude'] = df_latlog[['Latitude']].values
df_pcode4['Longitude'] = df_latlog[['Longitude']].values
df_pcode4

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


# Exploration and cluster of the neighborhoods in Toronto

### Import necessary Libraries


In [13]:
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values
import random # library for random number generation

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Solving environment: done

# All requested packages already installed.

Solving environment: done

# All requested packages already installed.

Folium installed
Libraries imported.


### Define Foursquare Credentials and Version

In [45]:
# The code was removed by Watson Studio for sharing.

#### I would like to studey at University of student. I am a vegan and would like to explore vegan restaurants nearby Universiity of Toronto. Address of the  downtown Toronto (St. George) campus is 27 Kings College Cir, Toronto, ON.

In [15]:
address = '27 Kings College Cir, Toronto, ON'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)



43.6607225 -79.3959198095151


# 1. Search for a specific venue category 

#### The venue catagory is vegan restaurants nearby (within 1000m)Univiversity of Toronto. 

In [16]:
search_query = 'Vegan'
radius = 1000
print(search_query + ' .... OK!')

Vegan .... OK!


#### Define the corresponding URL

In [17]:
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/search?client_id=VY1SPSHUQUTWEU5VHK41W3PNIS34Z5HRMVIV5IHLKYUJCX2H&client_secret=K2YCKM1HC5LVURD45BQZOC1J3I1KDCUFSY1AOMWSOEPFGRR3&ll=43.6607225,-79.3959198095151&v=20180604&query=Vegan&radius=1000&limit=30'

In [18]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5bc71619f594df1d7c0c8fdf'},
 'response': {'venues': [{'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/vegetarian_',
       'suffix': '.png'},
      'id': '4bf58dd8d48988d1d3941735',
      'name': 'Vegetarian / Vegan Restaurant',
      'pluralName': 'Vegetarian / Vegan Restaurants',
      'primary': True,
      'shortName': 'Vegetarian / Vegan'}],
    'hasPerk': False,
    'id': '59e16436ccad6b067efe4383',
    'location': {'address': '382 College St',
     'cc': 'CA',
     'city': 'Toronto',
     'country': 'Canada',
     'distance': 861,
     'formattedAddress': ['382 College St', 'Toronto ON M5T 1S7', 'Canada'],
     'labeledLatLngs': [{'label': 'display',
       'lat': 43.657007,
       'lng': -79.405302}],
     'lat': 43.657007,
     'lng': -79.405302,
     'postalCode': 'M5T 1S7',
     'state': 'ON'},
    'name': 'The Hogtown Vegan',
    'referralId': 'v-1539773977'}]}}

##### I am sad to learn that there is only one vegan restaurant within 1km radius from Universit of Toronto

#### Get relevant part of JSON and transform it into a pandas dataframe

In [19]:
# assign relevant part of JSON to venues
venues = results['response']['venues']

# tranform venues into a dataframe
dataframe = json_normalize(venues)
dataframe.head()

Unnamed: 0,categories,hasPerk,id,location.address,location.cc,location.city,location.country,location.distance,location.formattedAddress,location.labeledLatLngs,location.lat,location.lng,location.postalCode,location.state,name,referralId
0,"[{'shortName': 'Vegetarian / Vegan', 'name': '...",False,59e16436ccad6b067efe4383,382 College St,CA,Toronto,Canada,861,"[382 College St, Toronto ON M5T 1S7, Canada]","[{'label': 'display', 'lng': -79.405302, 'lat'...",43.657007,-79.405302,M5T 1S7,ON,The Hogtown Vegan,v-1539773977


#### Define information of interest and filter dataframe


In [20]:
# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
dataframe_filtered.columns = [column.split('.')[-1] for column in dataframe_filtered.columns]

dataframe_filtered

Unnamed: 0,name,categories,address,cc,city,country,distance,formattedAddress,labeledLatLngs,lat,lng,postalCode,state,id
0,The Hogtown Vegan,Vegetarian / Vegan Restaurant,382 College St,CA,Toronto,Canada,861,"[382 College St, Toronto ON M5T 1S7, Canada]","[{'label': 'display', 'lng': -79.405302, 'lat'...",43.657007,-79.405302,M5T 1S7,ON,59e16436ccad6b067efe4383


#### Let's visualize the Vegan restaurant that are nearby to University of Toronto, downtown Toronto (St. George) campus.

In [21]:
dataframe_filtered.name

0    The Hogtown Vegan
Name: name, dtype: object

In [22]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=15) # generate map centred around the Conrad Hotel

# add a red circle marker to represent the Conrad Hotel
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='red',
    popup='University of Toronto',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(venues_map)

# add the Vegan restaurant as blue circle markers
for lat, lng, label in zip(dataframe_filtered.lat, dataframe_filtered.lng, dataframe_filtered.categories):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)

# display map
venues_map

# 2. Explore a Given Venue

#### A. Let's explore the closest vegan restaurant (Hogtown Vegan Restaurant)

In [23]:
venue_id = '59e16436ccad6b067efe4383' # ID of The Hogtown's Vegan Restaurant
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
url

'https://api.foursquare.com/v2/venues/59e16436ccad6b067efe4383?client_id=VY1SPSHUQUTWEU5VHK41W3PNIS34Z5HRMVIV5IHLKYUJCX2H&client_secret=K2YCKM1HC5LVURD45BQZOC1J3I1KDCUFSY1AOMWSOEPFGRR3&v=20180604'

#### Send GET request for result

In [24]:
result = requests.get(url).json()
print(result['response']['venue'].keys())
result['response']['venue']

dict_keys(['createdAt', 'attributes', 'timeZone', 'beenHere', 'reasons', 'likes', 'location', 'verified', 'stats', 'dislike', 'id', 'bestPhoto', 'hereNow', 'rating', 'ok', 'canonicalUrl', 'shortUrl', 'allowMenuUrlEdit', 'inbox', 'listed', 'colors', 'ratingColor', 'categories', 'specials', 'price', 'name', 'photos', 'contact', 'ratingSignals', 'pageUpdates', 'tips'])


{'allowMenuUrlEdit': True,
 'attributes': {'groups': [{'count': 1,
    'items': [{'displayName': 'Price', 'displayValue': '$$', 'priceTier': 2}],
    'name': 'Price',
    'summary': '$$',
    'type': 'price'},
   {'count': 8,
    'items': [{'displayName': 'Brunch', 'displayValue': 'Brunch'},
     {'displayName': 'Lunch', 'displayValue': 'Lunch'},
     {'displayName': 'Dinner', 'displayValue': 'Dinner'}],
    'name': 'Menus',
    'summary': 'Dinner, Lunch & more',
    'type': 'serves'}]},
 'beenHere': {'count': 0,
  'lastCheckinExpiredAt': 0,
  'marked': False,
  'unconfirmedCount': 0},
 'bestPhoto': {'createdAt': 1533168346,
  'height': 1440,
  'id': '5b624ada872f7d002cd2bc76',
  'prefix': 'https://igx.4sqi.net/img/general/',
  'source': {'name': 'Foursquare for iOS',
   'url': 'https://foursquare.com/download/#/iphone'},
  'suffix': '/32934307_QcNKiezyCpFQDL243WYXtW3COuwa_5bWgsgUU2eofL0.jpg',
  'visibility': 'public',
  'width': 1440},
 'canonicalUrl': 'https://foursquare.com/v/the-ho

#### B. Get the venue's overall rating

In [25]:
try:
    print(result['response']['venue']['rating'])
except:
    print('This venue has not been rated yet.')

7.7


#### C. Get the number of tips

In [26]:
result['response']['venue']['tips']['count']

5

#### D. Get the venue's tips

In [27]:
## Ecco Tips
limit = 15 # set limit to be greater than or equal to the total number of tips
url = 'https://api.foursquare.com/v2/venues/{}/tips?client_id={}&client_secret={}&v={}&limit={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION, limit)

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5bc7161a4c1f67199abb94f4'},
 'response': {'tips': {'count': 5,
   'items': [{'agreeCount': 0,
     'authorInteractionType': 'liked',
     'canonicalUrl': 'https://foursquare.com/item/5b3ac1e06f706a002c6eb9dc',
     'createdAt': 1530577376,
     'disagreeCount': 0,
     'id': '5b3ac1e06f706a002c6eb9dc',
     'lang': 'en',
     'likes': {'count': 0, 'groups': []},
     'logView': True,
     'photo': {'createdAt': 1530577378,
      'height': 1920,
      'id': '5b3ac1e24420d8003a4b9cca',
      'prefix': 'https://igx.4sqi.net/img/general/',
      'source': {'name': 'Foursquare for iOS',
       'url': 'https://foursquare.com/download/#/iphone'},
      'suffix': '/37845893_bSKNeSSeNBmIUDF5gFkmrrG6S3MdllkivNHpKG9rQ2k.jpg',
      'visibility': 'public',
      'width': 1440},
     'photourl': 'https://igx.4sqi.net/img/general/original/37845893_bSKNeSSeNBmIUDF5gFkmrrG6S3MdllkivNHpKG9rQ2k.jpg',
     'text': 'Friendly, relaxed atmosphere in a great location. Foo

#### Get tips and list of associated features

In [28]:
tips = results['response']['tips']['items']

tip = results['response']['tips']['items'][0]
tip.keys()

dict_keys(['createdAt', 'lang', 'canonicalUrl', 'agreeCount', 'user', 'type', 'likes', 'todo', 'disagreeCount', 'photo', 'text', 'authorInteractionType', 'id', 'photourl', 'logView'])

#### Format column width and display all tips

In [29]:
pd.set_option('display.max_colwidth', -1)

tips_df = json_normalize(tips) # json normalize tips

# columns to keep
filtered_columns = ['text', 'agreeCount', 'disagreeCount', 'id', 'user.firstName', 'user.lastName', 'user.gender', 'user.id']
tips_filtered = tips_df.loc[:, filtered_columns]

# display tips
tips_filtered

Unnamed: 0,text,agreeCount,disagreeCount,id,user.firstName,user.lastName,user.gender,user.id
0,"Friendly, relaxed atmosphere in a great location. Food and drink super and served really quick. I had the corn chips with guacamole and the mac n cheese and it was very very good. Loved the place!",0,0,5b3ac1e06f706a002c6eb9dc,Denise,O'Connor,female,37845893


# 3. Search a Foursquare User

#### Define URL, send GET request and display features associated with user

In [30]:
user_id = '37845893' # user ID with most agree counts and complete profile

url = 'https://api.foursquare.com/v2/users/{}?client_id={}&client_secret={}&v={}'.format(user_id, CLIENT_ID, CLIENT_SECRET, VERSION) # define URL

# send GET request
results = requests.get(url).json()
user_data = results['response']['user']

# display features associated with user
user_data.keys()

dict_keys(['mayorships', 'gender', 'canonicalUrl', 'firstName', 'bio', 'type', 'checkins', 'lenses', 'friends', 'photo', 'lists', 'photos', 'contact', 'id', 'homeCity', 'lastName', 'tips'])

In [31]:
print('First Name: ' + user_data['firstName'])
print('Last Name: ' + user_data['lastName'])
print('Home City: ' + user_data['homeCity'])

First Name: Denise
Last Name: O'Connor
Home City: Edinburgh


#### How many tips has this user submitted?

In [32]:
user_data['tips']

{'count': 6}

###### Hmmm... Denise is not an active Foursquare user, only with 6 tips.


#### Get User's tips

In [33]:
# define tips URL
url = 'https://api.foursquare.com/v2/users/{}/tips?client_id={}&client_secret={}&v={}&limit={}'.format(user_id, CLIENT_ID, CLIENT_SECRET, VERSION, limit)

# send GET request and get user's tips
results = requests.get(url).json()
tips = results['response']['tips']['items']

# format column width
pd.set_option('display.max_colwidth', -1)

tips_df = json_normalize(tips)

# filter columns
filtered_columns = ['text', 'agreeCount', 'disagreeCount', 'id']
tips_filtered = tips_df.loc[:, filtered_columns]

# display user's tips
tips_filtered

Unnamed: 0,text,agreeCount,disagreeCount,id
0,"Loved this wee place. The Classic egg and cheese muffin is the best I’ve ever tasted. Fresh orange juice, coffee and great service.",0,0,5b494d6fd4cc98002c7a22ba
1,"Friendly, relaxed atmosphere in a great location. Food and drink super and served really quick. I had the corn chips with guacamole and the mac n cheese and it was very very good. Loved the place!",0,0,5b3ac1e06f706a002c6eb9dc
2,Lunch menu great selection and only £7.95 for two courses until 4pm.,0,0,5946981025fb7b385403ce27
3,Laid back atmosphere great menu lovely staff.,0,0,5808cba6d67ca60cf58b0ba9
4,"Everything. Wine, beer, relax and watch the beer being brewed.",0,0,54023227498e2d22bd4a6342
5,Have dinner in the Rotunda and park there. Get the code when you leave then enjoy the show at the Hydro. Leave sharp and you'll get out of the City in minutes. Dx,0,0,5402318a498e67869a96ff39


#### Let's get the venue for the tip with the greatest number of agree counts


In [34]:
tip_id = '4f8ef19ce4b007514d63404e' # tip id

# define URL
url = 'http://api.foursquare.com/v2/tips/{}?client_id={}&client_secret={}&v={}'.format(tip_id, CLIENT_ID, CLIENT_SECRET, VERSION)

# send GET Request and examine results
result = requests.get(url).json()
print(result['response']['tip']['venue']['name'])
print(result['response']['tip']['venue']['location'])

Old Navy
{'city': 'New York', 'state': 'NY', 'crossStreet': 'btwn 6th & 7th Ave', 'lng': -73.98991584777832, 'country': 'United States', 'formattedAddress': ['150 W 34th St (btwn 6th & 7th Ave)', 'New York, NY 10001', 'United States'], 'address': '150 W 34th St', 'postalCode': '10001', 'cc': 'US', 'lat': 40.750248045828585, 'labeledLatLngs': [{'label': 'display', 'lng': -73.98991584777832, 'lat': 40.750248045828585}]}


#### Get User's friends

In [35]:
user_friends = json_normalize(user_data['friends']['groups'][0]['items'])
user_friends

Unnamed: 0,bio,firstName,gender,homeCity,id,lastName,lists.groups,photo.prefix,photo.suffix,tips.count
0,,Jamie,male,"Ayr, SCT",89785516,McCallum,"[{'type': 'created', 'items': [], 'count': 2}]",https://igx.4sqi.net/img/user/,/89785516-BLLRGV2PIF3AQLGM.jpg,0
1,,David,male,"Los Angeles, CA",2700751,Mitchell,"[{'type': 'created', 'items': [], 'count': 2}]",https://igx.4sqi.net/img/user/,/JW3XIWPLSX3FUH1W.jpg,0
2,,Mark,male,"Barrhead, UK",18952005,Houston,"[{'type': 'created', 'items': [], 'count': 2}]",https://igx.4sqi.net/img/user/,/UEJI4FM30Z5TYSZR.jpg,0


######  Denise has few number of friends in Foursquare.

### Retrieve the User's Profile Image

In [36]:
user_data

{'bio': '',
 'canonicalUrl': 'https://foursquare.com/user/37845893',
 'checkins': {'count': 58, 'items': []},
 'contact': {},
 'firstName': 'Denise',
 'friends': {'count': 3,
  'groups': [{'count': 3,
    'items': [{'bio': '',
      'contact': {},
      'firstName': 'Jamie',
      'gender': 'male',
      'homeCity': 'Ayr, SCT',
      'id': '89785516',
      'lastName': 'McCallum',
      'lists': {'groups': [{'count': 2, 'items': [], 'type': 'created'}]},
      'photo': {'prefix': 'https://igx.4sqi.net/img/user/',
       'suffix': '/89785516-BLLRGV2PIF3AQLGM.jpg'},
      'tips': {'count': 0}},
     {'bio': '',
      'contact': {},
      'firstName': 'David',
      'gender': 'male',
      'homeCity': 'Los Angeles, CA',
      'id': '2700751',
      'lastName': 'Mitchell',
      'lists': {'groups': [{'count': 2, 'items': [], 'type': 'created'}]},
      'photo': {'prefix': 'https://igx.4sqi.net/img/user/',
       'suffix': '/JW3XIWPLSX3FUH1W.jpg'},
      'tips': {'count': 0}},
     {'bio': 

In [37]:
# 1. grab prefix of photo
# 2. grab suffix of photo
# 3. concatenate them using the image size  
Image(url='https://igx.4sqi.net/img/user/300x300/89785516-BLLRGV2PIF3AQLGM.jpg')

##### Nice user profile picture with his cute toddlers!

# 4. Explore a location

#### Let's explore popular spots around University of Toronto. 

In [38]:
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?client_id=VY1SPSHUQUTWEU5VHK41W3PNIS34Z5HRMVIV5IHLKYUJCX2H&client_secret=K2YCKM1HC5LVURD45BQZOC1J3I1KDCUFSY1AOMWSOEPFGRR3&ll=43.6607225,-79.3959198095151&v=20180604&radius=1000&limit=30'

In [39]:
results = requests.get(url).json()
'There are {} around Universty of Toronto.'.format(len(results['response']['groups'][0]['items']))

'There are 30 around Universty of Toronto.'

#### Get relevant part of JSON

In [40]:
items = results['response']['groups'][0]['items']
items[0]

{'reasons': {'count': 0,
  'items': [{'reasonName': 'globalInteractionReason',
    'summary': 'This spot is popular',
    'type': 'general'}]},
 'referralId': 'e-0-4b9d206bf964a520e69136e3-0',
 'venue': {'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/park_',
     'suffix': '.png'},
    'id': '4bf58dd8d48988d163941735',
    'name': 'Park',
    'pluralName': 'Parks',
    'primary': True,
    'shortName': 'Park'}],
  'id': '4b9d206bf964a520e69136e3',
  'location': {'address': 'University Ave.',
   'cc': 'CA',
   'city': 'Toronto',
   'country': 'Canada',
   'crossStreet': 'at Wellesley Ave.',
   'distance': 468,
   'formattedAddress': ['University Ave. (at Wellesley Ave.)',
    'Toronto ON M5R 2E8',
    'Canada'],
   'labeledLatLngs': [{'label': 'display',
     'lat': 43.66394609897775,
     'lng': -79.39217952520835}],
   'lat': 43.66394609897775,
   'lng': -79.39217952520835,
   'postalCode': 'M5R 2E8',
   'state': 'ON'},
  'name': "Queen's Par

In [41]:
dataframe = json_normalize(items) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories'] + [col for col in dataframe.columns if col.startswith('venue.location.')] + ['venue.id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# filter the category for each row
dataframe_filtered['venue.categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean columns
dataframe_filtered.columns = [col.split('.')[-1] for col in dataframe_filtered.columns]

dataframe_filtered.head(10)

Unnamed: 0,name,categories,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,neighborhood,postalCode,state,id
0,Queen's Park,Park,University Ave.,CA,Toronto,Canada,at Wellesley Ave.,468,"[University Ave. (at Wellesley Ave.), Toronto ON M5R 2E8, Canada]","[{'label': 'display', 'lng': -79.39217952520835, 'lat': 43.66394609897775}]",43.663946,-79.39218,,M5R 2E8,ON,4b9d206bf964a520e69136e3
1,Hart House Theatre,Theater,7 Hart House Cir.,CA,Toronto,Canada,University of Toronto,334,"[7 Hart House Cir. (University of Toronto), Toronto ON M5S 3H3, Canada]","[{'label': 'display', 'lng': -79.3946163990837, 'lat': 43.66357134217992}]",43.663571,-79.394616,,M5S 3H3,ON,4ce47a4bc850721e2b8bc38a
2,Kekou Gelato House,Ice Cream Shop,13 Baldwin St,CA,Toronto,Canada,btwn Beverley & McCaul,575,"[13 Baldwin St (btwn Beverley & McCaul), Toronto ON M5T 1L1, Canada]","[{'label': 'display', 'lng': -79.39271479734887, 'lat': 43.65609938127313}]",43.656099,-79.392715,,M5T 1L1,ON,51e87a62498e8eea21f772d3
3,Prenup Pub,Gastropub,191 College St,CA,Toronto,Canada,Henry St,222,"[191 College St (Henry St), Toronto ON M5S 3E4, Canada]","[{'label': 'display', 'lng': -79.39482802742828, 'lat': 43.658882366438164}]",43.658882,-79.394828,,M5S 3E4,ON,54238a83498e5efd4a50435b
4,A & C World,Gaming Cafe,452 Spadina Ave.,CA,Toronto,Canada,at College St.,485,"[452 Spadina Ave. (at College St.), Toronto ON M5S 2J3, Canada]","[{'label': 'display', 'lng': -79.39984655786655, 'lat': 43.65740880302916}]",43.657409,-79.399847,,M5S 2J3,ON,4dbc33b90cb691071c9c7d29
5,Vegetarian Haven,Vegetarian / Vegan Restaurant,17 Baldwin St,CA,Toronto,Canada,,582,"[17 Baldwin St, Toronto ON M5T 1L1, Canada]","[{'label': 'display', 'lng': -79.3927577742003, 'lat': 43.656016028357094}]",43.656016,-79.392758,,M5T 1L1,ON,4aeb711ef964a52017c221e3
6,Mother's Dumplings,Dumpling Restaurant,421 Spadina Ave.,CA,Toronto,Canada,btwn College & Cecil St.,497,"[421 Spadina Ave. (btwn College & Cecil St.), Toronto ON M5T 2G6, Canada]","[{'label': 'display', 'lng': -79.39955196237875, 'lat': 43.65710501801941}]",43.657105,-79.399552,,M5T 2G6,ON,4b7716a9f964a520297c2ee3
7,Carmen's City Market,Supermarket,301 College Street,CA,Toronto,Canada,,573,"[301 College Street, Toronto ON M5T 1S2, Canada]","[{'label': 'display', 'lng': -79.40162631000557, 'lat': 43.65764732156707}]",43.657647,-79.401626,,M5T 1S2,ON,57b8cab3498efd6376fd296b
8,Booster Juice,Smoothie Shop,"257 College Street, Main Floor",CA,Toronto,Canada,btwn Huron & Spadina,406,"[257 College Street, Main Floor (btwn Huron & Spadina), Toronto ON M5T 1R5, Canada]","[{'label': 'display', 'lng': -79.3993, 'lat': 43.658016}]",43.658016,-79.3993,,M5T 1R5,ON,4baa6c4af964a520bb683ae3
9,Rasa,Restaurant,196 Robert Street,CA,,Canada,,688,"[196 Robert Street, Canada]","[{'label': 'display', 'lng': -79.40398803188654, 'lat': 43.662756751275445}]",43.662757,-79.403988,,,,527d450111d25050de4ea0d8


#### Let's visualize these items on the map around our location

In [42]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=15) # generate map centred around Ecco


# add Ecco as a red circle mark
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    popup='Ecco',
    fill=True,
    color='red',
    fill_color='red',
    fill_opacity=0.6
    ).add_to(venues_map)


# add popular spots to the map as blue circle markers
for lat, lng, label in zip(dataframe_filtered.lat, dataframe_filtered.lng, dataframe_filtered.categories):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        fill=True,
        color='blue',
        fill_color='blue',
        fill_opacity=0.6
        ).add_to(venues_map)

# display map
venues_map

# 5. Explore Trending Venues

#### Let's explor the trending venues around University of Toronto, King's campus, Downtown Toronto.

In [43]:
# define URL
url = 'https://api.foursquare.com/v2/venues/trending?client_id={}&client_secret={}&ll={},{}&v={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION)

# send GET request and get trending venues
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5bc7161cdb04f50f805d46d9'},
 'response': {'venues': []}}

#### Check if any venues are trending at this time.

In [44]:
if len(results['response']['venues']) == 0:
    trending_venues_df = 'No trending venues are available at the moment!'
    
else:
    trending_venues = results['response']['venues']
    trending_venues_df = json_normalize(trending_venues)

    # filter columns
    columns_filtered = ['name', 'categories'] + ['location.distance', 'location.city', 'location.postalCode', 'location.state', 'location.country', 'location.lat', 'location.lng']
    trending_venues_df = trending_venues_df.loc[:, columns_filtered]

    # filter the category for each row
    trending_venues_df['categories'] = trending_venues_df.apply(get_category_type, axis=1)
    
    # display trending venues
trending_venues_df

'No trending venues are available at the moment!'

#### I couldn't able to get trending venues. The probable reason is there is no highest foot trafic when I have tried to fetche. The time was around 2:00pm in my local time but  6:00AM in Toronto's time.