### The table was copied from Wikipedia into a Google Spreadsheet file,
### which was then saved as a CSV file to the local computer and read by Pandas from there. 

In [1]:
# Import libraries and dataset
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import seaborn as sns
sns.set()

# Reading the content of the CSV file into a Pandas Dataframe
df = pd.read_csv('/Users/yslim/Desktop/NY.csv')
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### Preprocessing Data

In [2]:
# Dropping all entries with 'Borough' == Not assigned
df = df[df.Borough != 'Not assigned']

# Sort all values according to Postcode
df.sort_values("Postcode", inplace=True) 

# Combining entires with same Postcode but different Neighbourhood into a single entry and a new DF
df2 = df.groupby('Postcode')['Neighbourhood'].apply(','.join).reset_index()

# Drop duplicates in the 1st DF and convert all values to Strings
df = df.drop_duplicates(subset='Postcode', keep="first")
df = df.applymap(str) 

# Make both DF index the same
df2.index = df.index
# Move the 'Borough' column from DF1 to DF2
df2[['Borough']] = df[['Borough']]
df2.head(11)

Unnamed: 0,Postcode,Neighbourhood,Borough
11,M1B,"Rouge,Malvern",Scarborough
29,M1C,"Port Union,Rouge Hill,Highland Creek",Scarborough
42,M1E,"Guildwood,Morningside,West Hill",Scarborough
53,M1G,Woburn,Scarborough
62,M1H,Cedarbrae,Scarborough
76,M1J,Scarborough Village,Scarborough
91,M1K,"East Birchmount Park,Ionview,Kennedy Park",Scarborough
108,M1L,"Golden Mile,Oakridge,Clairlea",Scarborough
123,M1M,"Cliffcrest,Scarborough Village West,Cliffside",Scarborough
141,M1N,"Cliffside West,Birch Cliff",Scarborough


In [3]:
df2.shape

(103, 3)

In [4]:
import sys
!{sys.executable} -m pip install geocoder



## Getting the Lats and Longs

In [5]:
import geocoder # import geocoder

pos = df2['Postcode'].tolist()

lats = []
longs = []

for p in pos:
    
    lat_lng_coords = None
    
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(p))
        lat_lng_coords = g.latlng
        
    lats.append(lat_lng_coords[0])
    longs.append(lat_lng_coords[1])
    
df2['Latitude'] = lats
df2['Longtitude'] = longs

df2.head()

Unnamed: 0,Postcode,Neighbourhood,Borough,Latitude,Longtitude
11,M1B,"Rouge,Malvern",Scarborough,43.811525,-79.195517
29,M1C,"Port Union,Rouge Hill,Highland Creek",Scarborough,43.78573,-79.15875
42,M1E,"Guildwood,Morningside,West Hill",Scarborough,43.76569,-79.175256
53,M1G,Woburn,Scarborough,43.768359,-79.21759
62,M1H,Cedarbrae,Scarborough,43.769688,-79.23944


### Creating a map of the locations using Folium

In [12]:
import json # library to handle JSON files
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [101]:
import folium # map rendering library

# create map of New York using latitude and longitude values
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df2['Latitude'], df2['Longtitude'], df2['Borough'], df2['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

### Analyzing a particular borough:  Scarborough

In [18]:
scarborough_data = df2[df2['Borough'] == 'Scarborough'].reset_index(drop=True)
scarborough_data.head()

Unnamed: 0,Postcode,Neighbourhood,Borough,Latitude,Longtitude
0,M1B,"Rouge,Malvern",Scarborough,43.811525,-79.195517
1,M1C,"Port Union,Rouge Hill,Highland Creek",Scarborough,43.78573,-79.15875
2,M1E,"Guildwood,Morningside,West Hill",Scarborough,43.76569,-79.175256
3,M1G,Woburn,Scarborough,43.768359,-79.21759
4,M1H,Cedarbrae,Scarborough,43.769688,-79.23944


#### Getting the coordinates of Scarborough

In [26]:
address = 'Scarborough, Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Scarborough are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Scarborough are 43.773077, -79.257774.


In [30]:
# create map of Manhattan using latitude and longitude values
map_manhattan = folium.Map(location=[location.latitude, location.longitude], zoom_start=13)

# add markers to map
for lat, lng, label in zip(scarborough_data['Latitude'], scarborough_data['Longtitude'], scarborough_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_manhattan)  
    
map_manhattan

## Exploring the region with Foursquare API

#### Defining Foursquare login credentials

In [62]:
CLIENT_ID = 'KW2GPGLKGNL3Y2ZPWKUVUJRSPCKDHM1FV4ULZFPZ3GAKHXKF' # your Foursquare ID
CLIENT_SECRET = 'FZ1M0BOJ1PJO4YZVVRLQMRKKJULAN3POJIFR3MODL2SLKHDB' # your Foursquare Secret
VERSION = '20190924'
LIMIT = 30

#### Exploring the first neighbourhood in Scarborough

In [63]:
neighborhood_latitude = scarborough_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = scarborough_data.loc[0, 'Longtitude'] # neighborhood longitude value

neighborhood_name = scarborough_data.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Rouge,Malvern are 43.811525, -79.195517214.


#### Getting the top 100 venues that are in Rouge,Malvern within a radius of 1000 meters.¶

In [64]:
# Constructing the GET request 
LIMIT = 100
radius = 1000 # define radius
# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

# Getting the results back from the GET request
import json
results = requests.get(url).json()


# Just a function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']
    
    
# Cleaning the results and presenting it in a Pandas dataframe
venues = results['response']['groups'][0]['items']    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Canadiana exhibit,Zoo Exhibit,43.817962,-79.193374
1,Wendy's,Fast Food Restaurant,43.807448,-79.199056
2,Ontrack Services,Financial or Legal Service,43.816399,-79.199157
3,Grizzly Bear Exhibit,Zoo Exhibit,43.817031,-79.193458
4,Upper Rouge Trail,Trail,43.809988,-79.186147


In [44]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

6 venues were returned by Foursquare.


## 1. Search for a specific venue category
> `https://api.foursquare.com/v2/venues/`**search**`?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&ll=`**LATITUDE**`,`**LONGITUDE**`&v=`**VERSION**`&query=`**QUERY**`&radius=`**RADIUS**`&limit=`**LIMIT**

### Let's search for Food around Scaborough

In [65]:
search_query = 'Food'
radius = 2000

url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, neighborhood_latitude, neighborhood_longitude, VERSION, search_query, radius, LIMIT)
results = requests.get(url).json()

# assign relevant part of JSON to venues
venues = results['response']['venues']

# tranform venues into a dataframe
dataframe = json_normalize(venues)
dataframe.head()

# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
dataframe_filtered.columns = [column.split('.')[-1] for column in dataframe_filtered.columns]

dataframe_filtered

Unnamed: 0,name,categories,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,postalCode,state,id
0,Rex Pak Food Packaging Ltd,Business Service,85 Thornmount Dr,CA,Scarborough,Canada,,681,"[85 Thornmount Dr, Scarborough ON M1B 5V3, Can...","[{u'lat': 43.805459, u'lng': -79.194344, u'lab...",43.805459,-79.194344,M1B 5V3,ON,57524de4498e4f2143e9c292
1,Rubini West Indian Food Market,Grocery Store,31 Tapscott Rd. Unit B2,CA,Toronto,Canada,,2112,"[31 Tapscott Rd. Unit B2, Toronto ON, Canada]","[{u'lat': 43.8074645075, u'lng': -79.221210479...",43.807465,-79.22121,,ON,4e35b8a8ae60d86c3ac2e8d4
2,Meena's Fine Foods,Indian Restaurant,"1295 Morningside Avenue, Unit 22",CA,Scarborough,Canada,Sheppard Avenue East,855,"[1295 Morningside Avenue, Unit 22 (Sheppard Av...","[{u'lat': 43.8044757584, u'lng': -79.199752807...",43.804476,-79.199753,M1B 4Z4,ON,517dcdb6f1363b7a770a8424
3,Charley's West Indian Foods Ltd,Caribbean Restaurant,1154 Morningside Ave,CA,Scarborough,Canada,,1255,"[1154 Morningside Ave, Scarborough ON M1B 3A4,...","[{u'lat': 43.8005806, u'lng': -79.1993089, u'l...",43.800581,-79.199309,M1B 3A4,ON,4d2c8332342d6dcbe82020cb


## 2. Get a list of venues around the location
Create a new dataframe called scarborough_venues.¶

In [66]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

# type your answer here

scarborough_venues = getNearbyVenues(names=scarborough_data['Neighbourhood'],
                                   latitudes=scarborough_data['Latitude'],
                                   longitudes=scarborough_data['Longtitude']
                                  )


# print the dataframe
scarborough_venues.head(10)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge,Malvern",43.811525,-79.195517,R & K Woodworking Specialists Inc,43.808233,-79.196857,Construction & Landscaping
1,"Rouge,Malvern",43.811525,-79.195517,Canadian Appliance Source Whitby,43.808353,-79.191331,Home Service
2,"Port Union,Rouge Hill,Highland Creek",43.78573,-79.15875,Affordable Toronto Movers,43.787919,-79.162977,Moving Target
3,"Port Union,Rouge Hill,Highland Creek",43.78573,-79.15875,Royal Canadian Legion,43.782533,-79.163085,Bar
4,"Guildwood,Morningside,West Hill",43.76569,-79.175256,Homestead Roofing Repair,43.76514,-79.178663,Construction & Landscaping
5,"Guildwood,Morningside,West Hill",43.76569,-79.175256,Heron Park Community Centre,43.768867,-79.176958,Gym / Fitness Center
6,"Guildwood,Morningside,West Hill",43.76569,-79.175256,Heron Park,43.769327,-79.177201,Park
7,Woburn,43.768359,-79.21759,Starbucks,43.770037,-79.221156,Coffee Shop
8,Woburn,43.768359,-79.21759,Al-Hamd Biryani & Pizza,43.767585,-79.21957,Indian Restaurant
9,Woburn,43.768359,-79.21759,cheapOseo,43.766042,-79.218539,Business Service


#### If you look under the 'Venue' category, you can see there are different shops categories
Let's check how many venues (different shops) were returned for each neighborhood

In [68]:
scarborough_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,14,14,14,14,14,14
Cedarbrae,3,3,3,3,3,3
"Cliffcrest,Scarborough Village West,Cliffside",10,10,10,10,10,10
"Cliffside West,Birch Cliff",6,6,6,6,6,6
"East Birchmount Park,Ionview,Kennedy Park",5,5,5,5,5,5
"Golden Mile,Oakridge,Clairlea",11,11,11,11,11,11
"Guildwood,Morningside,West Hill",3,3,3,3,3,3
L'Amoreaux West,13,13,13,13,13,13
"Maryvale,Wexford",2,2,2,2,2,2
"Milliken,Agincourt North,L'Amoreaux East,Steeles East",2,2,2,2,2,2


### 2A. Analyzing each Neighborhood
Creating a table of Neighborhoods and the shops available there

In [69]:
# one hot encoding
scarborough_onehot = pd.get_dummies(scarborough_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
scarborough_onehot['Neighborhood'] = scarborough_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [scarborough_onehot.columns[-1]] + list(scarborough_onehot.columns[:-1])
scarborough_onehot = scarborough_onehot[fixed_columns]

scarborough_onehot.head()

Unnamed: 0,Neighborhood,Auto Garage,Bakery,Bar,Breakfast Spot,Bubble Tea Shop,Burger Joint,Bus Line,Bus Station,Bus Stop,...,Shanghai Restaurant,Shopping Mall,Skating Rink,Soccer Field,Supermarket,Sushi Restaurant,Thai Restaurant,Trail,Train Station,Vietnamese Restaurant
0,"Rouge,Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Rouge,Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Port Union,Rouge Hill,Highland Creek",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Port Union,Rouge Hill,Highland Creek",0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Guildwood,Morningside,West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category¶

In [70]:
scarborough_grouped = scarborough_onehot.groupby('Neighborhood').mean().reset_index()
scarborough_grouped.head()

Unnamed: 0,Neighborhood,Auto Garage,Bakery,Bar,Breakfast Spot,Bubble Tea Shop,Burger Joint,Bus Line,Bus Station,Bus Stop,...,Shanghai Restaurant,Shopping Mall,Skating Rink,Soccer Field,Supermarket,Sushi Restaurant,Thai Restaurant,Trail,Train Station,Vietnamese Restaurant
0,Agincourt,0.0,0.071429,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,...,0.071429,0.142857,0.071429,0.0,0.071429,0.071429,0.0,0.0,0.0,0.071429
1,Cedarbrae,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0
2,"Cliffcrest,Scarborough Village West,Cliffside",0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Cliffside West,Birch Cliff",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"East Birchmount Park,Ionview,Kennedy Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Let's print each neighborhood along with the top 5 most common venues¶

In [72]:
num_top_venues = 5

for hood in scarborough_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = scarborough_grouped[scarborough_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                   venue  freq
0          Shopping Mall  0.14
1     Chinese Restaurant  0.14
2  Vietnamese Restaurant  0.07
3            Supermarket  0.07
4                   Pool  0.07


----Cedarbrae----
               venue  freq
0         Playground  0.33
1              Trail  0.33
2             Lounge  0.33
3        Auto Garage  0.00
4  Indian Restaurant  0.00


----Cliffcrest,Scarborough Village West,Cliffside----
                  venue  freq
0  Fast Food Restaurant   0.2
1        Hardware Store   0.1
2        Discount Store   0.1
3          Liquor Store   0.1
4          Burger Joint   0.1


----Cliffside West,Birch Cliff----
                   venue  freq
0               Gym Pool  0.17
1                    Gym  0.17
2           Skating Rink  0.17
3  General Entertainment  0.17
4        College Stadium  0.17


----East Birchmount Park,Ionview,Kennedy Park----
               venue  freq
0   Department Store   0.2
1     Discount Store   0.2
2         Hobby Shop  

#### Let's print each neighborhood along with the top 10 venues¶ IN A PANDAS DATAFRAME

In [75]:
# This function sorts the venues in a descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]


# Creating the new dataframe and display the top 10 venues for each neighborhood.
num_top_venues = 10
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = scarborough_grouped['Neighborhood']

for ind in np.arange(scarborough_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(scarborough_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Shopping Mall,Chinese Restaurant,Vietnamese Restaurant,Department Store,Park,Pool,Shanghai Restaurant,Bubble Tea Shop,Skating Rink,Supermarket
1,Cedarbrae,Trail,Lounge,Playground,Vietnamese Restaurant,Construction & Landscaping,Grocery Store,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint
2,"Cliffcrest,Scarborough Village West,Cliffside",Fast Food Restaurant,Hardware Store,Sandwich Place,Liquor Store,Discount Store,Pharmacy,Coffee Shop,Pizza Place,Burger Joint,Bus Station
3,"Cliffside West,Birch Cliff",College Stadium,Skating Rink,Gym,General Entertainment,Park,Gym Pool,Burger Joint,Department Store,Bakery,Grocery Store
4,"East Birchmount Park,Ionview,Kennedy Park",Convenience Store,Hobby Shop,Discount Store,Coffee Shop,Department Store,Vietnamese Restaurant,Gym,Grocery Store,Golf Course,Gift Shop


## 3. Explore a Given Venue
> `https://api.foursquare.com/v2/venues/`**VENUE_ID**`?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&v=`**VERSION**

#### Let's get a venue's ID from our earlier Dataframe 

In [94]:
dataframe_filtered.head()

Unnamed: 0,name,categories,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,postalCode,state,id
0,Rex Pak Food Packaging Ltd,Business Service,85 Thornmount Dr,CA,Scarborough,Canada,,681,"[85 Thornmount Dr, Scarborough ON M1B 5V3, Can...","[{u'lat': 43.805459, u'lng': -79.194344, u'lab...",43.805459,-79.194344,M1B 5V3,ON,57524de4498e4f2143e9c292
1,Rubini West Indian Food Market,Grocery Store,31 Tapscott Rd. Unit B2,CA,Toronto,Canada,,2112,"[31 Tapscott Rd. Unit B2, Toronto ON, Canada]","[{u'lat': 43.8074645075, u'lng': -79.221210479...",43.807465,-79.22121,,ON,4e35b8a8ae60d86c3ac2e8d4
2,Meena's Fine Foods,Indian Restaurant,"1295 Morningside Avenue, Unit 22",CA,Scarborough,Canada,Sheppard Avenue East,855,"[1295 Morningside Avenue, Unit 22 (Sheppard Av...","[{u'lat': 43.8044757584, u'lng': -79.199752807...",43.804476,-79.199753,M1B 4Z4,ON,517dcdb6f1363b7a770a8424
3,Charley's West Indian Foods Ltd,Caribbean Restaurant,1154 Morningside Ave,CA,Scarborough,Canada,,1255,"[1154 Morningside Ave, Scarborough ON M1B 3A4,...","[{u'lat': 43.8005806, u'lng': -79.1993089, u'l...",43.800581,-79.199309,M1B 3A4,ON,4d2c8332342d6dcbe82020cb


#### Let's explore Meena's Fine Foods

In [95]:
venue_id = '517dcdb6f1363b7a770a8424' # ID of Meena's fine foods
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)

# Send the GET request
result = requests.get(url).json()
result['response']['venue']

# Let's print the restaurant's ratings
try:
    print(result['response']['venue']['rating'])
except:
    print('This venue has not been rated yet.')

This venue has not been rated yet.


### 3a. Explore a Given Venue's Reviews (known as Tips)
> `https://api.foursquare.com/v2/venues/`**VENUE_ID**`/tips?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&v=`**VERSION**`&limit=`**LIMIT**

In [98]:
## Meena's Fine Foods Tips (Reviews)
limit = 15 # set limit to be greater than or equal to the total number of tips
url = 'https://api.foursquare.com/v2/venues/{}/tips?client_id={}&client_secret={}&v={}&limit={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION, limit)

results = requests.get(url).json()
tips = results['response']['tips']['items']
tip = results['response']['tips']['items'][0]

# Formatting it neatly in a dataframe 
pd.set_option('display.max_colwidth', -1)
tips_df = json_normalize(tips) # json normalize tips

# columns to keep
filtered_columns = ['text', 'agreeCount', 'disagreeCount', 'id', 'user.firstName', 'user.lastName', 'user.gender', 'user.id']
tips_filtered = tips_df.loc[:, filtered_columns]

# display tips
tips_filtered

Unnamed: 0,text,agreeCount,disagreeCount,id,user.firstName,user.lastName,user.gender,user.id
0,"Idli, Coconut Chutney and Sambar",0,0,550e5402498ec819f718ca31,Vijaya,Selvaraju,female,38686966


## 4. Explore Trending Venues
> `https://api.foursquare.com/v2/venues/`**trending**`?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&ll=`**LATITUDE**`,`**LONGITUDE**`&v=`**VERSION**

#### Now, instead of simply exploring the area around Meena's Fine Foods, you are interested in knowing the venues that are trending at the time you are done with your lunch, meaning the places with the highest foot traffic. So let's do that and get the trending venues around here.

In [99]:
# define URL
url = 'https://api.foursquare.com/v2/venues/trending?client_id={}&client_secret={}&ll={},{}&v={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION)

# send GET request and get trending venues
results = requests.get(url).json()

# Check if there are any trending venues
if len(results['response']['venues']) == 0:
    trending_venues_df = 'No trending venues are available at the moment!'
    
else:
    trending_venues = results['response']['venues']
    trending_venues_df = json_normalize(trending_venues)

    # filter columns
    columns_filtered = ['name', 'categories'] + ['location.distance', 'location.city', 'location.postalCode', 'location.state', 'location.country', 'location.lat', 'location.lng']
    trending_venues_df = trending_venues_df.loc[:, columns_filtered]

    # filter the category for each row
    trending_venues_df['categories'] = trending_venues_df.apply(get_category_type, axis=1)
    

# Show results 
trending_venues_df

'No trending venues are available at the moment!'

## 7. Clustering with 2(b) Dataframe 

In [84]:
# Create the dataframe to run the clustering on
scarborough_grouped_clustering = scarborough_grouped.drop('Neighborhood', 1)
scarborough_grouped_clustering.head()

Unnamed: 0,Auto Garage,Bakery,Bar,Breakfast Spot,Bubble Tea Shop,Burger Joint,Bus Line,Bus Station,Bus Stop,Business Service,...,Shanghai Restaurant,Shopping Mall,Skating Rink,Soccer Field,Supermarket,Sushi Restaurant,Thai Restaurant,Trail,Train Station,Vietnamese Restaurant
0,0.0,0.071429,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,...,0.071429,0.142857,0.071429,0.0,0.071429,0.071429,0.0,0.0,0.0,0.071429
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [85]:
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(scarborough_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 0, 0, 0, 2, 1], dtype=int32)

#### Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [87]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

scarborough_merged2 = scarborough_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
scarborough_merged2 = scarborough_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

scarborough_merged2.head() # check the last columns!

ValueError: cannot insert Cluster Labels, already exists

In [92]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(scarborough_merged['Latitude'], scarborough_merged['Longtitude'], scarborough_merged['Neighbourhood'], scarborough_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

KeyError: 'Cluster Labels'