<a href="https://colab.research.google.com/github/vishalkumarlondon/data-viz-coursework-group-2/blob/master/colab-notebooks/foursquare_api_coursework.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Get data from Foursquare

Credit to Michael Cascio, whose code was very helpful: https://github.com/cascio/IBM_Data_Science_Capstone

### Import Required Python Packages

In [0]:
import json
import math
import os
import urllib
import urllib.parse

import folium
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

### Set Foursquare API Credentials

In [0]:
# Credentials are read from environment variables so that secrets never have to
# be typed into (and committed with) the notebook. Each value falls back to an
# empty string when its variable is unset, matching the previous placeholders.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = os.environ.get('FOURSQUARE_API_VERSION', '') # Foursquare API version (sent as the `v` query parameter)

### Import London, New York, Paris Neighborhood Name & Location Centroid Data

In [0]:
# Copy the neighborhood/centroid CSV from the Google Cloud Storage bucket into
# the current working directory (shell command run via the `!` magic).
!gsutil cp gs://public-data-data_viz-2/fashion_week_cities_location_search_Q2_2020.csv fashion_week_cities_location_search_Q2_2020.csv

Copying gs://public-data-data_viz-2/fashion_week_cities_location_search_Q2_2020.csv...
- [1 files][  6.1 MiB/  6.1 MiB]                                                
Operation completed over 1 objects/6.1 MiB.                                      


In [0]:
# Load the downloaded CSV and keep only the four columns used further down.
location_columns = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']
city_neighborhoods = pd.read_csv('fashion_week_cities_location_search_Q2_2020.csv')[location_columns]

In [0]:
# Show (rows, columns) of the trimmed neighborhood table.
city_neighborhoods.shape

(516, 4)

In [0]:
# Preview the first rows of the neighborhood table.
city_neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,1er Ardt,Louvre,48.861,2.339
1,1er Ardt,Louvre,48.862,2.338
2,1er Ardt,Louvre,48.858,2.342
3,1er Ardt,Louvre,48.859,2.34
4,1er Ardt,Louvre,48.864,2.333


### Retrieve Relevant Venue Data From Foursquare

#### Foursquare Relevant Venue CategoryIDs

https://developer.foursquare.com/docs/build-with-foursquare/categories/

*   '4d4b7104d754a06370d81259', #Arts & Entertainment
*   '4d4b7105d754a06373d81259', #Event
*   '4d4b7105d754a06376d81259', #Nightlife Spot
*   '56aa371be4b08b9a8d5734cf', #Ballroom
*   '4bf58dd8d48988d103951735', #Clothing Store
*   '4bf58dd8d48988d1ff931735', #Convention Center
*   '52e81612bcbc57f1066b7a35', #Club House
*   '52e81612bcbc57f1066b7a32', #Cultural Center
*   '4bf58dd8d48988d171941735' #Event Space


#### Function To Create API Url

In [0]:
def createURL(endpoint, CLIENT_ID, CLIENT_SECRET, VERSION, lat, lng, radius, categoryId, limit):
    """Build a request URL from an endpoint and the search query parameters.

    Args:
        endpoint: Base URL of the API endpoint. It may or may not already end
            with '?' (or carry its own query string); the correct separator is
            chosen automatically.
        CLIENT_ID: Sent as the `client_id` parameter.
        CLIENT_SECRET: Sent as the `client_secret` parameter.
        VERSION: Sent as the `v` parameter.
        lat: Latitude; combined with `lng` into the `ll` parameter as "lat,lng".
        lng: Longitude; see `lat`.
        radius: Sent as the `radius` parameter.
        categoryId: Sent as the `categoryId` parameter (may be a
            comma-separated list of ids).
        limit: Sent as the `limit` parameter.

    Returns:
        str: `endpoint` followed by the percent-encoded query string.
    """
    params = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
        'll': '{},{}'.format(lat, lng),
        'radius': radius,
        'categoryId': categoryId,
        'limit': limit,
    }
    # Percent-encode every value so characters such as '&', '=' or spaces cannot
    # corrupt the query string. Commas are left literal because `ll` and
    # `categoryId` are comma-separated lists.
    query = urllib.parse.urlencode(params, safe=',')

    # Pick the separator from the endpoint's shape instead of always emitting
    # '&', which produced '...search?&client_id=...' (an empty first parameter)
    # and an invalid URL for endpoints without a '?'.
    if endpoint.endswith(('?', '&')):
        separator = ''
    elif '?' in endpoint:
        separator = '&'
    else:
        separator = '?'
    return endpoint + separator + query

# Base URL of the Foursquare venue-search endpoint.
endpoint = 'https://api.foursquare.com/v2/venues/search?'

# Category ids of interest, keyed by their human-readable label.
# https://developer.foursquare.com/docs/build-with-foursquare/categories/
_CATEGORY_ID_BY_LABEL = {
    'Arts & Entertainment': '4d4b7104d754a06370d81259',
    'Event': '4d4b7105d754a06373d81259',
    'Nightlife Spot': '4d4b7105d754a06376d81259',
    'Ballroom': '56aa371be4b08b9a8d5734cf',
    'Clothing Store': '4bf58dd8d48988d103951735',
    'Convention Center': '4bf58dd8d48988d1ff931735',
    'Club House': '52e81612bcbc57f1066b7a35',
    'Cultural Center': '52e81612bcbc57f1066b7a32',
    'Event Space': '4bf58dd8d48988d171941735',
}

# Ids in the order listed above, and the comma-joined form sent to the API.
categoryIds = list(_CATEGORY_ID_BY_LABEL.values())
categoryId = ','.join(categoryIds)

# Value sent as the `limit` query parameter of every search request.
limit = 50

#### Function To Retrieve All Relevant Venues Per Neighborhood

In [0]:

def getNearbyMusicVenues(neighborhoods, latitudes, longitudes, radius=1000):
    """Search for venues around each neighborhood centroid and tabulate the results.

    One venue-search request is issued per (neighborhood, latitude, longitude)
    triple. The request URL is built with `createURL` from the module-level
    CLIENT_ID, CLIENT_SECRET, VERSION, categoryId and limit.

    Args:
        neighborhoods: Iterable of neighborhood names.
        latitudes: Iterable of centroid latitudes, aligned with `neighborhoods`.
        longitudes: Iterable of centroid longitudes, aligned with `neighborhoods`.
        radius: Value sent as the `radius` query parameter (default 1000).

    Returns:
        pandas.DataFrame: One row per venue returned by the API, with columns
        'Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude',
        'Venue Name', 'Venue Category', 'Venue Latitude', 'Venue Longitude'
        and 'Venue City'. When no venue is found the frame has zero rows but
        still carries these columns.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
        requests.Timeout: If a request does not complete within the timeout.
    """
    endpoint = 'https://api.foursquare.com/v2/venues/search?'
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue Name',
               'Venue Category',
               'Venue Latitude',
               'Venue Longitude',
               'Venue City'
              ]
    rows = []

    for hood_name, lat, lng in zip(neighborhoods, latitudes, longitudes):

        url = createURL(endpoint, CLIENT_ID, CLIENT_SECRET, VERSION, lat, lng, radius, categoryId, limit)

        # Fail with the HTTP error itself (bad credentials, quota exceeded, ...)
        # rather than an opaque KeyError on the JSON body, and never hang
        # forever on a stalled connection.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        results = response.json()['response']['venues']

        for item in results:
            location = item['location']
            # A venue may come back with no category; fall back to 'N/A', the
            # same placeholder used for a missing city.
            categories = item.get('categories') or []
            venue_category = categories[0]['name'] if categories else 'N/A'

            rows.append((hood_name,
                         lat,
                         lng,
                         item['name'],
                         venue_category,
                         location['lat'],
                         location['lng'],
                         location.get('city', 'N/A')
                        ))

    # Passing the column names to the constructor keeps the schema intact even
    # when `rows` is empty (assigning `.columns` afterwards raised ValueError).
    return pd.DataFrame(rows, columns=columns)

# Run the venue search once per centroid row, with the search radius set to 500.
prelim_venue_data = getNearbyMusicVenues(
    neighborhoods=city_neighborhoods['Neighborhood'],
    latitudes=city_neighborhoods['Latitude'],
    longitudes=city_neighborhoods['Longitude'],
    radius=500,
)


#### How Many Venues Did We Retrieve?

In [0]:

# Print (rows, columns), then show the first five rows as the cell output.
print((len(prelim_venue_data.index), len(prelim_venue_data.columns)))
prelim_venue_data.head()


(15985, 8)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue Name,Venue Category,Venue Latitude,Venue Longitude,Venue City
0,Louvre,48.861,2.339,Musée du Louvre,Art Museum,48.860847,2.33644,Paris
1,Louvre,48.861,2.339,Monument de l'amiral Gaspard de Coligny,Outdoor Sculpture,48.861216,2.339395,Paris
2,Louvre,48.861,2.339,UGC Ciné Cité Les Halles,Multiplex,48.862607,2.343531,Paris
3,Louvre,48.861,2.339,Comédie-Française,Theater,48.863088,2.336612,Paris
4,Louvre,48.861,2.339,Intersport,Sporting Goods Shop,48.860796,2.341252,Paris


#### Save DataFrame As CSV To Avoid Further API Calls

In [0]:
# Write the venue table to a local CSV (pandas' default settings, so the index
# is written as the first column), then copy that file to the Google Cloud
# Storage bucket so the API does not have to be queried again.
prelim_venue_data.to_csv('fw19_foursquareapi_reponse.csv')
!gsutil cp  'fw19_foursquareapi_reponse.csv' gs://public-data-data_viz-2/fw19_foursquareapi_reponse.csv

Copying file://fw19_foursquareapi_reponse.csv [Content-Type=text/csv]...
-
Operation completed over 1 objects/1.8 MiB.                                      


#### Remove duplicates and Save DataFrame As CSV

In [0]:
# Keep only the first row seen for each (venue name, venue city) pair.
# Note: this modifies prelim_venue_data in place.
duplicate_key_columns = ['Venue Name', 'Venue City']
prelim_venue_data.drop_duplicates(subset=duplicate_key_columns, keep='first', inplace=True)

In [0]:
# save the data into google bucket!
prelim_venue_data.to_csv('fw19_foursquareapi_reponse_clean.csv')
!gsutil cp  'fw19_foursquareapi_reponse_clean.csv' gs://public-data-data_viz-2/fw19_foursquareapi_reponse.csv

Copying file://fw19_foursquareapi_reponse_clean.csv [Content-Type=text/csv]...
-
Operation completed over 1 objects/1.5 MiB.                                      
