In [266]:
# imports
import numpy as np
import pandas as pd
import requests
import os 

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [267]:
"""RESTAURANT INFORMATION"""
# import data from previous part/defining variables
bikeDF = pd.read_csv('../data/Quebec_BikeCity.csv')

# creating variables for the request
FOURSQUARE_KEY = os.environ['FOURSQUARE_API_KEY']
url = "https://api.foursquare.com/v3/places/search"

# one list that will contain a bunch of dictionaries
FScontainerBARS = []

# loops through all the different bike stations, grabbing all the restaurants/bars within a 1000m radius
for index, row in bikeDF.iterrows():
    lat = row['Latitude']
    long = row['Longitude']

    # set parameters so it grabs specific information on bar/restuarants
    params = {
        'll': f'{lat},{long}',
        'categoryId': '13003,13065',  # ID for dining and drinking
        "radius": '1000',
        'fields': 'name,location,categories,distance,rating'
    }
    headers = {
        "Accept": "application/json",
        "Authorization": FOURSQUARE_KEY
    }

    # gather the data and turn it into a JSON
    request = requests.get(url, params=params, headers=headers)


    data = request.json()

    # create a more refined JSON which goes into the 'results' key (this will make it easier to call specific elements later)
    resultsJSON = data['results']
    # turn the refined JSON into a dataframe(easier to grab info from)
    resultsDF = pd.DataFrame(resultsJSON)

    # #if the bar/restaurant has a rating, save it as a var
    # #if the bar/restaurant has no rating, set it a null/None
    try:
        resultsDF['rating']
    except:
        BarRating = 0
    else:
        BarRating = resultsDF['rating']


    # add a dictionary, containing all the relavent info, into the empty list
    # each entry in the list corresponds to a singular bike station
    FScontainerBARS.append({
        'Latitude': lat,
        'Longitude': long,
        'Rest/Bar Names': resultsDF['name'],
        'Distance': resultsDF['distance'],
        'Rest/ Bar Rating': BarRating
    })


Repeat process to get all gyms within a 1000m of each bike station

In [268]:
"""GYM INFORMATION"""

#one list that will contain a bunch of dictionaries
FScontainerGYMS =[]

#loops through all the different bike stations, grabbing all the gyms within a 1000m radius
for index, row in bikeDF.iterrows():
    lat = row['Latitude']
    long = row['Longitude']

    # set parameters so it grabs specific information on gyms
    params = {
        'll': f'{lat},{long}',
        'categories': 18021,  # ID for gyms and studios
        "radius": '1000',
        'fields': 'name,location,categories,distance'
    }
    headers = {
        "Accept": "application/json",
        "Authorization": FOURSQUARE_KEY
    }

    #gather the data and turn it into a JSON
    request = requests.get(url, params=params, headers=headers)
    data=request.json()

    #create a more refined JSON which goes into the 'results' key (this will make it easier to call specific elements later)
    resultsJSON = data['results']
    #turn the refined JSON into a dataframe(easier to grab info from)
    resultsDF = pd.DataFrame(resultsJSON)
    


    #if there is a gym within a 1km radious, then save the names and distances
    #if there is no gym in the area, set name as null and distance as 0 (easier for calculations and spotting outliers later)
    if len(resultsJSON) > 0:
        GYMname =resultsDF['name']
        GYMdistance = resultsDF['distance']
    else:
        GYMname = None
        GYMdistance = 0,0 #double zero so we can use lambda later when calculating averages
    
    #add a dictionary, containing all the relavent info, into the empty list
    #each entry in the list corresponds to a singular bike station
    FScontainerGYMS.append({
            'Latitude': lat,
            'Longitude': long,
            'Gym Names': GYMname,
            'Distance': GYMdistance
            })


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [None]:
#requests and parsing of response had been completed in prior step/cell

Put your parsed results into a DataFrame

In [271]:
#Parsing the information into dataframes
FSbar_DF = pd.DataFrame(FScontainerBARS)
FSgym_DF = pd.DataFrame(FScontainerGYMS)

FSbar_DF
FSgym_DF

Unnamed: 0,Latitude,Longitude,Gym Names,Distance
0,46.786588,-71.258231,0 Centre de Yoga Ste...,0 660 1 672 2 680 3 807 4 668 5...
1,46.784041,-71.249391,0 Centre d'Entraînement Privé Pierr...,"0 140 1 137 2 149 Name: distance, dty..."
2,46.829433,-71.244066,0 Centre de Yoga Qué...,0 892 1 262 2 421 3 703 4 765 5...
3,46.812403,-71.220411,0 Planete Fit...,0 460 1 394 2 248 3 537 4 604 5...
4,46.826553,-71.245978,0 Danz Québec 1 Ba...,0 381 1 625 2 674 3 754 Name: dist...
...,...,...,...,...
69,46.814340,-71.224898,0 Planete Fit...,0 567 1 790 2 142 3 268 4 398 5...
70,46.816606,-71.241658,0 Énergie Cardio 1 Pi...,0 890 1 467 2 697 3 827 4 501 5...
71,46.783205,-71.276189,0 Stationnement du PEPS 1 ...,0 114 1 592 2 618 3 872 4 958 5...
72,46.811548,-71.235388,0 L'École de danse d...,0 333 1 478 2 676 3 738 4 ...


In [272]:
#Transforming data to get average distances
FSbar_DF['Average Distance_Bar'] = FSbar_DF['Distance'].apply(lambda x: sum(x)/len(x))
FSgym_DF['Average Distance_Gym'] = FSgym_DF['Distance'].apply(lambda x: sum(x)/len(x))

#left join 
FOURSQUARE_DF = pd.merge(FSbar_DF,FSgym_DF, on=['Latitude','Longitude'], how='left')

#cleaning data 
FOURSQUARE_DF.drop(columns=['Rest/Bar Names', 'Distance_x', 'Gym Names','Distance_y'], inplace=True)
FOURSQUARE_DF

Unnamed: 0,Latitude,Longitude,Rest/ Bar Rating,Average Distance_Bar,Average Distance_Gym
0,46.786588,-71.258231,0 8.1 1 NaN 2 8.5 3 7.0 4 7.7 5...,711.3,777.125000
1,46.784041,-71.249391,0 7.8 1 8.5 2 6.6 3 7.7 4 7.0 5...,355.2,142.000000
2,46.829433,-71.244066,0 8.1 1 NaN 2 NaN 3 NaN 4 6.0 5...,302.2,701.857143
3,46.812403,-71.220411,0 9.1 1 8.6 2 8.5 3 8.9 4 9.0 5...,376.9,568.800000
4,46.826553,-71.245978,0 8.1 1 9.1 2 NaN 3 NaN 4 NaN 5...,395.1,608.500000
...,...,...,...,...,...
69,46.814340,-71.224898,0 8.6 1 8.5 2 9.0 3 8.8 4 8.9 5...,345.0,517.500000
70,46.816606,-71.241658,0 8.6 1 7.9 2 7.8 3 8.6 4 9.2 5...,663.5,688.571429
71,46.783205,-71.276189,0 7.6 1 NaN 2 8.4 3 8.1 4 7.1 5...,602.4,678.500000
72,46.811548,-71.235388,0 8.6 1 9.2 2 8.0 3 8.5 4 9.1 5...,223.1,674.300000


In [273]:
#Saving the dataframe as a .csv

#setting path
path = "../data/FOURSQUARE_DF.csv"

#saving
FOURSQUARE_DF.to_csv(path, index=False)

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [265]:
"""BARS AND RESTAURANTS"""
#creating variables for the request
YELP_KEY = os.environ['YELP_API_KEY']
url = "https://api.yelp.com/v3/businesses/search"

#one list that will contain a bunch of dictionaries
YELPcontainerBARS =[]

#loops through all the different bike stations, grabbing all the restaurants/bars within a 1000m radius
for index, row in bikeDF.iterrows():
    lat = row['Latitude']
    long = row['Longitude']

    params = {
        'latitude': lat,
        'longitude': long,
        'radius': '1000',
        'categories': 'restaurants,bars',
    }
    headers = {
        'Authorization': 'Bearer '+ YELP_KEY
    }

    response = requests.get(url, params=params, headers=headers)
    YELPdata = response.json()


    #create a more refined JSON which goes into the 'busisnesses' key (this will make it easier to call specific elements later)
    YELPJSON = YELPdata.get('businesses', [])
    
    #turn the refined JSON into a dataframe(easier to grab info from)
    resultsDF = pd.DataFrame(YELPJSON)

    # #if the bar/restaurant has a rating, save it as a var
    # #if the bar/restaurant has no rating, set it a null/None
    try:
        resultsDF['rating']
    except:
        BarRating = 0
    else:
        BarRating = resultsDF['rating']

    #add a dictionary, containing all the relavent info, into the empty list
    #each entry in the list corresponds to a singular bike station
    YELPcontainerBARS.append({
        'Latitude': lat,
        'Longitude': long,
        'Names':resultsDF['name'] ,
        'Distance': resultsDF['distance'],
        'REST/BAR RATING': BarRating
        })
    

KeyError: 'name'

In [263]:
"""GYM INFORMATION"""
#creating variables for the request

#one list that will contain a bunch of dictionaries
YELPcontainerGYM =[]

#loops through all the different bike stations, grabbing all the restaurants/bars within a 1000m radius
for index, row in bikeDF.iterrows():
    lat = row['Latitude']
    long = row['Longitude']

    params = {
        'latitude': lat,
        'longitude': long,
        'radius': '1000',
        'categories': 'gyms',
    }
    headers = {
        'Authorization': 'Bearer '+ YELP_KEY
    }

    response = requests.get(url, params=params, headers=headers)
    YELPdata = response.json()


    #create a more refined JSON which goes into the 'busisnesses' key (this will make it easier to call specific elements later)
    YELPJSON = YELPdata['businesses']

    #turn the refined JSON into a dataframe(easier to grab info from)
    resultsDF = pd.DataFrame(YELPJSON)


    #if there is a gym within a 1km radious, then save the names and distances
    #if there is no gym in the area, set name as null and distance as 0 (easier for calculations and spotting outliers later)
    if len(resultsJSON) > 0:
        GYMname =resultsDF['name']
        GYMdistance = resultsDF['distance']
    else:
        GYMname = None
        GYMdistance = 0,0 #double zero so we can use lambda later when calculating averages
    

    #add a dictionary, containing all the relavent info, into the empty list
    #each entry in the list corresponds to a singular bike station
    YELPcontainerGYM.append({
        'Latitude': lat,
        'Longitude': long,
        'Names': GYMname,
        'Distance': GYMdistance,
        })
    

KeyError: 'name'

In [294]:
os.environ['YELP_API_KEY'] = '6yiSxUAio9u68EcInQcLsgTEab2RjsAerqLy4vV_jXKg6PhHiyYOfWWj6dciNNWd1Bf_WklG7aSrw8KEfR8d71bKaCdM0Qi5GRlbzM6KBzKWbFatRnyTeYH5wcw1ZXYx'

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [262]:
YELP_KEY = os.environ['YELP_API_KEY']
url = "https://api.yelp.com/v3/businesses/search"

# one list that will contain a bunch of dictionaries
YELPcontainer = []

params = {
    'latitude': '37.77329',
    'longitude': '-122.4225',
    'radius': '1000',
    'categories': 'restaurants,bars',
}
headers = {
    'Authorization': 'Bearer ' + YELP_KEY
}

response = requests.get(url, params=params, headers=headers)
datajson = response.json()

yelp_results = datajson['businesses']
yelp_results

# frame = pd.DataFrame(datajson)
# frame

[{'id': 'ciEDsTWhajcdL3KuJqBRlw',
  'alias': 'espetus-churrascaria-san-francisco-2',
  'name': 'Espetus Churrascaria',
  'image_url': 'https://s3-media4.fl.yelpcdn.com/bphoto/F9G1pFFitfi9F4rJw_nrpQ/o.jpg',
  'is_closed': False,
  'url': 'https://www.yelp.com/biz/espetus-churrascaria-san-francisco-2?adjust_creative=Zz1LRyiQ_Yhi7ICiN83FzA&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=Zz1LRyiQ_Yhi7ICiN83FzA',
  'review_count': 3837,
  'categories': [{'alias': 'steak', 'title': 'Steakhouses'},
   {'alias': 'latin', 'title': 'Latin American'},
   {'alias': 'brazilian', 'title': 'Brazilian'}],
  'rating': 4.0,
  'coordinates': {'latitude': 37.7733327504928,
   'longitude': -122.422131667494},
  'transactions': ['restaurant_reservation', 'delivery'],
  'price': '$$$$',
  'location': {'address1': '1686 Market St',
   'address2': '',
   'address3': '',
   'city': 'San Francisco',
   'zip_code': '94102',
   'country': 'US',
   'state': 'CA',
   'display_address': ['1686 M

Put your parsed results into a DataFrame

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

Get the top 10 restaurants according to their rating

In [293]:

#empty lists which will later be used for columns in the datafram
names = []
ratings = []

#un-nest the FS dataframes and fill lists
station=0
while station < len(FSbar_DF['Rest/ Bar Rating']):
    names.append(FSbar_DF['Rest/Bar Names'][station].to_list()) 
    ratings.append(FSbar_DF['Rest/ Bar Rating'][station])
    station +=1
    
#list of all restaurants in a flattened list
NamesList= list(np.concatenate(names))
NamesList

#extracting info out of each the series in the ratings variable, turning it into a flattened list
flattened_ratings = []
for series in ratings:
    try:
        flattened_ratings.extend(series) # Attempt to iterate over the series
    except TypeError:
        flattened_ratings.append(series) # If TypeError occurs, treat the element as a single item

#creating a DF with our cleaned data
RESTAURANTSwithRATINGS = pd.DataFrame(NamesList, columns=['Restaurants'])
RESTAURANTSwithRATINGS['Ratings'] = flattened_ratings

#removing duplicates (31 duplicates)
RESTAURANTSwithRATINGScleaned = RESTAURANTSwithRATINGS.drop_duplicates()

#top 10 restaurants
RESTAURANTSwithRATINGScleaned.sort_values('Ratings', ascending=False).head(10)


Unnamed: 0,Restaurants,Ratings
130,Terrasse Dufferin,9.3
107,Plains of Abraham (Plaines d'Abraham),9.3
111,Restaurant le Saint-Amour,9.2
171,Mille et une Pizzas,9.2
381,Le Bouchon du Pied Bleu,9.2
173,Restaurant Légende,9.1
30,Cantook Micro Torréfaction,9.1
167,Soupe & Cie,9.1
385,Patente et Machin,9.1
41,Videotron Center,9.1
