In [1]:
# imports
import requests
import pandas as pd
import os
import time

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [2]:
df2 = pd.read_csv("/Users/ojefua/Documents/Projects/lhl-statistical-modelling-project/data/dublin_city_bikes.csv")

FOURSQUARE_API_KEY = os.environ['FOURSQUARE_API_KEY']

results = []

for index, row in df2.iterrows():
    latitude = row['Latitude']
    longitude = row['Longitude']

    url = 'https://api.foursquare.com/v3/places/search'
    params = {
        # 'location': 'Dublin, Ireland',
        'll': f'{latitude},{longitude}',
        'categoryId': '13000,12013',    # for restaurants and bars
        'radius': 1000,
        'fields': 'name,location,rating,categories,distance,description,popularity,price',
        'sort': 'RATING'
        # 'categories': 'restaurants,bars,university_housing',
        # 'limit': 12  # Number of results to retrieve
    }
    headers = {
        'Accept': 'application/json',
        'Authorization': FOURSQUARE_API_KEY
    }

    # Rate-limit API calls
    time.sleep(1.0)

    response = requests.get(url, params=params, headers=headers)
    if response.status_code != 200:
        print("Request failed. Status code:", response.status_code)
    else:
        data = response.json()

    res = data.get('results')
    frame = pd.DataFrame(res)
    results.append({
        'Latitude': latitude,
        'Longitude': longitude,
        'Foursquare Name': frame['name'],
        'Distance': frame['distance'],
        'Popularity': frame['popularity'],
        'Rating': frame['rating'],
        'Price': frame['price']
        })


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

Put your parsed results into a DataFrame

In [3]:
df_foursquare = pd.DataFrame(results)

df_foursquare

Unnamed: 0,Latitude,Longitude,Foursquare Name,Distance,Popularity,Rating,Price
0,53.350230,-6.279696,0 Urbanity Coffee 1 The Porterhous...,0 380 1 985 2 286 3 825 4 658 5...,0 0.992943 1 0.995943 2 0.956118 3 ...,0 9.3 1 9.2 2 9.1 3 9.1 4 9.1 5...,0 1.0 1 2.0 2 NaN 3 2.0 4 1.0 5...
1,53.357841,-6.251557,0 147 Deli 1 Gate The...,0 736 1 869 2 305 3 942 4 1...,0 0.977602 1 0.929149 2 0.997086 3 ...,0 9.4 1 9.0 2 9.0 3 9.0 4 8.9 5...,0 1.0 1 NaN 2 NaN 3 1.0 4 1.0 5...
2,53.356307,-6.273717,0 147 Deli 1 Lill...,0 977 1 983 2 929 3 915 4 ...,0 0.977602 1 0.956118 2 0.924549 3 ...,0 9.4 1 9.1 2 9.1 3 9.1 4 9.0 5...,0 1.0 1 NaN 2 1.0 3 NaN 4 1.0 5...
3,53.349562,-6.278198,0 St Patrick's Park 1 Urbanity Coffe...,0 1130 1 308 2 861 3 386 4 ...,0 0.999257 1 0.992943 2 0.995943 3 ...,0 9.3 1 9.3 2 9.2 3 9.1 4 9.1 5...,0 NaN 1 1.0 2 2.0 3 NaN 4 2.0 5...
4,53.336021,-6.262980,0 St. Stephen's Green 1 ...,0 346 1 764 2 745 3 948 4 776 5...,0 0.999829 1 0.997400 2 0.999257 3 ...,0 9.5 1 9.4 2 9.3 3 9.2 4 9.1 5...,0 NaN 1 NaN 2 NaN 3 NaN 4 1.0 5...
...,...,...,...,...,...,...,...
109,53.336597,-6.248109,0 St. Stephen's Green 1 ...,0 753 1 900 2 423 3 865 4 884 5...,0 0.999829 1 0.997400 2 0.999486 3 ...,0 9.5 1 9.4 2 9.2 3 9.1 4 9.1 5...,0 NaN 1 NaN 2 NaN 3 NaN 4 NaN 5...
110,53.353742,-6.265301,0 147 Deli 1 The ...,0 366 1 965 2 970 3 761 4 703 5...,0 0.977602 1 0.995943 2 0.963575 3 ...,0 9.4 1 9.2 2 9.2 3 9.1 4 9.1 5...,0 1.0 1 2.0 2 3.0 3 2.0 4 1.0 5...
111,53.344603,-6.263371,0 St. Stephen's Green 1 Hodges...,0 769 1 449 2 754 3 281 4 990 5...,0 0.999829 1 0.997400 2 0.999257 3 ...,0 9.5 1 9.4 2 9.3 3 9.2 4 9.2 5...,0 NaN 1 NaN 2 NaN 3 2.0 4 NaN 5...
112,53.347692,-6.278214,0 St Patrick's Park 1 ...,0 941 1 110 2 763 3 456 4 655 5...,0 0.999257 1 0.992943 2 0.995943 3 ...,0 9.3 1 9.3 2 9.2 3 9.1 4 9.1 5...,0 NaN 1 1.0 2 2.0 3 NaN 4 2.0 5...


In [4]:
# Specify the file path for the CSV
csv_file_path = "/Users/ojefua/Documents/Projects/lhl-statistical-modelling-project/data/foursquare_nested_places.csv"

# Save the DataFrame to CSV
df_foursquare.to_csv(csv_file_path, index=False)

In [10]:
# # avg_d = []
# for index, row in df_foursquare.iterrows():
#    # df_foursquare['Avg_dist_F'] = ((row['Distance'].sum()) / (len(row['Distance'])))
# #    avg_d = (((row['Distance'].sum()) / (len(row['Distance']))))
#     df_foursquare['Average_dist_F'] = (row['Distance'].sum() / len(row['Distance']))
#     df_foursquare['Average_popularity_F'] = (row['Popularity'].sum() / len(row['Popularity']))
#     df_foursquare['Average_rating_F'] = (row['Rating'].sum() / len(row['Rating']))
#     df_foursquare['Average_price_F'] = (row['Price'].sum() / len(row['Price']))
# # df_foursquare
# # avg_d

df_foursquare['Average_dist_F'] = df_foursquare['Distance'].apply(lambda x: sum(x) / len(x))
df_foursquare['Average_popularity_F'] = df_foursquare['Popularity'].apply(lambda x: sum(x) / len(x))
df_foursquare['Average_rating_F'] = df_foursquare['Rating'].apply(lambda x: sum(x) / len(x))
df_foursquare['Average_price_F'] = df_foursquare['Price'].apply(lambda x: sum(x) / len(x))


In [12]:
df_foursquare.drop(columns=['Foursquare Name', 'Distance', 'Popularity', 'Rating', 'Price', 'Average_price_F'], inplace=True)
df_foursquare

Unnamed: 0,Latitude,Longitude,Average_dist_F,Average_popularity_F,Average_rating_F
0,53.350230,-6.279696,606.1,0.967403,9.09
1,53.357841,-6.251557,824.0,0.963300,8.91
2,53.356307,-6.273717,999.3,0.969428,9.01
3,53.349562,-6.278198,584.4,0.972062,9.13
4,53.336021,-6.262980,709.7,0.990946,9.20
...,...,...,...,...,...
109,53.336597,-6.248109,782.5,0.976231,9.13
110,53.353742,-6.265301,646.9,0.967628,9.10
111,53.344603,-6.263371,433.1,0.990964,9.22
112,53.347692,-6.278214,566.9,0.971522,9.14


In [13]:
# Specify the file path for the CSV
csv_file_path = "/Users/ojefua/Documents/Projects/lhl-statistical-modelling-project/data/foursquare_features.csv"

# Save the DataFrame to CSV
df_foursquare.to_csv(csv_file_path, index=False)

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [26]:
YELP_API_KEY = os.environ['YELP_API_KEY']

results = []

for index, row in df2.iterrows():
    latitude = row['Latitude']
    longitude = row['Longitude']

    url = 'https://api.yelp.com/v3/businesses/search'
    params = {
        # 'location': 'Dublin, Ireland',
        'latitude': latitude,
        'longitude': longitude,
        'radius': 1000,
        'categories': 'restaurants,bars',
        # 'categories': 'restaurants,bars,university_housing',
        'sort_by': 'rating'
        # 'limit': 10  # Number of results to retrieve
    }
    headers = {
        'Authorization': f'Bearer {YELP_API_KEY}'
    }

    # Rate-limit API calls
    time.sleep(1.0)

    response = requests.get(url, params=params, headers=headers)
    if response.status_code != 200:
        print("Request failed. Status code:", response.status_code)
        break
    else:
        data = response.json()

    yelp_results = data.get('businesses', [])
    frame = pd.DataFrame(yelp_results)
    results.append({
    'Latitude': latitude,
    'Longitude': longitude,
    'Yelp Name': frame['name'],
    'Distance': frame['distance'],
    'Review count': frame['review_count'],
    'Rating': frame['rating'],
    'Price': frame['price']
    })



Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

Put your parsed results into a DataFrame

In [27]:
df_yelp = pd.DataFrame(results)

# print(df_yelp)
df_yelp

Unnamed: 0,Latitude,Longitude,Yelp Name,Distance,Review count,Rating,Price
0,53.350230,-6.279696,0 La Pausa Cafe 1 Bar 166...,0 1144.067227 1 625.352533 2 991...,0 5 1 7 2 6 3 17 4 33 5...,0 5.0 1 5.0 2 5.0 3 5.0 4 ...,0 € 1 NaN 2 NaN 3 €€ 4 ...
1,53.357841,-6.251557,0 La Pausa Cafe 1 T...,0 952.731369 1 1060.959965 2 626...,0 5 1 7 2 6 3 15 4 ...,0 5.0 1 5.0 2 5.0 3 5.0 4 ...,0 € 1 NaN 2 NaN 3 €€ 4 ...
2,53.356307,-6.273717,0 La Pausa Cafe 1 ...,0 535.941122 1 808.969104 2 1200...,0 5 1 7 2 11 3 15 4 24 5...,0 5.0 1 5.0 2 5.0 3 5.0 4 ...,0 € 1 NaN 2 € 3 €€ 4 ...
3,53.349562,-6.278198,0 La Pausa Cafe 1 ...,0 1112.334259 1 518.209459 2 1013...,0 5 1 7 2 11 3 6 4 17 5...,0 5.0 1 5.0 2 5.0 3 5.0 4 ...,0 € 1 NaN 2 € 3 NaN 4 ...
4,53.336021,-6.262980,0 Rural Pub Tours 1 ...,0 914.924154 1 943.967405 2 542...,0 20 1 6 2 11 3 8 4 ...,0 5.0 1 5.0 2 5.0 3 5.0 4 ...,0 €€ 1 NaN 2 €€ 3 NaN 4 ...
...,...,...,...,...,...,...,...
109,53.336597,-6.248109,0 Rural Pub Tours 1 Asahi Asia...,0 731.409474 1 1064.236594 2 731...,0 20 1 11 2 8 3 11 4 5 5...,0 5.0 1 5.0 2 5.0 3 5.0 4 ...,0 €€ 1 €€ 2 NaN 3 €€€ 4 ...
110,53.353742,-6.265301,0 Rural Pub Tours 1 ...,0 1490.564706 1 273.098434 2 868...,0 20 1 5 2 6 3 7 4 11 5...,0 5.0 1 5.0 2 5.0 3 5.0 4 ...,0 €€ 1 € 2 NaN 3 NaN 4 ...
111,53.344603,-6.263371,0 Rural Pub Tours 1 ...,0 684.379403 1 222.402891 2 700...,0 20 1 6 2 7 3 11 4 ...,0 5.0 1 5.0 2 5.0 3 5.0 4 ...,0 €€ 1 NaN 2 NaN 3 € 4 ...
112,53.347692,-6.278214,0 La Pausa Cafe 1 ...,0 1260.467200 1 548.677476 2 987...,0 5 1 7 2 11 3 6 4 6 5...,0 5.0 1 5.0 2 5.0 3 5.0 4 ...,0 € 1 NaN 2 € 3 NaN 4 ...


In [28]:
# Specify the file path for the CSV
csv_file_path = "/Users/ojefua/Documents/Projects/lhl-statistical-modelling-project/data/yelp_nested_places.csv"

# Save the DataFrame to CSV
df_yelp.to_csv(csv_file_path, index=False)

In [29]:
df_yelp['Average_distance_Y'] = df_yelp['Distance'].apply(lambda x: sum(x) / len(x))
df_yelp['Average_review_count_Y'] = df_yelp['Review count'].apply(lambda x: sum(x) / len(x))
df_yelp['Average_rating_Y'] = df_yelp['Rating'].apply(lambda x: sum(x) / len(x))

In [31]:
df_yelp.drop(columns=['Yelp Name', 'Distance', 'Review count', 'Rating', 'Price'], inplace=True)
df_yelp

Unnamed: 0,Latitude,Longitude,Average_distance_Y,Average_review_count_Y,Average_rating_Y
0,53.350230,-6.279696,813.634974,21.05,4.725
1,53.357841,-6.251557,852.302705,28.95,4.675
2,53.356307,-6.273717,839.916357,19.25,4.725
3,53.349562,-6.278198,777.232101,16.45,4.750
4,53.336021,-6.262980,689.425498,29.90,4.775
...,...,...,...,...,...
109,53.336597,-6.248109,690.606529,18.15,4.675
110,53.353742,-6.265301,625.914671,17.00,4.800
111,53.344603,-6.263371,676.468985,15.85,4.875
112,53.347692,-6.278214,768.840359,15.25,4.775


In [32]:
# Specify the file path for the CSV
csv_file_path = "/Users/ojefua/Documents/Projects/lhl-statistical-modelling-project/data/yelp_features.csv"

# Save the DataFrame to CSV
df_yelp.to_csv(csv_file_path, index=False)

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

Get the top 10 restaurants according to their rating

In [16]:
df_foursquare.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 114 entries, 0 to 113
Data columns (total 5 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Latitude              114 non-null    float64
 1   Longitude             114 non-null    float64
 2   Average_dist_F        114 non-null    float64
 3   Average_popularity_F  114 non-null    float64
 4   Average_rating_F      114 non-null    float64
dtypes: float64(5)
memory usage: 4.6 KB


In [33]:
df_yelp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 114 entries, 0 to 113
Data columns (total 5 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Latitude                114 non-null    float64
 1   Longitude               114 non-null    float64
 2   Average_distance_Y      114 non-null    float64
 3   Average_review_count_Y  114 non-null    float64
 4   Average_rating_Y        114 non-null    float64
dtypes: float64(5)
memory usage: 4.6 KB


In [17]:
df_foursquare.describe()

Unnamed: 0,Latitude,Longitude,Average_dist_F,Average_popularity_F,Average_rating_F
count,114.0,114.0,114.0,114.0,114.0
mean,53.345562,-6.264373,679.675439,0.974287,9.005614
std,0.007612,0.01798,101.48343,0.011732,0.225408
min,53.330091,-6.310015,422.1,0.94853,8.14
25%,53.340001,-6.275193,613.025,0.965435,8.91
50%,53.344903,-6.263106,683.75,0.973147,9.06
75%,53.350769,-6.251665,732.95,0.98584,9.16
max,53.359967,-6.230852,1012.2,0.992698,9.23


In [34]:
df_yelp.describe()

Unnamed: 0,Latitude,Longitude,Average_distance_Y,Average_review_count_Y,Average_rating_Y
count,114.0,114.0,114.0,114.0,114.0
mean,53.345562,-6.264373,764.015341,21.497368,4.709211
std,0.007612,0.01798,88.756071,6.473024,0.132507
min,53.330091,-6.310015,559.662517,11.0,4.1
25%,53.340001,-6.275193,689.720755,16.4125,4.65
50%,53.344903,-6.263106,764.026115,20.025,4.725
75%,53.350769,-6.251665,833.049087,26.5375,4.8
max,53.359967,-6.230852,962.232056,37.15,4.9


In [25]:
# Order rows by values of a column (high to low).
df_foursquare.sort_values('Average_rating_F', ascending=False).head(10)

Unnamed: 0,Latitude,Longitude,Average_dist_F,Average_popularity_F,Average_rating_F
74,53.345922,-6.254614,695.4,0.988798,9.23
7,53.343368,-6.27012,607.5,0.992286,9.23
85,53.344007,-6.266802,531.7,0.992286,9.23
40,53.348875,-6.267459,678.9,0.988087,9.22
100,53.346057,-6.268001,534.5,0.988087,9.22
99,53.3483,-6.266651,629.2,0.988087,9.22
10,53.338755,-6.262003,556.9,0.991686,9.22
66,53.339334,-6.264699,610.3,0.992698,9.22
67,53.337494,-6.26199,670.2,0.992698,9.22
32,53.343034,-6.263578,440.9,0.991686,9.22


In [36]:
df_yelp.sort_values('Average_rating_Y', ascending=False).head(10)

Unnamed: 0,Latitude,Longitude,Average_distance_Y,Average_review_count_Y,Average_rating_Y
100,53.346057,-6.268001,710.820345,12.25,4.9
24,53.340803,-6.267732,764.550186,19.1,4.9
52,53.350291,-6.273507,810.964016,12.55,4.875
111,53.344603,-6.263371,676.468985,15.85,4.875
108,53.340927,-6.262501,680.372535,19.2,4.875
98,53.351182,-6.269859,689.416528,12.0,4.875
7,53.343368,-6.27012,766.265454,19.15,4.875
57,53.341288,-6.258117,669.474643,20.55,4.85
34,53.3547,-6.272314,901.152122,14.5,4.85
40,53.348875,-6.267459,617.347611,15.4,4.85
