In [37]:
# imports
import requests
import pandas as pd
import os
import time

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [38]:
df2 = pd.read_csv("../data/dublin_city_bikes.csv")

FOURSQUARE_API_KEY = os.environ['FOURSQUARE_API_KEY']

results = []

for index, row in df2.iterrows():
    latitude = row['Latitude']
    longitude = row['Longitude']

    url = 'https://api.foursquare.com/v3/places/search'
    params = {
        # 'location': 'Dublin, Ireland',
        'll': f'{latitude},{longitude}',
        'categoryId': '13000,12013',    # for restaurants and bars
        'radius': 1000,
        'fields': 'name,location,rating,categories,distance,description,popularity,price',
        'sort': 'RATING'
    }
    headers = {
        'Accept': 'application/json',
        'Authorization': FOURSQUARE_API_KEY
    }

    # Rate-limit API calls to avoid restriction
    time.sleep(1.0)

    response = requests.get(url, params=params, headers=headers)
    if response.status_code != 200:
        print("Request failed. Status code:", response.status_code)
    else:
        data = response.json()

    res = data.get('results')
    frame = pd.DataFrame(res)
    results.append({
        'Latitude': latitude,
        'Longitude': longitude,
        'Foursquare Name': frame['name'],
        'Distance': frame['distance'],
        'Popularity': frame['popularity'],
        'Rating': frame['rating'],
        'Price': frame['price']
        })


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [None]:
# Parsing was done within for loop for each station in cell above.

Put your parsed results into a DataFrame

In [44]:
df_foursquare = pd.DataFrame(results)

df_foursquare

Unnamed: 0,Latitude,Longitude,Foursquare Name,Distance,Popularity,Rating,Price
0,53.350230,-6.279696,0 Urbanity Coffee 1 ...,0 380 1 985 2 825 3 658 4 177 5...,0 0.992544 1 0.995915 2 0.975432 3 ...,0 9.3 1 9.2 2 9.1 3 9.1 4 9.1 5...,0 1.0 1 2.0 2 2.0 3 1.0 4 NaN 5...
1,53.357841,-6.251557,0 147 Deli 1 Gate The...,0 736 1 869 2 305 3 942 4 1...,0 0.977746 1 0.936266 2 0.997029 3 ...,0 9.4 1 9.0 2 9.0 3 8.9 4 8.9 5...,0 1.0 1 NaN 2 NaN 3 1.0 4 1.0 5...
2,53.356307,-6.273717,0 147 Deli 1 ...,0 977 1 929 2 915 3 854 4 ...,0 0.977746 1 0.923782 2 0.989202 3 ...,0 9.4 1 9.1 2 9.1 3 9.0 4 9.0 5...,0 1.0 1 1.0 2 NaN 3 NaN 4 1.0 5...
3,53.349562,-6.278198,0 St Patrick's Park 1 Urbanity Coffe...,0 1130 1 308 2 861 3 705 4 ...,0 0.999257 1 0.992544 2 0.995915 3 ...,0 9.3 1 9.3 2 9.2 3 9.1 4 9.1 5...,0 NaN 1 1.0 2 2.0 3 2.0 4 1.0 5...
4,53.336021,-6.262980,0 St. Stephen's Green 1 ...,0 346 1 764 2 745 3 948 4 776 5...,0 0.999829 1 0.997372 2 0.999257 3 ...,0 9.5 1 9.4 2 9.3 3 9.2 4 9.2 5...,0 NaN 1 NaN 2 NaN 3 NaN 4 1.0 5...
...,...,...,...,...,...,...,...
109,53.336597,-6.248109,0 St. Stephen's Green 1 ...,0 753 1 900 2 423 3 865 4 884 5...,0 0.999829 1 0.997372 2 0.999486 3 ...,0 9.5 1 9.4 2 9.2 3 9.2 4 9.1 5...,0 NaN 1 NaN 2 NaN 3 NaN 4 NaN 5...
110,53.353742,-6.265301,0 147 Deli 1 The ...,0 366 1 965 2 970 3 761 4 703 5...,0 0.977746 1 0.995915 2 0.963005 3 ...,0 9.4 1 9.2 2 9.2 3 9.1 4 9.1 5...,0 1.0 1 2.0 2 3.0 3 2.0 4 1.0 5...
111,53.344603,-6.263371,0 St. Stephen's Green 1 Hodges...,0 769 1 449 2 754 3 990 4 281 5...,0 0.999829 1 0.997372 2 0.999257 3 ...,0 9.5 1 9.4 2 9.3 3 9.2 4 9.2 5...,0 NaN 1 NaN 2 NaN 3 NaN 4 2.0 5...
112,53.347692,-6.278214,0 St Patrick's Park 1 ...,0 941 1 110 2 763 3 655 4 522 5...,0 0.999257 1 0.992544 2 0.995915 3 ...,0 9.3 1 9.3 2 9.2 3 9.1 4 9.1 5...,0 NaN 1 1.0 2 2.0 3 2.0 4 1.0 5...


In [41]:
# Specify the file path for the CSV
csv_file_path = "../data/foursquare_nested_places.csv"

# Save the DataFrame to CSV
df_foursquare.to_csv(csv_file_path, index=False)

In [45]:
# Data Transformation

df_foursquare['Average_dist_F'] = df_foursquare['Distance'].apply(lambda x: sum(x) / len(x))
df_foursquare['Average_popularity_F'] = df_foursquare['Popularity'].apply(lambda x: sum(x) / len(x))
df_foursquare['Average_rating_F'] = df_foursquare['Rating'].apply(lambda x: sum(x) / len(x))

df_foursquare['Max_rating_F'] = df_foursquare['Rating'].apply(lambda x: max(x))
df_foursquare['Max_popularity_F'] = df_foursquare['Popularity'].apply(lambda x: max(x))

df_foursquare['Min_rating_F'] = df_foursquare['Rating'].apply(lambda x: min(x))
df_foursquare['Min_popularity_F'] = df_foursquare['Popularity'].apply(lambda x: min(x))


In [47]:
# Cleaning

df_foursquare.drop(columns=['Foursquare Name', 'Distance', 'Popularity', 'Rating', 'Price'], inplace=True)
df_foursquare

Unnamed: 0,Latitude,Longitude,Average_dist_F,Average_popularity_F,Average_rating_F,Max_rating_F,Max_popularity_F,Min_rating_F,Min_popularity_F
0,53.350230,-6.279696,618.5,0.969202,9.09,9.3,0.995915,8.9,0.923782
1,53.357841,-6.251557,824.0,0.964197,8.90,9.4,0.997029,8.7,0.897986
2,53.356307,-6.273717,999.3,0.970233,8.98,9.4,0.998886,8.7,0.923782
3,53.349562,-6.278198,584.4,0.972195,9.13,9.3,0.999257,9.0,0.923782
4,53.336021,-6.262980,709.7,0.991156,9.22,9.5,0.999829,9.1,0.958349
...,...,...,...,...,...,...,...,...,...
109,53.336597,-6.248109,782.5,0.977200,9.14,9.5,0.999829,9.0,0.914641
110,53.353742,-6.265301,646.9,0.968302,9.09,9.4,0.995915,8.9,0.923782
111,53.344603,-6.263371,433.1,0.990933,9.23,9.5,0.999829,9.1,0.963005
112,53.347692,-6.278214,567.5,0.973375,9.14,9.3,0.999257,9.0,0.923782


In [48]:
# Specify the file path for the CSV
csv_file_path = "../data/foursquare_features.csv"

# Save the DataFrame to CSV
df_foursquare.to_csv(csv_file_path, index=False)

In [50]:
duplicate_rows = df_foursquare[df_foursquare.duplicated()]

print("Duplicate Rows:")
duplicate_rows

Duplicate Rows:


Unnamed: 0,Latitude,Longitude,Average_dist_F,Average_popularity_F,Average_rating_F,Max_rating_F,Max_popularity_F,Min_rating_F,Min_popularity_F


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [51]:
YELP_API_KEY = os.environ['YELP_API_KEY']

results = []

for index, row in df2.iterrows():       # df2 is data from CityBikes API already instantiated in Foursquare API request above.
    latitude = row['Latitude']
    longitude = row['Longitude']

    url = 'https://api.yelp.com/v3/businesses/search'
    params = {
        # 'location': 'Dublin, Ireland',
        'latitude': latitude,
        'longitude': longitude,
        'radius': 1000,
        'categories': 'restaurants,bars',
        'sort_by': 'rating'
    }
    headers = {
        'Authorization': f'Bearer {YELP_API_KEY}'
    }

    # Rate-limit API calls to avoid restriction
    time.sleep(1.0)

    response = requests.get(url, params=params, headers=headers)
    if response.status_code != 200:
        print("Request failed. Status code:", response.status_code)
        break
    else:
        data = response.json()

    yelp_results = data.get('businesses', [])
    frame = pd.DataFrame(yelp_results)
    results.append({
    'Latitude': latitude,
    'Longitude': longitude,
    'Yelp Name': frame['name'],
    'Distance': frame['distance'],
    'Review count': frame['review_count'],
    'Rating': frame['rating'],
    'Price': frame['price']
    })



Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [19]:
# Parsing was done within for loop for each station in cell above.

Put your parsed results into a DataFrame

In [52]:
df_yelp = pd.DataFrame(results)

df_yelp

Unnamed: 0,Latitude,Longitude,Yelp Name,Distance,Review count,Rating,Price
0,53.350230,-6.279696,0 La Pausa Cafe 1 Bar 166...,0 1144.067227 1 625.352533 2 991...,0 5 1 7 2 6 3 17 4 33 5...,0 5.0 1 5.0 2 5.0 3 5.0 4 ...,0 € 1 NaN 2 NaN 3 €€ 4 ...
1,53.357841,-6.251557,0 La Pausa Cafe 1 T...,0 952.731369 1 1060.959965 2 626...,0 5 1 7 2 6 3 15 4 ...,0 5.0 1 5.0 2 5.0 3 5.0 4 ...,0 € 1 NaN 2 NaN 3 €€ 4 ...
2,53.356307,-6.273717,0 La Pausa Cafe 1 Thai Jasmin...,0 535.941122 1 1200.229622 2 808...,0 5 1 11 2 7 3 15 4 8 5...,0 5.0 1 5.0 2 5.0 3 5.0 4 ...,0 € 1 € 2 NaN 3 €€ 4 ...
3,53.349562,-6.278198,0 La Pausa Cafe 1 Thai Jasmin...,0 1112.334259 1 1013.261503 2 518...,0 5 1 11 2 7 3 6 4 17 5...,0 5.0 1 5.0 2 5.0 3 5.0 4 ...,0 € 1 € 2 NaN 3 NaN 4 ...
4,53.336021,-6.262980,0 Rural Pub Tours 1 ...,0 914.924154 1 943.967405 2 542...,0 20 1 6 2 11 3 8 4 ...,0 5.0 1 5.0 2 5.0 3 5.0 4 ...,0 €€ 1 NaN 2 €€ 3 NaN 4 ...
...,...,...,...,...,...,...,...
109,53.336597,-6.248109,0 Rural Pub Tours 1 Asahi Asia...,0 731.409474 1 1064.236594 2 731...,0 20 1 11 2 8 3 11 4 5 5...,0 5.0 1 5.0 2 5.0 3 5.0 4 ...,0 €€ 1 €€ 2 NaN 3 €€€ 4 ...
110,53.353742,-6.265301,0 Rural Pub Tours 1 ...,0 1490.564706 1 868.897173 2 273...,0 20 1 6 2 5 3 11 4 7 5...,0 5.0 1 5.0 2 5.0 3 5.0 4 ...,0 €€ 1 NaN 2 € 3 € 4 ...
111,53.344603,-6.263371,0 Rural Pub Tours 1 ...,0 684.379403 1 222.402891 2 976...,0 20 1 6 2 6 3 11 4 ...,0 5.0 1 5.0 2 5.0 3 5.0 4 ...,0 €€ 1 NaN 2 NaN 3 € 4 ...
112,53.347692,-6.278214,0 Spitalfields 1 La...,0 891.426827 1 1260.467200 2 987...,0 6 1 5 2 11 3 7 4 6 5...,0 5.0 1 5.0 2 5.0 3 5.0 4 ...,0 NaN 1 € 2 € 3 NaN 4 ...


In [53]:
# Specify the file path for the CSV
csv_file_path = "../data/yelp_nested_places.csv"

# Save the DataFrame to CSV
df_yelp.to_csv(csv_file_path, index=False)

In [54]:
# Data Transformation

df_yelp['Average_distance_Y'] = df_yelp['Distance'].apply(lambda x: sum(x) / len(x))
df_yelp['Average_review_count_Y'] = df_yelp['Review count'].apply(lambda x: sum(x) / len(x))
df_yelp['Average_rating_Y'] = df_yelp['Rating'].apply(lambda x: sum(x) / len(x))

df_yelp['Max_rating_Y'] = df_yelp['Rating'].apply(lambda x: max(x))
df_yelp['Max_review_count'] = df_yelp['Review count'].apply(lambda x: max(x))

df_yelp['Min_rating_Y'] = df_yelp['Rating'].apply(lambda x: min(x))
df_yelp['Min_review_count'] = df_yelp['Review count'].apply(lambda x: min(x))

In [56]:
# Cleaning

df_yelp.drop(columns=['Yelp Name', 'Distance', 'Review count', 'Rating', 'Price'], inplace=True)
df_yelp

Unnamed: 0,Latitude,Longitude,Average_distance_Y,Average_review_count_Y,Average_rating_Y,Max_rating_Y,Max_review_count,Min_rating_Y,Min_review_count
0,53.350230,-6.279696,813.634974,21.05,4.725,5.0,97,4.5,4
1,53.357841,-6.251557,852.302705,28.95,4.675,5.0,106,4.5,3
2,53.356307,-6.273717,834.061440,17.85,4.725,5.0,56,4.5,4
3,53.349562,-6.278198,777.232101,16.45,4.750,5.0,50,4.5,4
4,53.336021,-6.262980,689.425498,29.90,4.775,5.0,165,4.5,5
...,...,...,...,...,...,...,...,...,...
109,53.336597,-6.248109,690.606529,18.15,4.675,5.0,68,4.5,5
110,53.353742,-6.265301,625.914671,17.00,4.800,5.0,50,4.5,4
111,53.344603,-6.263371,676.468985,15.85,4.875,5.0,101,4.5,4
112,53.347692,-6.278214,768.840359,15.25,4.775,5.0,50,4.5,4


In [57]:
# Specify the file path for the CSV
csv_file_path = "../data/yelp_features.csv"

# Save the DataFrame to CSV
df_yelp.to_csv(csv_file_path, index=False)

In [58]:
duplicate_rows = df_yelp[df_yelp.duplicated()]

print("Duplicate Rows:")
duplicate_rows

Duplicate Rows:


Unnamed: 0,Latitude,Longitude,Average_distance_Y,Average_review_count_Y,Average_rating_Y,Max_rating_Y,Max_review_count,Min_rating_Y,Min_review_count


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

For the city of Dublin, Foursquare API seems to have higher quality of data because it was observed to have more attributes for each POI in the city.  

Get the top 10 restaurants according to their rating

In [59]:
df_foursquare.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 114 entries, 0 to 113
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Latitude              114 non-null    float64
 1   Longitude             114 non-null    float64
 2   Average_dist_F        114 non-null    float64
 3   Average_popularity_F  114 non-null    float64
 4   Average_rating_F      114 non-null    float64
 5   Max_rating_F          114 non-null    float64
 6   Max_popularity_F      114 non-null    float64
 7   Min_rating_F          114 non-null    float64
 8   Min_popularity_F      114 non-null    float64
dtypes: float64(9)
memory usage: 8.1 KB


In [60]:
df_yelp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 114 entries, 0 to 113
Data columns (total 9 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Latitude                114 non-null    float64
 1   Longitude               114 non-null    float64
 2   Average_distance_Y      114 non-null    float64
 3   Average_review_count_Y  114 non-null    float64
 4   Average_rating_Y        114 non-null    float64
 5   Max_rating_Y            114 non-null    float64
 6   Max_review_count        114 non-null    int64  
 7   Min_rating_Y            114 non-null    float64
 8   Min_review_count        114 non-null    int64  
dtypes: float64(7), int64(2)
memory usage: 8.1 KB


In [61]:
df_foursquare.describe()

Unnamed: 0,Latitude,Longitude,Average_dist_F,Average_popularity_F,Average_rating_F,Max_rating_F,Max_popularity_F,Min_rating_F,Min_popularity_F
count,114.0,114.0,114.0,114.0,114.0,114.0,114.0,114.0,114.0
mean,53.345562,-6.264373,682.203509,0.974267,9.004386,9.346491,0.998185,8.807018,0.92511
std,0.007612,0.01798,102.570163,0.011692,0.234947,0.168901,0.003132,0.329848,0.026571
min,53.330091,-6.310015,422.1,0.947313,8.12,8.7,0.981745,7.5,0.871047
25%,53.340001,-6.275193,620.975,0.966251,8.8925,9.3,0.997172,8.7,0.901793
50%,53.344903,-6.263106,683.4,0.972521,9.06,9.4,0.999486,8.9,0.923782
75%,53.350769,-6.251665,736.325,0.986267,9.17,9.5,0.999829,9.1,0.953035
max,53.359967,-6.230852,1060.1,0.992227,9.24,9.5,0.999886,9.1,0.972375


In [62]:
df_yelp.describe()

Unnamed: 0,Latitude,Longitude,Average_distance_Y,Average_review_count_Y,Average_rating_Y,Max_rating_Y,Max_review_count,Min_rating_Y,Min_review_count
count,114.0,114.0,114.0,114.0,114.0,114.0,114.0,114.0,114.0
mean,53.345562,-6.264373,763.414455,21.463596,4.70943,5.0,100.517544,4.425439,3.964912
std,0.007612,0.01798,89.201427,6.475944,0.131549,0.0,47.510638,0.23265,1.088393
min,53.330091,-6.310015,559.662517,11.0,4.1,5.0,33.0,3.5,1.0
25%,53.340001,-6.275193,695.768457,16.4125,4.65,5.0,67.25,4.5,4.0
50%,53.344903,-6.263106,764.026115,20.025,4.725,5.0,101.0,4.5,4.0
75%,53.350769,-6.251665,832.72103,26.5375,4.8,5.0,106.75,4.5,5.0
max,53.359967,-6.230852,962.232056,37.15,4.9,5.0,200.0,4.5,5.0


In [63]:
# Top 10 restaurants according to their rating.
df_foursquare.sort_values('Max_rating_F', ascending=False).head(10)

Unnamed: 0,Latitude,Longitude,Average_dist_F,Average_popularity_F,Average_rating_F,Max_rating_F,Max_popularity_F,Min_rating_F,Min_popularity_F
57,53.341288,-6.258117,490.6,0.991681,9.24,9.5,0.999829,9.1,0.963005
32,53.343034,-6.263578,440.9,0.991681,9.24,9.5,0.999829,9.1,0.963005
23,53.334295,-6.258503,755.3,0.981751,9.17,9.5,0.999829,9.0,0.92681
24,53.340803,-6.267732,606.2,0.988527,9.23,9.5,0.999829,9.1,0.958349
26,53.333653,-6.248345,831.8,0.964522,9.06,9.5,0.999829,8.9,0.918212
66,53.339334,-6.264699,608.9,0.988944,9.24,9.5,0.999829,9.1,0.958349
28,53.341428,-6.24672,723.7,0.98729,9.14,9.5,0.999829,9.0,0.946151
29,53.338614,-6.248606,749.4,0.986368,9.17,9.5,0.999829,9.0,0.946151
65,53.339434,-6.246548,701.3,0.978626,9.13,9.5,0.999829,9.0,0.914641
62,53.337757,-6.267699,674.8,0.988527,9.23,9.5,0.999829,9.1,0.958349


In [64]:
# Top 10 restaurants according to their rating.
df_yelp.sort_values('Max_rating_Y', ascending=False).head(10)

Unnamed: 0,Latitude,Longitude,Average_distance_Y,Average_review_count_Y,Average_rating_Y,Max_rating_Y,Max_review_count,Min_rating_Y,Min_review_count
0,53.35023,-6.279696,813.634974,21.05,4.725,5.0,97,4.5,4
85,53.344007,-6.266802,650.196668,20.6,4.825,5.0,101,4.5,5
83,53.355473,-6.264423,660.325056,18.45,4.75,5.0,50,4.5,4
82,53.34744,-6.238523,894.611292,20.0,4.575,5.0,68,4.5,3
81,53.339764,-6.251988,701.245857,19.85,4.75,5.0,101,4.5,5
80,53.356717,-6.256359,688.678282,24.65,4.725,5.0,106,4.5,4
79,53.343456,-6.287409,701.705495,22.9,4.7,5.0,165,4.5,3
78,53.356769,-6.26814,751.318015,17.8,4.725,5.0,56,4.5,4
77,53.359967,-6.264828,701.995524,27.4,4.675,5.0,106,4.5,3
76,53.330362,-6.265163,862.938719,25.85,4.825,5.0,165,4.5,5
