# Chicago Yelp API

### imports


In [1]:
import requests
import pandas as pd
import time
import data_cleaning as dc
import chicago_api as chi

### load api key

In [None]:
# Read the Yelp API key from a local text file and trim surrounding whitespace.
with open("data/api_key_yelp.txt", "r") as key_file:
    raw_key = key_file.read()
api_key = raw_key.strip()

### headers and params

In [None]:
# Business-search endpoint, and the header carrying the API key as a Bearer token.
url = "https://api.yelp.com/v3/businesses/search"
headers = dict(Authorization=f"Bearer {api_key}")

In [None]:
# Base query parameters: food businesses in Chicago, 50 results per request.
params = dict(location="Chicago, IL", term="food", limit=50)

### send request

In [None]:
# Send the base search request and parse the JSON body.
response = requests.get(url, headers=headers, params=params)
# Fail loudly on a 4xx/5xx reply instead of treating an error payload as search data.
response.raise_for_status()
data = response.json()


In [None]:
# Load the neighborhood names: one entry per line of the file, whitespace stripped.
with open("data/chicago_neighborhoods.txt", "r") as file:
    neighborhoods = [line.strip() for line in file]



In [None]:
# Probe the first five neighborhoods and report how many hits each one returns.
for item in neighborhoods[0:5]:
    neighborhood = f'{item}, Chicago, IL'

    # Send the neighborhood as the location; without this override every
    # request would repeat the base "Chicago, IL" query from `params`.
    neighborhood_params = {**params, "location": neighborhood}
    response = requests.get(url, headers=headers, params=neighborhood_params)
    data = response.json()
    # Pause between calls to stay gentle on the API.
    time.sleep(1)
    print(f'{neighborhood} has {data["total"]} hits')




In [None]:

# Show the first five neighborhood names.
neighborhoods[:5]

    

In [None]:



# businesses = []
# #for error "limit+offset must be <= 240."
# total = 240
# offset = 0
# while offset < total:
#     params["offset"] = offset
#     response = requests.get(url, headers=headers, params=params)
#     # status ok
#     if response.status_code == 200:
#         data = response.json()
#         businesses.extend(data.get("businesses",[]))
#     else:
#         print({response.status_code})
#         print(response.text)
#         break
#     offset += params["limit"]
#     time.sleep(1)

def location_search_params(api_key, location, **kwargs):
    """
    Build the url, headers and query parameters for a business search in one Chicago location.

    Args:
        api_key (string): API key, sent as a Bearer token in the Authorization header
        location (string): Place name; ", Chicago, IL" is appended to it
        **kwargs: Extra query parameters (e.g. offset, limit, categories) merged into url_params

    Returns:
        results (tuple): (url, headers, url_params)
    """
    search_url = "https://api.yelp.com/v3/businesses/search"
    auth_headers = {"Authorization": f"Bearer {api_key}"}
    # The location travels as a query parameter rather than being spliced into the url;
    # any keyword arguments are added after it.
    query = {"location": f'{location}, Chicago, IL', **kwargs}
    return search_url, auth_headers, query

def api_get_request(url, headers, url_params):
    """
    Send a HTTP GET request and return the response body parsed as json

    Args:
        url (string): API endpoint url
        headers (dict): A python dictionary containing HTTP headers including Authentication to be sent
        url_params (dict): The parameters (required and optional) supported by endpoint

    Returns:
        results (json): response as json
    """
    response = requests.get(url, headers=headers, params=url_params)
    # No status check is made here: the body is parsed whatever the status code is.
    return response.json()

def paginated_restaurants_search_requests(api_key, location, total):
    """
    Returns a list of tuples (url, headers, url_params) for paginated search of up to 240 restaurants

    Args:
        api_key (string): Your Yelp API Key for Authentication
        location (string): Business Location
        total (int): Total number of items to be fetched; values above 240 are capped at 240

    Returns:
        results (list): list of tuple (url, headers, url_params), one per page, starting at offset 0
    """
    page_size = 40
    # The API answers with the error "limit+offset must be <= 240", so never
    # generate a page that starts at or beyond that bound.
    max_results = 240
    capped_total = min(total, max_results)

    # One request per page of `page_size` restaurants: offsets 0, 40, 80, ...
    return [
        location_search_params(
            api_key, location, offset=offset, limit=page_size, categories="restaurants"
        )
        for offset in range(0, capped_total, page_size)
    ]


# Collect restaurants for the first 50 neighborhoods into one flat list of business records.
businesses = []

for location in neighborhoods[0:50]:
    # 1st request: tells us the number of entries and already carries the first page
    url, headers, url_params = location_search_params(api_key, location, offset=0, limit=40, categories="restaurants")
    response = requests.get(url, headers=headers, params=url_params)
    time.sleep(0.5)
    if response.status_code != 200:
        # Report the failure instead of skipping the neighborhood silently
        print(f'{location} failed with status {response.status_code}')
        print(response.text)
        continue

    data = response.json()
    print(f'{location} has {data["total"]} hits')
    # Keep the first page rather than requesting offset 0 a second time
    businesses.extend(data.get("businesses", []))
    total = min(240, data["total"])
    # create paginated list of queries
    paginated_queries = paginated_restaurants_search_requests(api_key, location, total)
    # run the remaining queries (the first one, offset 0, was fetched above), append to businesses
    for query in paginated_queries[1:]:
        page = api_get_request(*query)
        businesses.extend(page.get("businesses", []))
        time.sleep(0.4)
    
    # return results

    

### get df

In [None]:
# Turn the collected business records into a DataFrame, one row per record.
df = pd.DataFrame(data=businesses)

## I get at most 200 rows, followed by a 400 status error

In [None]:
# Number of rows collected.
len(df)


In [None]:
# Preview the first five rows.
df.head(n=5)

In [None]:
# Hand df and a target path to the project's data_cleaning.save helper.
# NOTE(review): presumably this writes df as JSON to that path — confirm in data_cleaning.
dc.save(df, "data/chicago_restaurants_incomplete.json")

# Calling the chicago_api.py function

In [None]:
from chicago_api import get_chicago_restaurants_df

# Build df through the chicago_api helper (called with no arguments),
# then print its shape followed by its first rows.
df = get_chicago_restaurants_df()

print(df.shape, df.head(), sep="\n")


In [None]:
# Load the previously saved file through the project's data_cleaning.load helper.
# NOTE(review): the meaning of the second argument (100_000) is not visible here — confirm in data_cleaning.
chicago_restaurants = dc.load("data/chicago_restaurants_incomplete.json",100_000)

In [2]:
# Trial run of the chicago_api helper with arguments (0, 4).
# NOTE(review): presumably a start/end range over the neighborhood list (the output below shows four neighborhoods) — confirm in chicago_api.
test_df = chi.get_chicago_restaurants_df(0, 4)

Albany Park has 4900 hits


  all_restaurants = pd.concat([all_restaurants, restaurants_df], ignore_index=True)


Riverdale has 970 hits
Edgewater has 1100 hits
West Ridge has 1100 hits
