# Chicago Yelp API

### imports


In [91]:
import requests
import pandas as pd
import time
import data_cleaning as dc

### load api key

In [6]:
with open("data/api_key_yelp.txt", "r") as file:
    api_key = file.read().strip()

### header and params

In [7]:
headers = {"Authorization": f"Bearer {api_key}"}
url = "https://api.yelp.com/v3/businesses/search"

In [None]:
params = {"location": "Chicago, IL", "term": "food", "limit": 50}

### send req

In [13]:
response = requests.get(url, headers=headers, params=params)
data = response.json()


In [25]:
neighborhoods = []

with open("data/chicago_neighborhoods.txt", "r") as file:
    for line in file:
        item = line.strip()
        neighborhoods.append(item)



In [None]:
for item in neighborhoods[0:5]:
    neighborhood = f'{item}, Chicago, IL'
    
    response = requests.get(url, headers=headers, params=params)
    data = response.json()
    time.sleep(1)
    print(f'{neighborhood} has {data["total"]} hits')




Albany Park, Chicago, IL has 1300 hits
Riverdale, Chicago, IL has 111 hits
Edgewater, Chicago, IL has 319 hits
West Ridge, Chicago, IL has 376 hits
Archer Heights, Chicago, IL has 164 hits


In [35]:

neighborhoods[0:5]

    

['Albany Park', 'Riverdale', 'Edgewater', 'West Ridge', 'Archer Heights']

In [88]:



# businesses = []
# #for error "limit+offset must be <= 240."
# total = 240
# offset = 0
# while offset < total:
#     params["offset"] = offset
#     response = requests.get(url, headers=headers, params=params)
#     # status ok
#     if response.status_code == 200:
#         data = response.json()
#         businesses.extend(data.get("businesses",[]))
#     else:
#         print({response.status_code})
#         print(response.text)
#         break
#     offset += params["limit"]
#     time.sleep(1)

def location_search_params(api_key, location, **kwargs):
    """
    Construct url, headers and url_params. Reference API docs (link above) to use the arguments
    """
    # What is the url endpoint for search?
    url = "https://api.yelp.com/v3/businesses/search"
    # How is Authentication performed?
    headers = {"Authorization": f"Bearer {api_key}"}
    # SPACES in url is problematic. How should you handle location containing spaces?
    url_params = {"location": f'{location}, Chicago, IL'}
    # Include keyword arguments in url_params
    url_params = url_params | kwargs
    
    return url, headers, url_params

def api_get_request(url, headers, url_params):
    """
    Send a HTTP GET request and return a json response 
    
    Args:
        url (string): API endpoint url
        headers (dict): A python dictionary containing HTTP headers including Authentication to be sent
        url_params (dict): The parameters (required and optional) supported by endpoint
        
    Returns:
        results (json): response as json
    """
    http_method = 'GET'
    # See requests.request?
    response = requests.get(url, headers=headers, params=url_params)
    return response.json()

def paginated_restaurants_search_requests(api_key, location, total):
    """
    Returns a list of tuples (url, headers, url_params) for paginated search of up to 240 restaurants
    Args:
        api_key (string): Your Yelp API Key for Authentication
        location (string): Business Location
        total (int): Total number of items to be fetched
    Returns:
        results (list): list of tuple (url, headers, url_params)
    """
    # HINT: Use total, offset and limit for pagination
    # You can reuse function location_search_params(...)
    paginated_list = []

    # Use i to query the next 15 theaters in each request
    for i in range(0, total, 40):
        url, headers, url_params = location_search_params(api_key, location, offset=i, limit=40, categories="restaurants")
        paginated_list.append((url, headers, url_params))

    return paginated_list


businesses = []

for location in neighborhoods[0:50]:
    # 1st request, return number of entries
    url, headers, url_params = location_search_params(api_key, location, offset=0, limit=40, categories="restaurants")
    response = requests.get(url, headers=headers, params=url_params)
    time.sleep(0.5)
    if response.status_code == 200:
        data = response.json()
        print(f'{location} has {data["total"]} hits')
        total = min(240, data["total"])
        # create paginated list of queries
        paginated_queries = paginated_restaurants_search_requests(api_key, location, total)
        # run queries, append to buinesses
        for query in paginated_queries:
            response = api_get_request(*query)
            businesses.extend(response.get("businesses",[]))
            time.sleep(0.4)
    
    # return results

    

Albany Park has 4900 hits
Riverdale has 969 hits
Edgewater has 1100 hits
West Ridge has 1100 hits
Archer Heights has 685 hits
Armour Square has 1300 hits
Ashburn has 1900 hits
Ashburn has 1900 hits
Auburn Gresham has 582 hits
Avalon Park has 442 hits
Avondale has 6500 hits
Irving Park has 6200 hits
New City has 3600 hits
Belmont Cragin has 945 hits
Avondale has 6500 hits
Hermosa has 1400 hits
Logan Square has 1700 hits
Dunning has 2600 hits
Dunning has 2600 hits
Beverly has 443 hits
Ashburn has 1900 hits
Morgan Park has 340 hits
Norwood Park has 2500 hits
Lake View has 1800 hits
Lincoln Square has 1800 hits
Washington Heights has 440 hits
Belmont Cragin has 945 hits
Bridgeport has 1200 hits
Brighton Park has 828 hits
Douglas has 929 hits
Logan Square has 1700 hits
Lincoln Square has 1800 hits
Uptown has 1400 hits
Burnside has 114 hits
Near North Side has 5500 hits
Calumet Heights has 361 hits
New City has 3600 hits
Near South Side has 2300 hits
Chatham has 482 hits


KeyboardInterrupt: 

### get df

In [89]:
df = pd.DataFrame(businesses)

## I get 200 rows max and 400 status error

In [90]:
df.shape[0]


9074

In [86]:
df.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance,business_hours,attributes
0,eJzm8nb1sIiJOnBdUV-GZA,bundoo-khan-chicago-chicago-2,Bundoo Khan -Chicago,https://s3-media3.fl.yelpcdn.com/bphoto/pItT1o...,False,https://www.yelp.com/biz/bundoo-khan-chicago-c...,422,"[{'alias': 'indpak', 'title': 'Indian'}, {'ali...",4.0,"{'latitude': 41.9975505, 'longitude': -87.6939...","[pickup, delivery]",$$,"{'address1': '2539 W Devon Ave', 'address2': N...",18722419100,(872) 241-9100,430.647484,"[{'open': [{'is_overnight': False, 'start': '1...","{'business_temp_closed': None, 'menu_url': Non..."
1,vWC4wd1bYO2e-6WvN94M0g,anmol-barbecue-chicago,Anmol Barbecue,https://s3-media4.fl.yelpcdn.com/bphoto/GJIbU_...,False,https://www.yelp.com/biz/anmol-barbecue-chicag...,338,"[{'alias': 'pakistani', 'title': 'Pakistani'},...",3.9,"{'latitude': 41.997436, 'longitude': -87.693379}","[pickup, delivery]",$$,"{'address1': '6355 N Maplewood Ave', 'address2...",17735085050,(773) 508-5050,445.01997,"[{'open': [{'is_overnight': False, 'start': '1...","{'business_temp_closed': None, 'menu_url': 'ht..."
2,dc3QDT31XmMF5EU1HmrKZg,nepal-house-chicago-9,Nepal House,https://s3-media2.fl.yelpcdn.com/bphoto/fN-lzW...,False,https://www.yelp.com/biz/nepal-house-chicago-9...,305,"[{'alias': 'indpak', 'title': 'Indian'}, {'ali...",4.4,"{'latitude': 41.9975194, 'longitude': -87.6949...","[pickup, delivery]",$$,"{'address1': '2601 W Devon Ave', 'address2': N...",17736810200,(773) 681-0200,463.967705,"[{'open': [{'is_overnight': False, 'start': '1...","{'business_temp_closed': None, 'menu_url': 'ht..."
3,XOLX_XVqY776i6PyIHp_6Q,the-warbler-chicago-2,The Warbler,https://s3-media1.fl.yelpcdn.com/bphoto/D35-x6...,False,https://www.yelp.com/biz/the-warbler-chicago-2...,545,"[{'alias': 'pastashops', 'title': 'Pasta Shops...",4.4,"{'latitude': 41.9642255519757, 'longitude': -8...",[delivery],$$,"{'address1': '4535 N Lincoln Ave', 'address2':...",17736810950,(773) 681-0950,4170.127588,"[{'open': [{'is_overnight': False, 'start': '1...","{'business_temp_closed': None, 'menu_url': Non..."
4,9mhqcimD0CYvqxzfj_VXnQ,little-bad-wolf-chicago,Little Bad Wolf,https://s3-media1.fl.yelpcdn.com/bphoto/0vOL2Y...,False,https://www.yelp.com/biz/little-bad-wolf-chica...,2458,"[{'alias': 'tradamerican', 'title': 'American'...",4.4,"{'latitude': 41.983356, 'longitude': -87.669057}","[pickup, delivery]",$$,"{'address1': '1541 W Bryn Mawr Ave', 'address2...",17739426399,(773) 942-6399,2796.048061,"[{'open': [{'is_overnight': False, 'start': '1...","{'business_temp_closed': None, 'menu_url': 'ht..."


In [92]:
dc.save(df, "data/chicago_restaurants_incomplete.json")

# calling chicago_api.py function

In [7]:
from chicago_api import get_chicago_restaurants_df

df = get_chicago_restaurants_df()

print(df.shape)
print(df.head())


Albany Park has 4900 hits
Riverdale has 970 hits
Edgewater has 1100 hits
West Ridge has 1100 hits
Archer Heights has 686 hits
Armour Square has 1300 hits
Ashburn has 1900 hits
Auburn Gresham has 582 hits
Avalon Park has 443 hits
Avondale has 6500 hits
Irving Park has 6100 hits
New City has 3600 hits
Belmont Cragin has 945 hits
Hermosa has 1400 hits
Logan Square has 1700 hits
Dunning has 2700 hits
Beverly has 443 hits
Morgan Park has 340 hits
Norwood Park has 2500 hits
Lake View has 1800 hits
Lincoln Square has 1800 hits
Washington Heights has 441 hits
Bridgeport has 1200 hits
Brighton Park has 830 hits
Douglas has 930 hits
Uptown has 1400 hits
Burnside has 114 hits
Near North Side has 5500 hits
Calumet Heights has 363 hits
Near South Side has 2300 hits
Chatham has 483 hits
(7314, 18)
                       id                           alias  \
0  uQFHfnbJm7LIxLhjMKc4mQ  lawrence-fish-market-chicago-2   
1  8j97C2qR95tvpbGtoFxEAw         noon-o-kabab-chicago-11   
2  YqzDH4OmV43NeD1h4yu