In [5]:
import json
import requests
import time

from tqdm import tqdm
from pprint import pprint

In [6]:
API_KEY = "YELP_API_KEY"

ENDPOINT = "https://api.yelp.com/v3/businesses/search"

HEADERS = {
    'Authorization': 'Bearer {}'.format(API_KEY)
}

In [7]:
CUISINES = ["chinese", "indian", "japanese", "thai", "italian"]

seen_ids = set()
all_restaurants = []

In [8]:
def fetch_data_for_cuisine(cuisine):
    global seen_ids
    restaurants = []
    PARAMS = {
        'term': f'{cuisine} restaurants',
        'location': 'Manhattan, NY',
        'limit': 50
    }

    for _ in tqdm(range(20), desc=cuisine):  # 20 * 50 = 1000
        response = requests.get(url=ENDPOINT, headers=HEADERS, params=PARAMS)
        if response.status_code == 200:
            businesses = response.json()['businesses']
            for business in businesses:
                if business['id'] not in seen_ids:
                    restaurants.append(business)
                    seen_ids.add(business['id'])
            PARAMS['offset'] = len(restaurants)
        elif response.status_code == 429:
            # Reached API rate limit
            print("Rate limit reached! Waiting for 5 minutes...")
            time.sleep(300) # sleep for 5 minutes
        else:
            print(f"Error fetching data for {cuisine}!")
            break
        time.sleep(1)  # Small delay between requests to be kind to the API.
    return restaurants

In [9]:
for cuisine in CUISINES:
    data = fetch_data_for_cuisine(cuisine)
    all_restaurants.extend(data)

chinese: 100%|██████████| 20/20 [00:39<00:00,  1.99s/it]
indian: 100%|██████████| 20/20 [00:37<00:00,  1.87s/it]
japanese: 100%|██████████| 20/20 [00:38<00:00,  1.93s/it]
thai: 100%|██████████| 20/20 [00:35<00:00,  1.80s/it]
italian: 100%|██████████| 20/20 [00:47<00:00,  2.38s/it]


In [10]:
print("Total restaurants", len(all_restaurants))

Total restaurants 3267


In [12]:
pprint(all_restaurants[0])

{'alias': 'blue-willow-夜来湘-new-york-2',
 'categories': [{'alias': 'szechuan', 'title': 'Szechuan'}],
 'coordinates': {'latitude': 40.76292, 'longitude': -73.976546},
 'display_phone': '(212) 213-2299',
 'distance': 348.0137568924224,
 'id': 'XsXLVWr1UZWVhKThNvNiaA',
 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/1E59vOqmXZHBlJe0lLBHtA/o.jpg',
 'is_closed': False,
 'location': {'address1': '40 W 56th St',
              'address2': None,
              'address3': '',
              'city': 'New York',
              'country': 'US',
              'display_address': ['40 W 56th St', 'New York, NY 10019'],
              'state': 'NY',
              'zip_code': '10019'},
 'name': 'Blue Willow 夜来湘',
 'phone': '+12122132299',
 'rating': 4.5,
 'review_count': 973,
 'transactions': ['delivery', 'pickup'],
 'url': 'https://www.yelp.com/biz/blue-willow-%E5%A4%9C%E6%9D%A5%E6%B9%98-new-york-2?adjust_creative=FXlRuFWrbRzecuY2jWQjgQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm

In [13]:
with open('yelp_restaurants_data.json', 'w') as outfile:
    json.dump(all_restaurants, outfile)