In [1]:
import json
import os
import time
from pprint import pprint

import requests
from tqdm import tqdm

In [2]:
# Yelp API credentials and endpoint configuration.
# The key is read from the YELP_API_KEY environment variable so the secret
# never has to be written into the notebook. When the variable is unset the
# original placeholder string is used, which keeps the cell runnable.
API_KEY = os.environ.get("YELP_API_KEY", "YELP_API_KEY")

# Business search endpoint queried by every request in this notebook.
ENDPOINT = "https://api.yelp.com/v3/businesses/search"

# Bearer-token header sent with each request.
HEADERS = {
    'Authorization': 'Bearer {}'.format(API_KEY)
}

In [3]:
# Cuisines to query, one search per entry.
CUISINES = [
    "chinese",
    "indian",
    "japanese",
]

# Shared state filled in by the fetch step:
#   seen_ids        - business ids already collected, used to skip duplicates
#   all_restaurants - accumulated business records across every cuisine
seen_ids, all_restaurants = set(), []

In [4]:
def fetch_data_for_cuisine(cuisine, location='Manhattan, NY', limit=50, pages=1,
                           max_retries=3):
    """Fetch businesses for one cuisine from the search endpoint.

    Sends up to ``pages`` GET requests to ``ENDPOINT`` (with ``HEADERS``),
    searching for ``"<cuisine> restaurants"`` in ``location``. Each returned
    business whose ``id`` is not already in the module-level ``seen_ids`` set
    is tagged with a ``'cuisine'`` key, appended to the result, and its id is
    added to ``seen_ids``.

    Args:
        cuisine: Cuisine name used in the search term and stored on each record.
        location: Value sent as the ``location`` query parameter.
        limit: Value sent as the ``limit`` query parameter (results per page).
        pages: Maximum number of pages to request.
        max_retries: How many times a single page is retried after an HTTP 429
            response before giving up on this cuisine.

    Returns:
        A list of the newly seen business dicts, each with ``'cuisine'`` set.
        The list may be shorter than ``pages * limit`` (or empty) when results
        run out, duplicates are skipped, or a request fails.

    Side effects:
        Mutates the module-level ``seen_ids`` set and sleeps between requests.
    """
    restaurants = []
    params = {
        'term': f'{cuisine} restaurants',
        'location': location,
        'limit': limit
    }
    offset = 0

    for _ in tqdm(range(pages), desc=cuisine):
        if offset:
            params['offset'] = offset

        # Retry the *same* page after a rate-limit response; previously the
        # wait was followed by moving on, so the page was silently lost.
        for attempt in range(max_retries + 1):
            response = requests.get(url=ENDPOINT, headers=HEADERS,
                                    params=params, timeout=30)
            if response.status_code != 429 or attempt == max_retries:
                break
            # Reached API rate limit
            print("Rate limit reached! Waiting for 5 minutes...")
            time.sleep(300)  # sleep for 5 minutes

        if response.status_code != 200:
            print(f"Error fetching data for {cuisine}!")
            break

        # Tolerate a payload without the expected key instead of raising.
        businesses = response.json().get('businesses', [])
        if not businesses:
            break  # nothing returned for this page, so stop paging

        for business in businesses:
            if business['id'] not in seen_ids:
                business['cuisine'] = cuisine
                restaurants.append(business)
                seen_ids.add(business['id'])

        # Advance by what the API returned, not by what was kept: skipped
        # duplicates must still count or the next page overlaps this one.
        offset += len(businesses)
        time.sleep(1)  # Small delay between requests to be kind to the API.
    return restaurants

In [5]:
# Query each cuisine in turn and append its new records to the combined list.
for cuisine in CUISINES:
    data = fetch_data_for_cuisine(cuisine)
    all_restaurants += data  # in-place append of this cuisine's records

chinese: 100%|██████████| 1/1 [00:01<00:00,  1.91s/it]
indian: 100%|██████████| 1/1 [00:02<00:00,  2.24s/it]
japanese: 100%|██████████| 1/1 [00:01<00:00,  1.77s/it]


In [6]:
# Report how many unique businesses were collected across all cuisines.
print(f"Total restaurants {len(all_restaurants)}")

Total restaurants 149


In [7]:
# Pretty-print the first collected record to inspect the fields of one entry.
pprint(all_restaurants[0])

{'alias': 'blue-willow-夜来湘-new-york-2',
 'categories': [{'alias': 'szechuan', 'title': 'Szechuan'}],
 'coordinates': {'latitude': 40.76292, 'longitude': -73.976546},
 'cuisine': 'chinese',
 'display_phone': '(212) 213-2299',
 'distance': 348.0137568924224,
 'id': 'XsXLVWr1UZWVhKThNvNiaA',
 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/1E59vOqmXZHBlJe0lLBHtA/o.jpg',
 'is_closed': False,
 'location': {'address1': '40 W 56th St',
              'address2': None,
              'address3': '',
              'city': 'New York',
              'country': 'US',
              'display_address': ['40 W 56th St', 'New York, NY 10019'],
              'state': 'NY',
              'zip_code': '10019'},
 'name': 'Blue Willow 夜来湘',
 'phone': '+12122132299',
 'rating': 4.5,
 'review_count': 973,
 'transactions': ['delivery', 'pickup'],
 'url': 'https://www.yelp.com/biz/blue-willow-%E5%A4%9C%E6%9D%A5%E6%B9%98-new-york-2?adjust_creative=jtr0-h2hTZMxFOWOENvd4Q&utm_campaign=yelp_api_v3&utm_medium=api

In [8]:
# Persist every collected record as a single JSON array on disk.
with open('yelp_restaurants_data.json', 'w') as outfile:
    outfile.write(json.dumps(all_restaurants))