In [8]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [9]:
# Load the Yelp API credentials from ~/.secret/yelp_api.json.
# The path is built from the current user's home directory so that no
# username has to be edited by hand before running the notebook.
credentials_path = os.path.join(os.path.expanduser('~'), '.secret', 'yelp_api.json')
with open(credentials_path) as f:
    login = json.load(f)
# Show only the key names so the secret values never appear in the output.
login.keys()

dict_keys(['client-id', 'api-key'])

In [4]:
# Create the Yelp API client from the 'api-key' entry of the loaded
# credentials, passing 5.0 as the timeout_s argument.
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)
# Display the client object to confirm it was created.
yelp_api

<yelpapi.yelpapi.YelpAPI at 0x1ceadccc850>

In [10]:
# Set our API call parameters. Later cells pass these to
# yelp_api.search_query as location= and term= respectively.
LOCATION = 'NY,NY'
TERM = 'cuisine'

In [25]:
# Specifying JSON_FILE filename (can include a folder).
# Include the search terms in the filename.
# NOTE(review): "cusine" looks like a misspelling of the search term
# "cuisine"; the name is left unchanged here so that any file already
# written under this name is still found -- confirm before renaming.
JSON_FILE = "Data/results_in_progress_NY_cusine.json"
JSON_FILE

'Data/results_in_progress_NY_cusine.json'

In [23]:
def create_json_file(JSON_FILE, delete_if_exists=False):
    """Make sure JSON_FILE exists, creating it with an empty JSON list if needed.

    Args:
        JSON_FILE: Path of the JSON file. It may include a folder, which is
            created when it does not exist yet.
        delete_if_exists: When true, an existing file is removed and replaced
            by a fresh file holding an empty list. When false (default), an
            existing file is left untouched.

    Returns:
        None. Status messages are printed to stdout.
    """
    if os.path.isfile(JSON_FILE):
        if not delete_if_exists:
            ## Keep the existing file exactly as it is
            print(f"[i] {JSON_FILE} already exists.")
            return

        print(f"[!] {JSON_FILE} already exists. Deleting previous file...")
        os.remove(JSON_FILE)
        ## Fall through: the file is now gone, so it is re-created below

    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to new file.")

    ## CREATE ANY NEEDED FOLDERS
    # dirname is an empty string when JSON_FILE is a bare filename
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    ## Save empty list to start the json file
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)
    

In [26]:
## Create a new empty json file (deleting the previous one if it exists)
create_json_file(JSON_FILE, delete_if_exists=True)

## Load previous results and use len of results for offset
with open(JSON_FILE, 'r') as f:
    previous_results = json.load(f)

## set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

# use our yelp_api variable's search_query method to perform our API call
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                                offset=n_results)

## How many results total?
total_results = results['total']
## How many did we get the details for?
results_per_page = len(results['businesses'])

# Use math.ceil to round up for the total number of pages of results.
# An empty first page would make the division raise ZeroDivisionError, so in
# that case there is nothing left to page through.
if results_per_page > 0:
    n_pages = math.ceil((total_results - n_results) / results_per_page)
else:
    n_pages = 0
n_pages

[i] Data/results_in_progress_NY_cusine.json not found. Saving empty list to new file.
- 0 previous results found.


800

In [27]:
# Upper bound on how many results are collected; the loop stops before the
# saved list would grow past it.
MAX_RESULTS = 1000

for _ in tqdm_notebook(range(1, n_pages + 1)):

    ## Read in results in progress file and check the length.
    ## Re-reading on every pass means the file on disk is always the source of
    ## truth for the offset, so an interrupted run can be resumed.
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    ## save number of results to use as offset
    n_results = len(previous_results)

    ## Stop before requesting a page that would go past the limit.
    ## (The message says "api calls", but the value checked is the number of
    ## results saved so far plus one more page.)
    if (n_results + results_per_page) > MAX_RESULTS:
        print('Exceeded 1000 api calls. Stopping loop.')
        break

    ## use n_results as the OFFSET
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    ## An empty page means the offset would never advance; stop instead of
    ## repeating the same request for every remaining iteration.
    new_businesses = results['businesses']
    if not new_businesses:
        print('No more results returned. Stopping loop.')
        break

    ## append new results and save to file
    previous_results.extend(new_businesses)
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)

    ## short pause between requests
    time.sleep(.2)

  0%|          | 0/800 [00:00<?, ?it/s]

Exceeded 1000 api calls. Stopping loop.


In [28]:
# Load the final results from the JSON file written by the loop above into a
# DataFrame.
# NOTE(review): pd.read_json infers column dtypes, so some values (for example
# the phone column) may not keep the exact form stored in the file -- confirm
# if the raw strings are needed.
final_df = pd.read_json(JSON_FILE)
# Show the first and last five rows to check both ends of the collected data.
display(final_df.head(), final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,_z2_1TE_1brlvljiGtJ4mA,aahar-indian-cuisine-new-york,Aahar Indian Cuisine,https://s3-media2.fl.yelpcdn.com/bphoto/V5QIjl...,False,https://www.yelp.com/biz/aahar-indian-cuisine-...,441,"[{'alias': 'indpak', 'title': 'Indian'}]",4.5,"{'latitude': 40.71334, 'longitude': -74.0081}","[delivery, pickup]",$$,"{'address1': '10 Murray St', 'address2': None,...",16466493374,(646) 649-3374,1463.316767
1,zyQpS4QrYYB0VKXf7JPUlQ,atithi-indian-cuisine-brooklyn,Atithi Indian Cuisine,https://s3-media3.fl.yelpcdn.com/bphoto/ZUBeEb...,False,https://www.yelp.com/biz/atithi-indian-cuisine...,209,"[{'alias': 'indpak', 'title': 'Indian'}]",5.0,"{'latitude': 40.714765, 'longitude': -73.961606}","[delivery, pickup, restaurant_reservation]",$$,"{'address1': '159 Grand St', 'address2': None,...",17186849192,(718) 684-9192,2948.013206
2,_Ki5XZTB8mL9RDYGj7KP0w,22-thai-cuisine-new-york,22 Thai Cuisine,https://s3-media3.fl.yelpcdn.com/bphoto/2TjKUO...,False,https://www.yelp.com/biz/22-thai-cuisine-new-y...,209,"[{'alias': 'thai', 'title': 'Thai'}]",2.5,"{'latitude': 40.70925, 'longitude': -74.0088}","[delivery, pickup]",$,"{'address1': '59 Nassau St', 'address2': '', '...",12127329250,(212) 732-9250,1301.160726
3,x-T3WaLPnbVrxc-ruzFhyQ,mughlai-indian-cuisine-new-york-9,Mughlai Indian Cuisine,https://s3-media3.fl.yelpcdn.com/bphoto/kxIbAP...,False,https://www.yelp.com/biz/mughlai-indian-cuisin...,32,"[{'alias': 'indpak', 'title': 'Indian'}]",4.5,"{'latitude': 40.70953534633694, 'longitude': -...","[delivery, pickup]",,"{'address1': '120 Cedar St', 'address2': '', '...",16463988985,(646) 398-8985,1624.04387
4,TUGZA-1JwJ_a7GTRTuGjQg,caravan-uyghur-cuisine-new-york,Caravan Uyghur Cuisine,https://s3-media3.fl.yelpcdn.com/bphoto/PG1SFc...,True,https://www.yelp.com/biz/caravan-uyghur-cuisin...,69,"[{'alias': 'kebab', 'title': 'Kebab'}, {'alias...",4.5,"{'latitude': 40.7074980083115, 'longitude': -7...","[delivery, pickup]",,"{'address1': '200 Water St', 'address2': '', '...",19172617445,(917) 261-7445,917.508643


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
995,kp5HpNxyJQGY13VDN-Yq9Q,qanoon-new-york-3,Qanoon,https://s3-media1.fl.yelpcdn.com/bphoto/tCjSXq...,False,https://www.yelp.com/biz/qanoon-new-york-3?adj...,142,"[{'alias': 'mideastern', 'title': 'Middle East...",4.0,"{'latitude': 40.74524, 'longitude': -74.00202}","[pickup, delivery]",$$,"{'address1': '180 9th Ave', 'address2': None, ...",16468439711,(646) 843-9711,4471.452724
996,UM3DuPsG6UKdmsC23O_Icw,aska-brooklyn-3,Aska,https://s3-media1.fl.yelpcdn.com/bphoto/JbqcbE...,False,https://www.yelp.com/biz/aska-brooklyn-3?adjus...,151,"[{'alias': 'scandinavian', 'title': 'Scandinav...",4.5,"{'latitude': 40.71226, 'longitude': -73.96641}",[],$$$$,"{'address1': '47 South 5th St', 'address2': ''...",19293376792,(929) 337-6792,2465.602012
997,W2CqfHWZoOiRuW2jt1J-xQ,turks-inn-brooklyn,Turk's Inn,https://s3-media3.fl.yelpcdn.com/bphoto/HFVZOo...,False,https://www.yelp.com/biz/turks-inn-brooklyn?ad...,76,"[{'alias': 'turkish', 'title': 'Turkish'}, {'a...",4.5,"{'latitude': 40.70554, 'longitude': -73.922316}",[pickup],$$$,"{'address1': '234 Starr St', 'address2': '', '...",17182150025,(718) 215-0025,6065.039785
998,q2Vq56jenOx8cXD2fCupYg,leitao-new-york-3,Leitao,https://s3-media3.fl.yelpcdn.com/bphoto/NA5nrj...,False,https://www.yelp.com/biz/leitao-new-york-3?adj...,138,"[{'alias': 'portuguese', 'title': 'Portuguese'...",4.5,"{'latitude': 40.73495, 'longitude': -74.00628}","[pickup, delivery]",$$$,"{'address1': '547 Hudson St', 'address2': '', ...",12128109944,(212) 810-9944,3433.593862
999,WS0ipFMNDJDy_MOylP-JGw,felix-new-york,Felix,https://s3-media3.fl.yelpcdn.com/bphoto/e5uEok...,False,https://www.yelp.com/biz/felix-new-york?adjust...,466,"[{'alias': 'french', 'title': 'French'}]",2.5,"{'latitude': 40.7226295, 'longitude': -74.0037...","[pickup, delivery]",$$$,"{'address1': '340 W Broadway', 'address2': '',...",12124310021,(212) 431-0021,2068.935798


In [30]:
# Count rows whose business id already appeared in an earlier row
# (the first occurrence of each id is not counted as a duplicate).
final_df['id'].duplicated().sum()

200