In [68]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [69]:
## install tqdm if not already installed
# !pip install tqdm

In [70]:
# Load API credentials
with open('/Users/whitefreeze/.secret/yelp_api.json') as f:
    login = json.load(f)
login.keys()

dict_keys(['client-id', 'api-key'])

In [71]:
# Instantiate YelpAPI variable
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)
yelp_api

<yelpapi.yelpapi.YelpAPI at 0x11e9b72b0>

In [72]:
# For API help resources
help(yelp_api.search_query)

Help on method search_query in module yelpapi.yelpapi:

search_query(**kwargs) method of yelpapi.yelpapi.YelpAPI instance
    Query the Yelp Search API.
    
    documentation: https://www.yelp.com/developers/documentation/v3/business_search
    
    required parameters:
        * one of either:
            * location - text specifying a location to search for
            * latitude and longitude



In [73]:
# set API call parameters
LOCATION = "Chattanooga, TN"
TERM = "asian"

In [74]:
# Specify JSON_FILE filename (including folder)
# include search terms in the filename
JSON_FILE = "Data/results_in_progress_Chattanooga_asian.json"
JSON_FILE

'Data/results_in_progress_Chattanooga_asian.json'

In [75]:
# Find key returned from API call
# use yelp_api variable's search_query method to perform API call
results = yelp_api.search_query(location=LOCATION, 
                                term=TERM)
print(f'Keys from results: {results.keys()}')

Keys from results: dict_keys(['businesses', 'total', 'region'])


In [78]:
# Function to write new file or overwrite previous file
def create_json_file(JSON_FILE, delete_if_exists = False): # Change to TRUE to overwrite existsing
    
    # Check if JSON_FILE exists
    file_exists = os.path.isfile(JSON_FILE)
    
    # If file DOES exist:
    if file_exists == True:
        
        # Check if user wants to delete file, if it exists
        if delete_if_exists == True:
        
            print(f"[!] {JSON_FILE} already exists. Deleting previous file...")
            
            # Delete file and confirm it no longer exists.
            os.remove(JSON_FILE)
            
            # Recursive call to function after old file deleted
            create_json_file(JSON_FILE, delete_if_exists = False)
        
        else:
            print(f"[i] {JSON_FILE} already exists.")
            
    # If file does NOT exist:
    else:
        
        # Inform user and save empty list
        print(f"[i] {JSON_FILE} not found. Saving empty list to new file.")
        
        # Create any needed folders
        # Get folder name only
        folder = os.path.dirname(JSON_FILE)
        
        # If JSON_FILE is included in a folder
        if len(folder) > 0:
            
            # Create the folder
            os.makedirs(folder, exist_ok = True)
            
        # Save empty list to start the json file
        with open(JSON_FILE, 'w') as f:
            json.dump([], f)

In [79]:
create_json_file(JSON_FILE, delete_if_exists = False)

[i] Data/results_in_progress_Chattanooga_asian.json not found. Saving empty list to new file.


In [80]:
# Loop through each page of results from Yelp_API. Append results to running file

# First, verify file empty/load previous results. 
with open(JSON_FILE, 'r') as f:
    previous_results = json.load(f)
    
# Display number of previous results
n_results = len(previous_results)
print(f' - {n_results} previous results found.')

# Loop through each page of results from Yelp_API. Append results to running file
for i in tqdm_notebook( range( 1, n_pages+1)):
    
    # Read in results in progress file and check length
    with open(JSON_FILE, 'r') as f:
        previoius_results = json.load(f)
        
    # Save number of results to use as the offset
    n_results = len(previous_results)
    
    # To not exceed Yelp's free tier limits
    if(n_results + results_per_page) > 1000:
        print('Exceeded 1000 api calls. Stopping loop.')
        break
    
    # Use n_results as the offset
    results = yelp_api.search_query(location = LOCATION,
                                   term = TERM,
                                   offset = n_results)
    
# Append new results and save to file
previous_results.extend(results['businesses'])

# How many results in total?
total_results = results['total']
print(f'Total results for query: {total_results}')

# How many did we get the details for?
results_per_page = len(results['businesses'])
print(f'Results per page: {results_per_page}')

# Use math.ceil to round up for the total number of pages of results.
n_pages = math.ceil((total_results - n_results)/ results_per_page)
print(f'Total number of result pages: {n_pages}')

# Display previous_results
with open(JSON_FILE, 'w') as f:
    json.dump(previous_results, f)
    
# add 200 ms pause between api requests
time.sleep(.2)

 - 0 previous results found.


  0%|          | 0/4 [00:00<?, ?it/s]

Total results for query: 164
Results per page: 20
Total number of result pages: 9


In [81]:
# Load final results
final_df = pd.read_json(JSON_FILE)
display(final_df.head(), final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,NEiSS5cDAeRgHc7TS0pxCg,steam-boys-cleveland,Steam Boys,https://s3-media3.fl.yelpcdn.com/bphoto/8Jm2hR...,False,https://www.yelp.com/biz/steam-boys-cleveland?...,35,"[{'alias': 'dimsum', 'title': 'Dim Sum'}, {'al...",4.0,"{'latitude': 35.2100599, 'longitude': -84.8566...","[delivery, pickup]",$$,"{'address1': '674 Sgt Paul Huff Pkwy NW', 'add...",14237900650,(423) 790-0650,38425.803383
1,zXrbBeIy83GaG9envfIVUg,han-mi-chattanooga-2,Han-Mi,https://s3-media2.fl.yelpcdn.com/bphoto/QCgKvz...,False,https://www.yelp.com/biz/han-mi-chattanooga-2?...,129,"[{'alias': 'korean', 'title': 'Korean'}, {'ali...",4.5,"{'latitude': 35.020864, 'longitude': -85.320737}",[],$$,"{'address1': '3103 Broad St', 'address2': None...",14235417273,(423) 541-7273,10116.896164
2,LpqPw6eU66voDREOx61OXg,volcano-korean-bbq-chattanooga,Volcano Korean BBQ,https://s3-media3.fl.yelpcdn.com/bphoto/Rl8QsW...,False,https://www.yelp.com/biz/volcano-korean-bbq-ch...,49,"[{'alias': 'bbq', 'title': 'Barbeque'}, {'alia...",4.0,"{'latitude': 35.0312, 'longitude': -85.15562}","[delivery, pickup]",,"{'address1': '2011 Gunbarrel Rd', 'address2': ...",14232129968,(423) 212-9968,11934.090409
3,Pn4YyKqmWqYn4sLYre3gIA,super-pho-and-grill-chattanooga,Super Pho & Grill,https://s3-media3.fl.yelpcdn.com/bphoto/hSahwJ...,False,https://www.yelp.com/biz/super-pho-and-grill-c...,47,"[{'alias': 'vietnamese', 'title': 'Vietnamese'}]",4.5,"{'latitude': 35.045631, 'longitude': -85.161837}",[delivery],,"{'address1': '7003 Lee Hwy', 'address2': 'Ste ...",14238993236,(423) 899-3236,10542.414263
4,xJkuexCB2mHcoeEysXeqFw,thai-esan-chattanooga,Thai Esan,https://s3-media1.fl.yelpcdn.com/bphoto/BLm7e4...,False,https://www.yelp.com/biz/thai-esan-chattanooga...,262,"[{'alias': 'thai', 'title': 'Thai'}, {'alias':...",4.5,"{'latitude': 34.99573, 'longitude': -85.23878}","[delivery, pickup]",$$,"{'address1': '4330 Ringgold Rd', 'address2': '...",14236688924,(423) 668-8924,11394.125403


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
15,xegRlihfq9GTMwiM-Ct95w,rain-thai-bistro-chattanooga,Rain Thai Bistro,https://s3-media2.fl.yelpcdn.com/bphoto/F2E58A...,False,https://www.yelp.com/biz/rain-thai-bistro-chat...,356,"[{'alias': 'thai', 'title': 'Thai'}, {'alias':...",3.5,"{'latitude': 35.044631472223365, 'longitude': ...","[delivery, pickup]",$$,"{'address1': '6933 Lee Hwy', 'address2': 'Ste ...",14233865586,(423) 386-5586,10272.400811
16,HcM5xrWhdsYmkkaighVEIg,tao-asian-cuisine-ringgold,Tao Asian Cuisine,https://s3-media2.fl.yelpcdn.com/bphoto/EzAaJR...,False,https://www.yelp.com/biz/tao-asian-cuisine-rin...,23,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 34.91195833, 'longitude': -85.130...",[],$$,"{'address1': '114 Remco Shops Ln', 'address2':...",17069521818,(706) 952-1818,23662.098314
17,le6msBiVy_C_FlWL_hWycA,chopstix-chattanooga-3,Chopstix,https://s3-media1.fl.yelpcdn.com/bphoto/we3au2...,False,https://www.yelp.com/biz/chopstix-chattanooga-...,214,"[{'alias': 'vietnamese', 'title': 'Vietnamese'...",4.0,"{'latitude': 35.0432567762187, 'longitude': -8...",[delivery],$$,"{'address1': '6903 Lee Hwy', 'address2': '', '...",14233050537,(423) 305-0537,10394.821497
18,7_aDCNR6edRhS7gvmlk2jQ,totto-sushi-and-grill-chattanooga,Totto Sushi & Grill,https://s3-media2.fl.yelpcdn.com/bphoto/7CG45M...,False,https://www.yelp.com/biz/totto-sushi-and-grill...,233,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.0,"{'latitude': 35.06183, 'longitude': -85.30565}",[delivery],$$,"{'address1': '330 Frazier Ave', 'address2': 'S...",14235088898,(423) 508-8898,5722.661663
19,l0r5ohrWFRpBpvZcBgwN5A,taichi-bubble-tea-downtown-chattanooga,Taichi Bubble Tea Downtown,https://s3-media4.fl.yelpcdn.com/bphoto/jyh3Pr...,False,https://www.yelp.com/biz/taichi-bubble-tea-dow...,141,"[{'alias': 'bubbletea', 'title': 'Bubble Tea'}...",4.0,"{'latitude': 35.05147, 'longitude': -85.31089}","[delivery, pickup]",$$,"{'address1': '411 Broad St', 'address2': 'Ste ...",14235415100,(423) 541-5100,6873.432708


In [82]:
# Check for duplicate results
final_df.duplicated(subset= 'id').sum()

0

In [83]:
# Drop duplicate ids and confirm there are no more duplicates
fina_df = final_df.drop_duplicates(subset= 'id')
fina_df.duplicated(subset= 'id').sum()

0

In [84]:
# Save the final results to a compressed csv
final_df.to_csv('Data/final_results_Chattanooga_asian.csv.gz',
compression ="gzip", index = False)