In [77]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [78]:
## install tqdm if not already installed
# !pip install tqdm

In [79]:
# Load API credentials
with open('/Users/whitefreeze/.secret/yelp_api.json') as f:
    login = json.load(f)
login.keys()

dict_keys(['client-id', 'api-key'])

In [80]:
# Instantiate YelpAPI variable
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)
yelp_api

<yelpapi.yelpapi.YelpAPI at 0x11dd79e20>

In [81]:
# For API help resources
help(yelp_api.search_query)

Help on method search_query in module yelpapi.yelpapi:

search_query(**kwargs) method of yelpapi.yelpapi.YelpAPI instance
    Query the Yelp Search API.
    
    documentation: https://www.yelp.com/developers/documentation/v3/business_search
    
    required parameters:
        * one of either:
            * location - text specifying a location to search for
            * latitude and longitude



In [82]:
# set API call parameters
LOCATION = "Chattanooga, TN"
TERM = "asian"

In [83]:
# Specify JSON_FILE filename (including folder)
# include search terms in the filename
JSON_FILE = f"Data/results_in_progress_Chattanooga_asian.json"
JSON_FILE

'Data/results_in_progress_Chattanooga_asian.json'

In [84]:
# Find key returned from API call
# use yelp_api variable's search_query method to perform API call
results = yelp_api.search_query(location=LOCATION, 
                                term=TERM)
print(f'Keys from results: {results.keys()}')

Keys from results: dict_keys(['businesses', 'total', 'region'])


In [85]:
# Function to write new file or overwrite previous file
def create_json_file(JSON_FILE, delete_if_exists = False): # Change to TRUE to overwrite existsing
    
    # Check if JSON_FILE exists
    file_exists = os.path.isfile(JSON_FILE)
    
    # If file DOES exist:
    if file_exists == True:
        
        # Check if user wants to delete file, if it exists
        if delete_if_exists == True:
        
            print(f"[!] {JSON_FILE} already exists. Deleting previous file...")
            
            # Delete file and confirm it no longer exists.
            os.remove(JSON_FILE)
            
            # Recursive call to function after old file deleted
            create_json_file(JSON_FILE, delete_if_exists = False)
        
        else:
            print(f"[i] {JSON_FILE} already exists.")
            
    # If file does NOT exist:
    else:
        
        # Inform user and save empty list
        print(f"[i] {JSON_FILE} not found. Saving empty list to new file.")
        
        # Create any needed folders
        # Get folder name only
        folder = os.path.dirname(JSON_FILE)
        
        # If JSON_FILE is included in a folder
        if len(folder) > 0:
            
            # Create the folder
            os.makedirs(folder, exist_ok = True)
            
        # Save empty list to start the json file
        with open(JSON_FILE, 'w') as f:
            json.dump([], f)

In [86]:
# Formula to write JSON 
def write_json(new_data, filename):
    """Appends a list of records (new_data) to a json file (filename).
    Adapted from: https://www.geeksforgeeks.org/append-to-json-file-using-python/"""
    
    with open(filename, 'r+') as file:
        
        # First load existing data into a dict
        file_data = json.load(file)
        
        # Choose extend  (append formats JSON hierachy incorectly)
        file_data.extend(new_data['businesses'])
            
        # Sets file's current position at offset.
        file.seek(0)
        
        # Convert back to json
        json.dump(file_data, file)
        
        #previous_results.extend(results['businesses'])

In [87]:
create_json_file(JSON_FILE, delete_if_exists = True)  ## Change back to False

[!] Data/results_in_progress_Chattanooga_asian.json already exists. Deleting previous file...
[i] Data/results_in_progress_Chattanooga_asian.json not found. Saving empty list to new file.


In [88]:
total_results

163

In [89]:
# Loop through each page of results from Yelp_API. Append results to running file

# First, verify file empty/load previous results. 
with open(JSON_FILE, 'r') as f:
    previous_results = json.load(f)
    
# Display number of previous results
n_results = len(previous_results)
print(f' - {n_results} previous results found.')

# Append new results and save to file
previous_results.extend(results['businesses'])

# How many results in total?
total_results = results['total']
print(f'Total results for query: {total_results}')

# How many did we get the details for?
results_per_page = len(results['businesses'])
print(f'Results per page: {results_per_page}')

# Use math.ceil to round up for the total number of pages of results.
n_pages = math.ceil((total_results - n_results)/ results_per_page)
print(f'Total number of result pages: {n_pages}')

# LOOP
# Loop through each page of results from Yelp_API. Append results to running file
for i in tqdm_notebook( range( 1, n_pages+1)):

    
    # Read in results in progress file and check length
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)

    # Save number of results to use as the offset
    n_results = len(previous_results)

    print(n_results)

    # To not exceed Yelp's free tier limits
    if(n_results + results_per_page) > 1000:
        print('Exceeded 1000 api calls. Stopping loop.')
        break

    # Use n_results as the offset
    results = yelp_api.search_query(location = LOCATION,
                                   term = TERM,
                                   offset = n_results)

    # Append/extend results to existing file using a pre-made function
    write_json(results, JSON_FILE)

    # add 200 ms pause between api requests
    time.sleep(.2)
    

# Display previous_results
with open(JSON_FILE, 'w') as f:
    json.dump(previous_results, f)

 - 0 previous results found.
Total results for query: 163
Results per page: 20
Total number of result pages: 9


  0%|          | 0/9 [00:00<?, ?it/s]

0
20
40
60
80
100
120
140
150


In [96]:
n_results

150

In [90]:
# Load final results
final_df = pd.read_json(JSON_FILE)
display(final_df.head(), final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,zXrbBeIy83GaG9envfIVUg,han-mi-chattanooga-2,Han-Mi,https://s3-media2.fl.yelpcdn.com/bphoto/QCgKvz...,False,https://www.yelp.com/biz/han-mi-chattanooga-2?...,129,"[{'alias': 'korean', 'title': 'Korean'}, {'ali...",4.5,"{'latitude': 35.020864, 'longitude': -85.320737}",[],$$,"{'address1': '3103 Broad St', 'address2': None...",14235417273,(423) 541-7273,10116.896164
1,NEiSS5cDAeRgHc7TS0pxCg,steam-boys-cleveland,Steam Boys,https://s3-media3.fl.yelpcdn.com/bphoto/8Jm2hR...,False,https://www.yelp.com/biz/steam-boys-cleveland?...,34,"[{'alias': 'dimsum', 'title': 'Dim Sum'}, {'al...",4.0,"{'latitude': 35.2100599, 'longitude': -84.8566...","[pickup, delivery]",$$,"{'address1': '674 Sgt Paul Huff Pkwy NW', 'add...",14237900650,(423) 790-0650,38425.803383
2,xJkuexCB2mHcoeEysXeqFw,thai-esan-chattanooga,Thai Esan,https://s3-media3.fl.yelpcdn.com/bphoto/tm_6Y-...,False,https://www.yelp.com/biz/thai-esan-chattanooga...,262,"[{'alias': 'thai', 'title': 'Thai'}, {'alias':...",4.5,"{'latitude': 34.99573, 'longitude': -85.23878}","[pickup, delivery]",$$,"{'address1': '4330 Ringgold Rd', 'address2': '...",14236688924,(423) 668-8924,11394.125403
3,Pn4YyKqmWqYn4sLYre3gIA,super-pho-and-grill-chattanooga,Super Pho & Grill,https://s3-media3.fl.yelpcdn.com/bphoto/hSahwJ...,False,https://www.yelp.com/biz/super-pho-and-grill-c...,49,"[{'alias': 'vietnamese', 'title': 'Vietnamese'}]",4.5,"{'latitude': 35.045631, 'longitude': -85.161837}",[delivery],,"{'address1': '7003 Lee Hwy', 'address2': 'Ste ...",14238993236,(423) 899-3236,10542.414263
4,LpqPw6eU66voDREOx61OXg,volcano-korean-bbq-chattanooga,Volcano Korean BBQ,https://s3-media3.fl.yelpcdn.com/bphoto/Rl8QsW...,False,https://www.yelp.com/biz/volcano-korean-bbq-ch...,48,"[{'alias': 'bbq', 'title': 'Barbeque'}, {'alia...",4.0,"{'latitude': 35.0312, 'longitude': -85.15562}","[pickup, delivery]",,"{'address1': '2011 Gunbarrel Rd', 'address2': ...",14232129968,(423) 212-9968,11934.090409


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
145,EueZwALRh2NmFxfAXsjSEw,ingles-super-market-jasper,Ingles Super Market,https://s3-media1.fl.yelpcdn.com/bphoto/NiAGMO...,False,https://www.yelp.com/biz/ingles-super-market-j...,11,"[{'alias': 'grocery', 'title': 'Grocery'}]",4.0,"{'latitude': 34.4636108819589, 'longitude': -8...",[],$$,"{'address1': '1250 Appalachian Hwy', 'address2...",17066922696,(706) 692-2696,101731.283452
146,3denrvmw19MHBAw_yJyaeg,kokita-sweetwater-2,Kokita,https://s3-media3.fl.yelpcdn.com/bphoto/3CcscZ...,False,https://www.yelp.com/biz/kokita-sweetwater-2?a...,5,"[{'alias': 'catering', 'title': 'Caterers'}]",3.0,"{'latitude': 35.6009138498568, 'longitude': -8...",[],,"{'address1': '503 N Main St', 'address2': '', ...",18659245843,(865) 924-5843,91524.356767
147,XL9m4qF9nUj7E49FpGB0QQ,panera-bread-chattanooga-5,Panera Bread,https://s3-media1.fl.yelpcdn.com/bphoto/m7waQ_...,False,https://www.yelp.com/biz/panera-bread-chattano...,63,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",3.0,"{'latitude': 35.129439, 'longitude': -85.242501}","[pickup, delivery]",$$,"{'address1': '562 Northgate Mall Drive', 'addr...",14238770223,(423) 877-0223,3937.960103
148,RPMoAQr-bZvW3AqfA56jYA,walmart-supercenter-chattanooga-3,Walmart Supercenter,https://s3-media3.fl.yelpcdn.com/bphoto/AHfoMW...,False,https://www.yelp.com/biz/walmart-supercenter-c...,27,"[{'alias': 'deptstores', 'title': 'Department ...",2.0,"{'latitude': 35.0943225222391, 'longitude': -8...",[],$,"{'address1': '501 Signal Mountain Rd', 'addres...",14237567202,(423) 756-7202,6552.590568
149,Ub3_SVxp92Idrsk4xO1VQg,panera-bread-cleveland,Panera Bread,https://s3-media3.fl.yelpcdn.com/bphoto/oCpwhP...,False,https://www.yelp.com/biz/panera-bread-clevelan...,49,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",3.0,"{'latitude': 35.206987, 'longitude': -84.852089}","[pickup, delivery]",$$,"{'address1': '375 Paul Huff Parkway', 'address...",14234725444,(423) 472-5444,39008.331231


In [91]:
# Check for duplicate results
final_df.duplicated(subset= 'id').sum()

0

In [92]:
# Drop duplicate ids and confirm there are no more duplicates
fina_df = final_df.drop_duplicates(subset= 'id')
fina_df.duplicated(subset= 'id').sum()

0

In [97]:
# Save the final results to a compressed csv
final_df.to_csv('Data/final_results_Chattanooga_asian.csv.gz',
compression ="gzip", index = False)

# Save the final results to an uncompressed csv
final_df.to_csv('Data/final_results_Chattanooga_asian.csv', index = False)