# Imports and functions

**[Work in progress]**

This notebook creates a .csv file with yelp information for ingestion into the Knowledge Graph.

In [10]:
import os
import time
import json
import pandas as pd

from pathlib import Path
from yelpapi import YelpAPI
from pprint import pprint

In [2]:
# read yelp api key
with open('key.data', 'r') as key:
    api_key = key.read()

yelp_api = YelpAPI(api_key)

In [3]:
def float_range(start, stop, step):
    while round(start, 6) < stop:
        yield float(round(start, 6))
        start += step

# Generate coordinates for map sweep

In [4]:
# Other terms: restaurant, attraction, hotel
term = 'restaurant'

if term in ('restaurant', 'attraction'):
    # 1 degree: approximately 111 km
    # using step size of 0.018 degree
    start_latitude = 32.550
    stop_latitude = 33.100
    step_latitude = 0.018
    # 1 degree: approximaely 93 km
    # using step size of 0.020 degree
    start_longitude = -117.260
    stop_longitude = -116.900
    step_longitude = 0.020
    # radius of search 1000m
    radius = 1000
elif term in ('hotel'):
    # 1 degree: approximately 111 km
    # using step size of 0.090 degree
    start_latitude = 32.550
    stop_latitude = 33.100
    step_latitude = 0.090
    # 1 degree: approximaely 93 km
    # using step size of 0.100 degree
    start_longitude = -117.260
    stop_longitude = -116.900
    step_longitude = 0.100
    # radius of search 5000m
    radius = 5000
else:
    print('Invalid term
    
# yelp api offset and limit
offset = 0
limit = 50

In [5]:
zone = list()
for lat in float_range(start_latitude, stop_latitude, step_latitude * 1.3):
    for long in float_range(start_longitude, stop_longitude, step_longitude):
        zone.append((lat, long))
        
for lat in float_range(start_latitude + step_latitude/1.45, stop_latitude, step_latitude * 1.3):
    for long in float_range(start_longitude + step_longitude/2, stop_longitude, step_longitude):
        zone.append((lat, long))
        
print('Number of coordinates: {}'.format(len(zone)))

Number of coordinates: 846


In [6]:
import folium
import random as rnd

zone_center = [
    round((start_latitude + stop_latitude - step_latitude) / 2, 3), 
    round((start_longitude + stop_longitude - step_longitude) / 2, 3)
]
print(zone_center)

# Build map 
map_zone = folium.Map(location=zone_center, zoom_start=10, tiles='cartodbpositron', width=800, height=600)

# folium.CircleMarker(zone_center, radius=1, color='#00FF00', fill_color='#0080bb').add_to(map_zone)
folium.Rectangle(zone, color='#0080bb', fill_color='#0080bb').add_to(map_zone)

for i in range(len(zone)):
    folium.CircleMarker(zone[i], radius=1, color='#FF0000', fill_color='#0080bb').add_to(map_zone)
    folium.Circle(zone[i], radius=radius, color='#FF0000', fill_color='#0080bb').add_to(map_zone)

map_zone

[32.816, -117.09]


# Scapping

In [22]:
def scrap_yelp(df_combined, latitude, longitude, radius):
    # Maximum 1000 businesses can be pulled for each coordinate
    scrapped = 0
    total = 0
    for i in range(20):
        print('\tScrapping offset: {}'.format(limit * i))
        for attempt in range(5):
            try:
                response = yelp_api.search_query(term=term, latitude=latitude, longitude=longitude, 
                                                 limit=limit, radius=radius, offset=scrapped)
                break
            except:
                print('Something is wrong. Sleep for 10 sec')
                time.sleep(10)
        else:
            raise BaseException('Exhausted all attempts')
        
        scrapped += len(response['businesses'])
        df = pd.json_normalize(response['businesses'])    

        if df_combined is not None:
            df_combined = pd.concat([df_combined, df], sort=False, ignore_index=True)
        else:       
            df_combined = df.copy()

        # More to scrapped from this location
        if response['total'] <= scrapped:
            print('Completed scrapping. Total: {}'.format(response['total']))
            total = response['total']
            break
    return df_combined.copy(), total

In [23]:
df_business = None 
df_density = pd.DataFrame(columns=['latitude', 'longitude', 'radius', 'density'])
for latitude, longitude in zone:
    print('Scrapping coordinate: ({}, {}). Radius: {}'.format(latitude, longitude, radius))
    df_business, total = scrap_yelp(df_business, latitude, longitude, radius)
    df_density = df_density.append({'latitude':latitude, 
                                    'longitude':longitude, 
                                    'radius':radius, 
                                    'density':total}, 
                                   ignore_index=True)

Scrapping coordinate: (32.55, -117.26). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.55, -117.24). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.55, -117.22). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.55, -117.2). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.55, -117.18). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.55, -117.16). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.55, -117.14). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.55, -117.12). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.55, -117.1). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.55, -117.08). Radius: 1000
	Scrapping of

Completed scrapping. Total: 0
Scrapping coordinate: (32.6436, -117.14). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.6436, -117.12). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.6436, -117.1). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 36
Scrapping coordinate: (32.6436, -117.08). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 27
Scrapping coordinate: (32.6436, -117.06). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 17
Scrapping coordinate: (32.6436, -117.04). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 9
Scrapping coordinate: (32.6436, -117.02). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 8
Scrapping coordinate: (32.6436, -117.0). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 26
Scrapping coordinate: (32.6436, -116.98). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coor

Completed scrapping. Total: 11
Scrapping coordinate: (32.7372, -117.08). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 3
Scrapping coordinate: (32.7372, -117.06). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 11
Scrapping coordinate: (32.7372, -117.04). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 41
Scrapping coordinate: (32.7372, -117.02). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 20
Scrapping coordinate: (32.7372, -117.0). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 4
Scrapping coordinate: (32.7372, -116.98). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 5
Scrapping coordinate: (32.7372, -116.96). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 5
Scrapping coordinate: (32.7372, -116.94). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 29
Scrapping coordinate: (32.7372, -116.92). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 9
Scrapping co

Completed scrapping. Total: 0
Scrapping coordinate: (32.8308, -117.02). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 4
Scrapping coordinate: (32.8308, -117.0). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 17
Scrapping coordinate: (32.8308, -116.98). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 37
Scrapping coordinate: (32.8308, -116.96). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 9
Scrapping coordinate: (32.8308, -116.94). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 3
Scrapping coordinate: (32.8308, -116.92). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.8542, -117.26). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 20
Scrapping coordinate: (32.8542, -117.24). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate: (32.8542, -117.22). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 10
Scrapping coo

Completed scrapping. Total: 0
Scrapping coordinate: (32.9244, -116.92). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.9478, -117.26). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 9
Scrapping coordinate: (32.9478, -117.24). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 49
Scrapping coordinate: (32.9478, -117.22). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 8
Scrapping coordinate: (32.9478, -117.2). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 5
Scrapping coordinate: (32.9478, -117.18). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.9478, -117.16). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.9478, -117.14). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.9478, -117.12). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 2
Scrapping coordi

Completed scrapping. Total: 0
Scrapping coordinate: (33.0414, -117.16). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate: (33.0414, -117.14). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (33.0414, -117.12). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (33.0414, -117.1). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (33.0414, -117.08). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 2
Scrapping coordinate: (33.0414, -117.06). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (33.0414, -117.04). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 2
Scrapping coordinate: (33.0414, -117.02). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate: (33.0414, -117.0). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordina

Completed scrapping. Total: 15
Scrapping coordinate: (32.585814, -117.05). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 13
Scrapping coordinate: (32.585814, -117.03). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 19
Scrapping coordinate: (32.585814, -117.01). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate: (32.585814, -116.99). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.585814, -116.97). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.585814, -116.95). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate: (32.585814, -116.93). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate: (32.585814, -116.91). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.609214, -117.25). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Tota

Completed scrapping. Total: 3
Scrapping coordinate: (32.679414, -116.99). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.679414, -116.97). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.679414, -116.95). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 2
Scrapping coordinate: (32.679414, -116.93). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.679414, -116.91). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.702814, -117.25). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate: (32.702814, -117.23). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 2
Scrapping coordinate: (32.702814, -117.21). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 5
Scrapping coordinate: (32.702814, -117.19). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 

Completed scrapping. Total: 48
Scrapping coordinate: (32.773014, -117.01). Radius: 1000
	Scrapping offset: 0
	Scrapping offset: 50
Completed scrapping. Total: 65
Scrapping coordinate: (32.773014, -116.99). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 5
Scrapping coordinate: (32.773014, -116.97). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate: (32.773014, -116.95). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 6
Scrapping coordinate: (32.773014, -116.93). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 5
Scrapping coordinate: (32.773014, -116.91). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.796414, -117.25). Radius: 1000
	Scrapping offset: 0
	Scrapping offset: 50
	Scrapping offset: 100
Completed scrapping. Total: 128
Scrapping coordinate: (32.796414, -117.23). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 46
Scrapping coordinate: (32.796414, 

Completed scrapping. Total: 0
Scrapping coordinate: (32.866614, -116.95). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.866614, -116.93). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 11
Scrapping coordinate: (32.866614, -116.91). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 2
Scrapping coordinate: (32.890014, -117.25). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 10
Scrapping coordinate: (32.890014, -117.23). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 14
Scrapping coordinate: (32.890014, -117.21). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 30
Scrapping coordinate: (32.890014, -117.19). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 7
Scrapping coordinate: (32.890014, -117.17). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 6
Scrapping coordinate: (32.890014, -117.15). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Tot

Completed scrapping. Total: 25
Scrapping coordinate: (32.983614, -117.23). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate: (32.983614, -117.21). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate: (32.983614, -117.19). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 6
Scrapping coordinate: (32.983614, -117.17). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate: (32.983614, -117.15). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate: (32.983614, -117.13). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.983614, -117.11). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (32.983614, -117.09). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 18
Scrapping coordinate: (32.983614, -117.07). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total

Completed scrapping. Total: 0
Scrapping coordinate: (33.077214, -117.15). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (33.077214, -117.13). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (33.077214, -117.11). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (33.077214, -117.09). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (33.077214, -117.07). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 26
Scrapping coordinate: (33.077214, -117.05). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate: (33.077214, -117.03). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate: (33.077214, -117.01). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate: (33.077214, -116.99). Radius: 1000
	Scrapping offset: 0
Completed scrapping. Total:

In [24]:
print('Total business scrapped: {}'.format(len(df_business)))

Total business scrapped: 9758


In [25]:
df_business = df_business.drop_duplicates(subset=['id'])

In [26]:
print('Total location scrapped: {}'.format(len(df_density)))
print('Total unique business scrapped: {}'.format(len(df_business)))

Total location scrapped: 846
Total unique business scrapped: 6064


In [28]:
columns={
    'coordinates.latitude':'coordinates_latitude',
    'coordinates.longitude':'coordinates_longitude',    
    'location.address1':'location_address1',
    'location.address2':'location_address2',
    'location.address3':'location_address3',
    'location.city':'location_city',
    'location.zip_code':'location_zip_code',
    'location.country':'location_country',
    'location.state':'location_state',
    'location.display_address':'location_display_address',
}
df_business_final = df_business.rename(columns=columns)

In [29]:
df_business_final.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,transactions,...,coordinates_latitude,coordinates_longitude,location_address1,location_address2,location_address3,location_city,location_zip_code,location_country,location_state,location_display_address
0,AJ1SJkAVv1H6lA4pDnZ0dQ,ying-li-restaurant-san-ysidro,Ying Li Restaurant,https://s3-media2.fl.yelpcdn.com/bphoto/YJPpJz...,False,https://www.yelp.com/biz/ying-li-restaurant-sa...,165,"[{'alias': 'chinese', 'title': 'Chinese'}]",4.0,"[pickup, delivery]",...,32.554284,-117.050823,342 W San Ysidro Blvd,,,San Ysidro,92173,US,CA,"[342 W San Ysidro Blvd, San Ysidro, CA 92173]"
1,uGt9jvS6tcPpYhJa1kDl1w,la-cachonda-cenaduria-san-diego,La Cachonda Cenaduria,https://s3-media2.fl.yelpcdn.com/bphoto/orODHq...,False,https://www.yelp.com/biz/la-cachonda-cenaduria...,3,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.0,[delivery],...,32.555684,-117.051453,347 W San Ysidro Blvd,,,San Diego,92173,US,CA,"[347 W San Ysidro Blvd, San Diego, CA 92173]"
2,9BeEPZ4p0iEHYyKLYBLRBw,zappy-pizza-san-diego,Zappy Pizza,https://s3-media2.fl.yelpcdn.com/bphoto/FTn7AB...,False,https://www.yelp.com/biz/zappy-pizza-san-diego...,33,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.0,"[pickup, delivery]",...,32.554764,-117.052038,416 W San Ysidro Blvd,,,San Diego,92173,US,CA,"[416 W San Ysidro Blvd, San Diego, CA 92173]"
3,Ws4nOG8u2uSTUqEA28a30Q,tropifrutas-san-ysidro,Tropifrutas,https://s3-media1.fl.yelpcdn.com/bphoto/SA-nYT...,False,https://www.yelp.com/biz/tropifrutas-san-ysidr...,133,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",2.0,"[pickup, delivery]",...,32.55435,-117.05065,342 W San Ysidro Blvd,,,San Ysidro,92173,US,CA,"[342 W San Ysidro Blvd, San Ysidro, CA 92173]"
4,bN2HUTEFnzSKVJI4dxvkhQ,robertacos-mexican-food-san-ysidro,Robertacos Mexican Food,https://s3-media3.fl.yelpcdn.com/bphoto/saJgJY...,False,https://www.yelp.com/biz/robertacos-mexican-fo...,98,"[{'alias': 'mexican', 'title': 'Mexican'}]",2.0,[delivery],...,32.55596,-117.05495,492 W San Ysidro Blvd,,,San Ysidro,92173,US,CA,"[492 W San Ysidro Blvd, San Ysidro, CA 92173]"


In [40]:
df_density.head()

Unnamed: 0,latitude,longitude,radius,density
0,32.55,-117.26,1000.0,0.0
1,32.55,-117.24,1000.0,0.0
2,32.55,-117.22,1000.0,0.0
3,32.55,-117.2,1000.0,0.0
4,32.55,-117.18,1000.0,0.0


In [33]:
with open('./../../data/yelp_{}.csv'.format(term), 'w') as f:
    df_business_final.to_csv(f, index=False)

In [41]:
with open('./../../data/yelp_{}_density.csv'.format(term), 'w') as f:
    df_density.to_csv(f, index=False)

# Preprocess data

In [55]:
df_process = pd.read_csv('./../../data/yelp_{}.csv'.format(term))

In [56]:
def process_categories(s):
    alias_list = list()
    s = s.strip('[]{}')
    for s1 in s.split('}, {'):
        for s2 in s1.split(','):
            if ':' in s2:
                k, v = s2.split(':')
                k = k.strip(' \'')
                v = v.strip(' \'')
                if k == 'alias':
                    alias_list.append(v)
    return ','.join(alias_list)

In [57]:
df_process['categories_alias'] = df_process['categories'].apply(process_categories)

In [58]:
with open('./../../data/yelp_{}.csv'.format(term), 'w') as f:
    df_process.to_csv(f, index=False)

# Get business reviews

In [38]:
df_csv = pd.read_csv('./../../data/yelp_{}.csv'.format(term))

In [42]:
df_coordinates = df_csv[['coordinates_latitude', 'coordinates_longitude']].dropna()

In [43]:
# Build map 
map_zone = folium.Map(location=zone_center, zoom_start=10, tiles='cartodbpositron', width=800, height=600)
for i in range(len(df_coordinates)):
    folium.CircleMarker((df_coordinates.iloc[i][0], df_coordinates.iloc[i][1]), 
                        radius=1, color='#FF0000', fill_color='#0080bb').add_to(map_zone)
map_zone

In [61]:
df_review = None  # comment out to continue from last business id
start_again = True  # Set to False to continue from last business id
for i, business_id in enumerate(df_csv['id']):
    if not start_again:
        if business_id == 'W0r0OrTWNlfP_iR9D9wddw':
            start_again = True
    else:
        for attempt in range(5):
            try:
                response = yelp_api.reviews_query(id=business_id)
                break
            except:
                print('Something is wrong. Sleep for 10 sec')
                time.sleep(10)
        else:
            raise BaseException('Exhausted all attempts')
        df = pd.json_normalize(response['reviews'])
        df.insert(0, 'business_id', business_id) 

        if df_review is not None:
            df_review = pd.concat([df_review, df], sort=False, ignore_index=True)
        else:       
            df_review = df.copy()
    
    if i % 10 == 0:
        print('Processing business #{}'.format(i + 1))

Processing business #1
Processing business #11
Processing business #21
Processing business #31
Processing business #41
Processing business #51
Processing business #61
Processing business #71
Processing business #81
Processing business #91
Processing business #101
Processing business #111
Processing business #121
Processing business #131
Processing business #141
Processing business #151
Processing business #161
Processing business #171
Processing business #181
Processing business #191
Processing business #201
Processing business #211
Processing business #221
Processing business #231
Processing business #241
Processing business #251
Processing business #261
Processing business #271
Processing business #281
Processing business #291
Processing business #301
Processing business #311
Processing business #321
Processing business #331
Processing business #341
Processing business #351
Processing business #361
Processing business #371
Processing business #381
Processing business #391
Processing 

Processing business #3671
Processing business #3681
Processing business #3691
Processing business #3701
Processing business #3711
Processing business #3721
Processing business #3731
Processing business #3741
Processing business #3751
Processing business #3761
Processing business #3771
Processing business #3781
Processing business #3791
Processing business #3801
Processing business #3811
Processing business #3821
Processing business #3831
Processing business #3841
Processing business #3851
Processing business #3861
Processing business #3871
Processing business #3881
Processing business #3891
Processing business #3901
Processing business #3911
Processing business #3921
Processing business #3931
Processing business #3941
Processing business #3951
Processing business #3961
Processing business #3971
Processing business #3981
Processing business #3991
Processing business #4001
Processing business #4011
Processing business #4021
Processing business #4031
Processing business #4041
Processing b

In [65]:
print('Total {}: {}'.format(term, len(df_csv)))
print('Total reviews scrapped: {}'.format(len(df_review)))

Total places: 6064
Total reviews scrapped: 17428


In [66]:
df_review = df_review.drop_duplicates(subset=['id'])

In [67]:
columns={
    'user.id':'user_id',
    'user.profile_url':'user_profile_url',    
    'user.image_url':'user_image_url',
    'user.name':'user_name',
}
df_review_final = df_review.rename(columns=columns)

In [68]:
with open('./../../data/yelp_{}_review.csv'.format(term), 'w') as f:
    df_review_final.to_csv(f, index=False)