# Imports and functions

**[Work in progress]**

This notebook creates a .csv file with google information for ingestion into the Knowledge Graph.

In [1]:
import json
import time
import pandas as pd
import googlemaps

from datetime import datetime

In [2]:
# read google api key
with open('key.data', 'r') as key:
    api_key = key.read()

gmaps = googlemaps.Client(key=api_key)

In [3]:
def float_range(start, stop, step):
    while round(start, 6) < stop:
        yield float(round(start, 6))
        start += step

# Generate coordinates for map sweep

In [4]:
# Other terms: restaurant, attraction, hotel
term = 'restaurant'

## Use existing yelp density data

In [5]:
df_density = pd.read_csv('./../../data/yelp_{}_density.csv'.format(term))

In [6]:
# drop coordinate with no restraurants
print('Total coordinates: {}'.format(len(df_density)))

df_density = df_density[df_density['density'] > 0]
print('Total coordinates with potential {}: {}'.format(term, len(df_density)))

Total coordinates: 846
Total coordinates with potential restaurant: 469


In [7]:
df_low_density = df_density[df_density['density'] < 50]
print('Total coordinates with low density {}: {}'.format(term, len(df_low_density)))

df_high_density = df_density[df_density['density'] > 50]
print('Total coordinates with high density {}: {}'.format(term, len(df_high_density)))

Total coordinates with low density restaurant: 425
Total coordinates with high density restaurant: 43


In [8]:
def high_density_breakdown(df):
    tmp = pd.DataFrame(columns=['latitude', 'longitude', 'radius', 'density'])
    for index_label, row_series in df.iterrows():
        new_lat = row_series['latitude']
        new_long = row_series['longitude']
        new_radius =  row_series['radius']
        new_density =  row_series['density'] / 7

        tmp = tmp.append({'latitude':new_lat, 'longitude':new_long, 
                          'radius':new_radius/3.5, 'density':new_density}, ignore_index=True)
        tmp = tmp.append({'latitude':new_lat, 'longitude':new_long-(0.020/2.8), 
                          'radius':new_radius/2.2, 'density':new_density}, ignore_index=True)
        tmp = tmp.append({'latitude':new_lat, 'longitude':new_long+(0.020/2.8), 
                          'radius':new_radius/2.2, 'density':new_density}, ignore_index=True)

        tmp = tmp.append({'latitude':new_lat+(0.018/3.5), 'longitude':new_long-(0.020/6), 
                          'radius':new_radius/2.2, 'density':new_density}, ignore_index=True)
        tmp = tmp.append({'latitude':new_lat+(0.018/3.5), 'longitude':new_long+(0.020/6), 
                          'radius':new_radius/2.2, 'density':new_density}, ignore_index=True)

        tmp = tmp.append({'latitude':new_lat-(0.018/3.5), 'longitude':new_long-(0.020/6), 
                          'radius':new_radius/2.2, 'density':new_density}, ignore_index=True)
        tmp = tmp.append({'latitude':new_lat-(0.018/3.5), 'longitude':new_long+(0.020/6), 
                          'radius':new_radius/2.2, 'density':new_density}, ignore_index=True)
    return tmp

In [9]:
df_high_density = high_density_breakdown(df_high_density)
df_density = pd.concat([df_low_density, df_high_density], sort=False, ignore_index=True)

In [10]:
print('Coordinates: {}'.format(len(df_density)))

Coordinates: 726


In [11]:
# 1 degree: approximately 111 km
start_latitude = 32.550
stop_latitude = 33.100

# 1 degree: approximaely 93 km
start_longitude = -117.260
stop_longitude = -116.900

# google api result offset and limit
offset = 0
limit = 20

In [12]:
zone = list()
radius = list()
for index_label, row_series in df_density.iterrows():
    zone.append((row_series['latitude'], row_series['longitude']))
    radius.append(row_series['radius'])

In [13]:
import folium
import random as rnd

zone_center = [
    round((start_latitude + stop_latitude) / 2, 3), 
    round((start_longitude + stop_longitude) / 2, 3)
]
print(zone_center)

# Build map 
map_zone = folium.Map(location=zone_center, zoom_start=10, tiles='cartodbpositron', width=800, height=600)
folium.Rectangle(zone, color='#0080bb', fill_color='#0080bb').add_to(map_zone)

for i in range(len(zone)):
    folium.CircleMarker(zone[i], radius=1, color='#0000FF', fill_color='#0080bb').add_to(map_zone)
    folium.Circle(zone[i], radius=radius[i], color='#0000FF', fill_color='#0080bb').add_to(map_zone)

map_zone

[32.825, -117.08]


# Scapping

In [14]:
def scrap_google(df_combined, latitude, longitude, radius):
    scrapped = 0
    page_token = None
    for i in range(1000):
        print('\tScrapping offset: {}'.format(limit * i))
        if not page_token:
            response = gmaps.places_nearby(location=(latitude, longitude), radius=radius, type=term)
        else:
            response = gmaps.places_nearby(page_token=page_token)
            
        scrapped += len(response['results'])
        df = pd.json_normalize(response['results'])    

        if df_combined is not None:
            df_combined = pd.concat([df_combined, df], sort=False, ignore_index=True)
        else:       
            df_combined = df.copy()

        # More to scrapped from this location
        if response.get('next_page_token'):
            page_token = response['next_page_token']
            time.sleep(3)
        else:
            print('Completed scrapping. Total: {}'.format(scrapped))
            break
    return df_combined.copy()

In [20]:
index = 0
df_business = None
for (latitude, longitude), rad in zip(zone, radius):
    index += 1
    print('Scrapping coordinate {}: ({}, {}). Radius: {}'.format(index, latitude, longitude, rad))
    df_business = scrap_google(df_business, latitude, longitude, rad)

Scrapping coordinate 1: (32.55, -117.06). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 12
Scrapping coordinate 2: (32.55, -117.04). Radius: 1000.0
	Scrapping offset: 0
	Scrapping offset: 20
Completed scrapping. Total: 36
Scrapping coordinate 3: (32.55, -117.02). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 3
Scrapping coordinate 4: (32.55, -117.0). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 17
Scrapping coordinate 5: (32.55, -116.98). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 16
Scrapping coordinate 6: (32.55, -116.96). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 4
Scrapping coordinate 7: (32.55, -116.94). Radius: 1000.0
	Scrapping offset: 0
	Scrapping offset: 20
	Scrapping offset: 40
Completed scrapping. Total: 60
Scrapping coordinate 8: (32.55, -116.92). Radius: 1000.0
	Scrapping offset: 0
	Scrapping offset: 20
Completed scrapping. Total: 37
Scrapping coordinate 9: (32.5734, -117.14).

Completed scrapping. Total: 6
Scrapping coordinate 72: (32.7372, -117.1). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 5
Scrapping coordinate 73: (32.7372, -117.08). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 2
Scrapping coordinate 74: (32.7372, -117.06). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 10
Scrapping coordinate 75: (32.7372, -117.04). Radius: 1000.0
	Scrapping offset: 0
	Scrapping offset: 20
Completed scrapping. Total: 32
Scrapping coordinate 76: (32.7372, -117.02). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 17
Scrapping coordinate 77: (32.7372, -117.0). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 4
Scrapping coordinate 78: (32.7372, -116.98). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 2
Scrapping coordinate 79: (32.7372, -116.96). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 4
Scrapping coordinate 80: (32.7372, -116.94). Radius: 1000.0

Completed scrapping. Total: 3
Scrapping coordinate 142: (32.8542, -116.98). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 18
Scrapping coordinate 143: (32.8542, -116.96). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate 144: (32.8542, -116.94). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 16
Scrapping coordinate 145: (32.8542, -116.92). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 20
Scrapping coordinate 146: (32.8776, -117.2). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 6
Scrapping coordinate 147: (32.8776, -117.18). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 3
Scrapping coordinate 148: (32.8776, -117.16). Radius: 1000.0
	Scrapping offset: 0
	Scrapping offset: 20
Completed scrapping. Total: 40
Scrapping coordinate 149: (32.8776, -117.14). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 6
Scrapping coordinate 150: (32.900999999999996, -1

	Scrapping offset: 20
Completed scrapping. Total: 22
Scrapping coordinate 212: (33.0648, -117.04). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate 213: (33.0648, -117.0). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate 214: (33.0882, -117.26). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 15
Scrapping coordinate 215: (33.0882, -117.24). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 10
Scrapping coordinate 216: (33.0882, -117.2). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate 217: (33.0882, -117.08). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 3
Scrapping coordinate 218: (33.0882, -117.0). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate 219: (32.562414000000004, -117.07). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 8
Scrapping coordinate 220: (32.56241400000

Completed scrapping. Total: 12
Scrapping coordinate 279: (32.726214, -117.13). Radius: 1000.0
	Scrapping offset: 0
	Scrapping offset: 20
Completed scrapping. Total: 29
Scrapping coordinate 280: (32.726214, -117.11). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 7
Scrapping coordinate 281: (32.726214, -117.09). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 14
Scrapping coordinate 282: (32.726214, -117.07). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 2
Scrapping coordinate 283: (32.726214, -117.03). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 6
Scrapping coordinate 284: (32.726214, -116.99). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate 285: (32.726214, -116.97). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 8
Scrapping coordinate 286: (32.726214, -116.95). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate 287: (32.726

Completed scrapping. Total: 1
Scrapping coordinate 349: (32.843214, -116.97). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 9
Scrapping coordinate 350: (32.843214, -116.95). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate 351: (32.843214, -116.93). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 4
Scrapping coordinate 352: (32.866614, -117.25). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 2
Scrapping coordinate 353: (32.866614, -117.23). Radius: 1000.0
	Scrapping offset: 0
	Scrapping offset: 20
Completed scrapping. Total: 39
Scrapping coordinate 354: (32.866614, -117.15). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate 355: (32.866614, -116.93). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 6
Scrapping coordinate 356: (32.866614, -116.91). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate 357: (32.89001

Completed scrapping. Total: 0
Scrapping coordinate 419: (33.053814, -117.25). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 12
Scrapping coordinate 420: (33.053814, -117.19). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate 421: (33.077214, -117.23). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 17
Scrapping coordinate 422: (33.077214, -117.07). Radius: 1000.0
	Scrapping offset: 0
	Scrapping offset: 20
Completed scrapping. Total: 29
Scrapping coordinate 423: (33.077214, -117.05). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate 424: (33.077214, -117.03). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate 425: (33.077214, -116.99). Radius: 1000.0
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate 426: (32.667, -117.1). Radius: 285.7142857142857
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate 427: 

Completed scrapping. Total: 3
Scrapping coordinate 476: (32.7606, -117.16714285714285). Radius: 454.5454545454545
	Scrapping offset: 0
Completed scrapping. Total: 5
Scrapping coordinate 477: (32.7606, -117.15285714285714). Radius: 454.5454545454545
	Scrapping offset: 0
Completed scrapping. Total: 3
Scrapping coordinate 478: (32.765742857142854, -117.16333333333333). Radius: 454.5454545454545
	Scrapping offset: 0
	Scrapping offset: 20
Completed scrapping. Total: 23
Scrapping coordinate 479: (32.765742857142854, -117.15666666666667). Radius: 454.5454545454545
	Scrapping offset: 0
Completed scrapping. Total: 16
Scrapping coordinate 480: (32.75545714285714, -117.16333333333333). Radius: 454.5454545454545
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate 481: (32.75545714285714, -117.15666666666667). Radius: 454.5454545454545
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate 482: (32.7606, -117.14). Radius: 285.7142857142857
	Scrapping offset: 0
C

Completed scrapping. Total: 21
Scrapping coordinate 533: (32.8308, -117.13285714285715). Radius: 454.5454545454545
	Scrapping offset: 0
Completed scrapping. Total: 19
Scrapping coordinate 534: (32.83594285714286, -117.14333333333333). Radius: 454.5454545454545
	Scrapping offset: 0
Completed scrapping. Total: 7
Scrapping coordinate 535: (32.83594285714286, -117.13666666666667). Radius: 454.5454545454545
	Scrapping offset: 0
Completed scrapping. Total: 7
Scrapping coordinate 536: (32.825657142857146, -117.14333333333333). Radius: 454.5454545454545
	Scrapping offset: 0
Completed scrapping. Total: 2
Scrapping coordinate 537: (32.825657142857146, -117.13666666666667). Radius: 454.5454545454545
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate 538: (32.8776, -117.24). Radius: 285.7142857142857
	Scrapping offset: 0
Completed scrapping. Total: 4
Scrapping coordinate 539: (32.8776, -117.24714285714285). Radius: 454.5454545454545
	Scrapping offset: 0
Completed scrapping. To

Completed scrapping. Total: 5
Scrapping coordinate 590: (32.70795685714286, -117.17333333333333). Radius: 454.5454545454545
	Scrapping offset: 0
Completed scrapping. Total: 16
Scrapping coordinate 591: (32.70795685714286, -117.16666666666667). Radius: 454.5454545454545
	Scrapping offset: 0
	Scrapping offset: 20
Completed scrapping. Total: 27
Scrapping coordinate 592: (32.697671142857146, -117.17333333333333). Radius: 454.5454545454545
	Scrapping offset: 0
Completed scrapping. Total: 20
Scrapping coordinate 593: (32.697671142857146, -117.16666666666667). Radius: 454.5454545454545
	Scrapping offset: 0
Completed scrapping. Total: 14
Scrapping coordinate 594: (32.702814000000004, -117.15). Radius: 285.7142857142857
	Scrapping offset: 0
Completed scrapping. Total: 4
Scrapping coordinate 595: (32.702814000000004, -117.15714285714286). Radius: 454.5454545454545
	Scrapping offset: 0
Completed scrapping. Total: 5
Scrapping coordinate 596: (32.702814000000004, -117.14285714285715). Radius: 454.5

	Scrapping offset: 20
	Scrapping offset: 40
Completed scrapping. Total: 60
Scrapping coordinate 646: (32.75475685714286, -117.17333333333333). Radius: 454.5454545454545
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate 647: (32.75475685714286, -117.16666666666667). Radius: 454.5454545454545
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate 648: (32.744471142857144, -117.17333333333333). Radius: 454.5454545454545
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate 649: (32.744471142857144, -117.16666666666667). Radius: 454.5454545454545
	Scrapping offset: 0
Completed scrapping. Total: 1
Scrapping coordinate 650: (32.749614, -117.15). Radius: 285.7142857142857
	Scrapping offset: 0
	Scrapping offset: 20
Completed scrapping. Total: 24
Scrapping coordinate 651: (32.749614, -117.15714285714286). Radius: 454.5454545454545
	Scrapping offset: 0
	Scrapping offset: 20
	Scrapping offset: 40
Completed scrapping. Total: 60
Scrapping coo

	Scrapping offset: 20
Completed scrapping. Total: 37
Scrapping coordinate 701: (32.819814, -117.14285714285715). Radius: 454.5454545454545
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate 702: (32.82495685714286, -117.15333333333334). Radius: 454.5454545454545
	Scrapping offset: 0
	Scrapping offset: 20
	Scrapping offset: 40
Completed scrapping. Total: 53
Scrapping coordinate 703: (32.82495685714286, -117.14666666666668). Radius: 454.5454545454545
	Scrapping offset: 0
Completed scrapping. Total: 2
Scrapping coordinate 704: (32.814671142857144, -117.15333333333334). Radius: 454.5454545454545
	Scrapping offset: 0
	Scrapping offset: 20
Completed scrapping. Total: 31
Scrapping coordinate 705: (32.814671142857144, -117.14666666666668). Radius: 454.5454545454545
	Scrapping offset: 0
Completed scrapping. Total: 0
Scrapping coordinate 706: (32.843214, -116.99). Radius: 285.7142857142857
	Scrapping offset: 0
Completed scrapping. Total: 2
Scrapping coordinate 707: (32.84321

In [21]:
print('Total business scrapped: {}'.format(len(df_business)))

Total business scrapped: 8592


In [24]:
df_business = df_business.drop_duplicates(subset=['place_id'])

In [25]:
print('Total location scrapped: {}'.format(len(zone)))
print('Total unique business scrapped: {}'.format(len(df_business)))

Total location scrapped: 726
Total unique business scrapped: 6130


In [28]:
columns={
    'geometry.location.lat':'geometry_location_lat',
    'geometry.location.lng':'geometry_location_lng',    
    'geometry.viewport.northeast.lat':'geometry_viewport_northeast_lat',
    'geometry.viewport.northeast.lng':'geometry_viewport_northeast_lng',
    'geometry.viewport.southwest.lat':'geometry_viewport_southwest_lat',
    'geometry.viewport.southwest.lng':'geometry_viewport_southwest_lng',
    'opening_hours.open_now':'opening_hours_open_now',
    'plus_code.compound_code':'plus_code_compound_code',
    'plus_code.global_code':'plus_code_global_code',
}
df_business_final = df_business.rename(columns=columns)

In [29]:
df_business_final.head()

Unnamed: 0,business_status,icon,name,photos,place_id,price_level,rating,reference,scope,types,...,geometry_location_lat,geometry_location_lng,geometry_viewport_northeast_lat,geometry_viewport_northeast_lng,geometry_viewport_southwest_lat,geometry_viewport_southwest_lng,opening_hours_open_now,plus_code_compound_code,plus_code_global_code,permanently_closed
0,OPERATIONAL,https://maps.gstatic.com/mapfiles/place_api/ic...,Subway,"[{'height': 3024, 'html_attributions': ['<a hr...",ChIJtX9_3j5J2YARzcOEJYdwtl0,1.0,4.0,ChIJtX9_3j5J2YARzcOEJYdwtl0,GOOGLE,"[meal_takeaway, restaurant, food, point_of_int...",...,32.555212,-117.052609,32.556586,-117.051318,32.553888,-117.054016,False,"HW4W+3X San Diego, CA, USA",8544HW4W+3X,
1,OPERATIONAL,https://maps.gstatic.com/mapfiles/place_api/ic...,Yum Yum Donuts,"[{'height': 3024, 'html_attributions': ['<a hr...",ChIJ4_64EO5J2YARAMrG3WMF9mA,1.0,4.4,ChIJ4_64EO5J2YARAMrG3WMF9mA,GOOGLE,"[meal_delivery, bakery, cafe, restaurant, food...",...,32.555975,-117.054656,32.557388,-117.0533,32.55469,-117.055998,True,"HW4W+94 San Diego, CA, USA",8544HW4W+94,
2,OPERATIONAL,https://maps.gstatic.com/mapfiles/place_api/ic...,Robertacos,"[{'height': 1836, 'html_attributions': ['<a hr...",ChIJe5v-Nj9J2YARSn1g21LS7xc,,3.8,ChIJe5v-Nj9J2YARSn1g21LS7xc,GOOGLE,"[restaurant, food, point_of_interest, establis...",...,32.556013,-117.05517,32.557466,-117.053872,32.554768,-117.05657,True,"HW4V+CW San Diego, CA, USA",8544HW4V+CW,
3,OPERATIONAL,https://maps.gstatic.com/mapfiles/place_api/ic...,Jack in the Box,"[{'height': 3792, 'html_attributions': ['<a hr...",ChIJiczf5z5J2YARKpv-rNf_85A,1.0,4.3,ChIJiczf5z5J2YARKpv-rNf_85A,GOOGLE,"[restaurant, food, point_of_interest, establis...",...,32.555219,-117.052119,32.556635,-117.050789,32.553937,-117.053487,True,"HW4X+35 San Diego, CA, USA",8544HW4X+35,
4,OPERATIONAL,https://maps.gstatic.com/mapfiles/place_api/ic...,El Jalapeño Taco Shop,"[{'height': 3024, 'html_attributions': ['<a hr...",ChIJzcwtNz9J2YARG4A4O5fGYc8,1.0,3.7,ChIJzcwtNz9J2YARG4A4O5fGYc8,GOOGLE,"[restaurant, food, point_of_interest, establis...",...,32.55593,-117.05466,32.557318,-117.053388,32.55462,-117.056085,True,"HW4W+94 San Diego, CA, USA",8544HW4W+94,


In [30]:
with open('./../../data/google_{}.csv'.format(term), 'w') as f:
    df_business_final.to_csv(f, index=False)

# Get business reviews

In [15]:
df_csv = pd.read_csv('./../../data/google_{}.csv'.format(term))

In [16]:
df_coordinates = df_csv[['geometry_location_lat', 'geometry_location_lng']].dropna()

In [17]:
# Build map 
map_zone = folium.Map(location=zone_center, zoom_start=10, tiles='cartodbpositron', width=800, height=600)
for i in range(len(df_coordinates)):
    folium.CircleMarker((df_coordinates.iloc[i][0], df_coordinates.iloc[i][1]), 
                        radius=1, color='#0000FF', fill_color='#0080bb').add_to(map_zone)
map_zone

In [121]:
# Add additonal columns to place data
df_csv['url'] = np.nan
df_csv['utc_offset'] = np.nan
df_csv['formatted_address'] = np.nan
df_csv['formatted_phone_number'] = np.nan
df_csv['international_phone_number'] = np.nan
df_csv['website'] = np.nan

In [150]:
df_reviews = None  # comment out to continue from last business id
df_addresses = None  # comment out to continue from last business id
start_again = True  # Set to False to continue from last business id
fields = ['url', 'utc_offset', 'formatted_address', 'formatted_phone_number', 'international_phone_number', 'website',
          'address_component', 'review']
for i, place_id in enumerate(df_csv['place_id']):
    if not start_again:
        if place_id == 'ChIJw1Zkh_ZN2YARrB2y_GYDWRE':
            start_again = True
    else:
        response = gmaps.place(place_id=place_id, fields=fields, language='en')
        df_review = pd.json_normalize(response['result']['reviews'])
        df_review.insert(0, 'place_id', place_id) 

        df_address = pd.json_normalize(response['result']['address_components'])
        df_address.insert(0, 'place_id', place_id) 

        if response['result'].get('url'):
            df_csv.loc[(df_csv['place_id']==place_id), 'url'] = response['result']['url']

        if response['result'].get('utc_offset'):
            df_csv.loc[(df_csv['place_id']==place_id), 'utc_offset'] = response['result']['utc_offset']

        if response['result'].get('formatted_address'):
            df_csv.loc[(df_csv['place_id']==place_id), 'formatted_address'] = response['result']['formatted_address']
    
        if response['result'].get('formatted_phone_number'):
            df_csv.loc[(df_csv['place_id']==place_id), 'formatted_phone_number'] = response['result']['formatted_phone_number']
        
        if response['result'].get('international_phone_number'):
            df_csv.loc[(df_csv['place_id']==place_id), 'international_phone_number'] = response['result']['international_phone_number']
        
        if response['result'].get('website'):
            df_csv.loc[(df_csv['place_id']==place_id), 'website'] = response['result']['website']

        if df_reviews is not None:
            df_reviews = pd.concat([df_reviews, df_review], sort=False, ignore_index=True)
        else:       
            df_reviews = df_review.copy()

        if df_addresses is not None:
            df_addresses = pd.concat([df_addresses, df_address], sort=False, ignore_index=True)
        else:
            df_addresses = df_address.copy()
            
    if i % 10 == 0:
        print('Processing business #{}'.format(i + 1))

Processing business #1
Processing business #11
Processing business #21
Processing business #31
Processing business #41
Processing business #51
Processing business #61
Processing business #71
Processing business #81
Processing business #91
Processing business #101
Processing business #111
Processing business #121
Processing business #131
Processing business #141
Processing business #151
Processing business #161
Processing business #171
Processing business #181
Processing business #191
Processing business #201
Processing business #211
Processing business #221
Processing business #231
Processing business #241
Processing business #251
Processing business #261
Processing business #271
Processing business #281
Processing business #291
Processing business #301
Processing business #311
Processing business #321
Processing business #331
Processing business #341
Processing business #351
Processing business #361
Processing business #371
Processing business #381
Processing business #391
Processing 

Processing business #3201
Processing business #3211
Processing business #3221
Processing business #3231
Processing business #3241
Processing business #3251
Processing business #3261
Processing business #3271
Processing business #3281
Processing business #3291
Processing business #3301
Processing business #3311
Processing business #3321
Processing business #3331
Processing business #3341
Processing business #3351
Processing business #3361
Processing business #3371
Processing business #3381
Processing business #3391
Processing business #3401
Processing business #3411
Processing business #3421
Processing business #3431
Processing business #3441
Processing business #3451
Processing business #3461
Processing business #3471
Processing business #3481
Processing business #3491
Processing business #3501
Processing business #3511
Processing business #3521
Processing business #3531
Processing business #3541
Processing business #3551
Processing business #3561
Processing business #3571
Processing b

# Attach additional data place

In [234]:
df_csv['neighborhood'] = np.nan
df_csv['city'] = np.nan
df_csv['county'] = np.nan
df_csv['state'] = np.nan
df_csv['postal_code'] = np.nan
df_csv['country'] = np.nan

In [237]:
c_neighborhood = 0
c_city = 0
c_county = 0
c_state = 0
c_postal_code = 0
c_country = 0

for index_label, row_series in df_addresses.iterrows():
    row_place_id = row_series['place_id']
    row_types = row_series['types']
    row_name = row_series['long_name']
    if 'neighborhood' in row_types:
        c_neighborhood += 1
        df_csv.loc[(df_csv['place_id']==row_place_id), 'neighborhood'] = row_name
    elif 'locality' in row_types:
        c_city += 1
        df_csv.loc[(df_csv['place_id']==row_place_id), 'city'] = row_name
    elif 'administrative_area_level_2' in row_types:
        c_county += 1
        df_csv.loc[(df_csv['place_id']==row_place_id), 'county'] = row_name
    elif 'administrative_area_level_1' in row_types:
        c_state += 1
        df_csv.loc[(df_csv['place_id']==row_place_id), 'state'] = row_name
    elif 'postal_code' in row_types:
        c_postal_code += 1
        df_csv.loc[(df_csv['place_id']==row_place_id), 'postal_code'] = row_name
    elif 'country' in row_types:
        c_country += 1
        df_csv.loc[(df_csv['place_id']==row_place_id), 'country'] = row_name
        
print('neighborhood: {}'.format(c_neighborhood))
print('city: {}'.format(c_city))
print('county: {}'.format(c_county))
print('state: {}'.format(c_state))
print('postal_code: {}'.format(c_postal_code))
print('country: {}'.format(c_country))
print('unique record: {}'.format(df_csv['place_id'].nunique()))

4178
6129
6015
6129
6103
6129


In [239]:
print('Total places: {}'.format(len(df_csv)))
print('Total reviews scrapped: {}'.format(len(df_reviews)))
print('Total adddresses scrapped: {}'.format(len(df_addresses)))

Total places: 6130
Total reviews scrapped: 27283
Total adddresses scrapped: 49700


In [240]:
with open('./../../data/google_{}.csv'.format(term), 'w') as f:
    df_csv.to_csv(f, index=False)

In [161]:
with open('./../../data/google_{}_review.csv'.format(term), 'w') as f:
    df_reviews.to_csv(f, index=False)

In [159]:
with open('./../../data/google_{}_address.csv'.format(term), 'w') as f:
    df_addresses.to_csv(f, index=False)