In [1]:
# import dependencies
import pandas as pd
from geopy.geocoders import Nominatim
import requests
import json
import gmaps
from pandas import json_normalize
from config import API_KEY

In [2]:
# Read in dataset
housing_data = pd.read_csv("Resources/Clean/San_Diego_Housing_Data.csv")

# Make new df with San Diego zipcodes
zipcodesdf = housing_data[["Zip Code"]]
zipcodesdf

Unnamed: 0,Zip Code
0,91901
1,91902
2,91906
3,91910
4,91911
...,...
80,92131
81,92139
82,92154
83,92173


In [3]:
# Read in US zipcode database
us_zip = pd.read_csv("Resources/RAW/uszips.csv")

In [4]:
# Create dataframe for just zipcodes and lat/lon
lat_lng = us_zip[['zip', 'lat', 'lng']]
# Rename the zip column to zipcode so we can merge
lat_lng.rename(columns={'zip':'Zip Code'}, inplace=True)
lat_lng.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lat_lng.rename(columns={'zip':'Zip Code'}, inplace=True)


Unnamed: 0,Zip Code,lat,lng
0,601,18.18027,-66.75266
1,602,18.36075,-67.17541
2,603,18.45744,-67.12225
3,606,18.16585,-66.93716
4,610,18.2911,-67.12243


In [5]:
# Merge dataframes on the zipcode
zip_lat = zipcodesdf.merge(lat_lng, on='Zip Code')
zip_lat.head(20)

Unnamed: 0,Zip Code,lat,lng
0,91901,32.81467,-116.71929
1,91902,32.67479,-117.00475
2,91906,32.66241,-116.47255
3,91910,32.63653,-117.06301
4,91911,32.60686,-117.04984
5,91913,32.62174,-116.98609
6,91914,32.66604,-116.95426
7,91915,32.62266,-116.95013
8,91932,32.56923,-117.11798
9,91935,32.70214,-116.78721


In [6]:
# Bring the index backto zipcode so we can iterate through safely.
zip_lat.set_index(['Zip Code'], inplace=True)

In [7]:
# Setting our API parameters
params = {
    "radius": 5000,
    "type": 'park',
    "key": API_KEY
}

In [8]:
# Iterate the responses and filter out market names in a zipcode.
park_list = []

for index, row in zip_lat.iterrows():
    lat = zip_lat.loc[index, 'lat']
    lng = zip_lat.loc[index, 'lng']
    
    
    params["location"] = f"{lat},{lng}"
    
    base_url= "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
    
    parks = requests.get(base_url,params=params).json()

    park_list.extend(parks.get('results'))


In [9]:
# Confirm we got results
len(park_list)

1496

In [10]:
# Confirm we got coordinates for each market in a zipcode
park_list[0]

{'business_status': 'OPERATIONAL',
 'geometry': {'location': {'lat': 32.8194935, 'lng': -116.763489},
  'viewport': {'northeast': {'lat': 32.82063590000001, 'lng': -116.7614013},
   'southwest': {'lat': 32.8160663, 'lng': -116.7641849}}},
 'icon': 'https://maps.gstatic.com/mapfiles/place_api/icons/v1/png_71/park-71.png',
 'icon_background_color': '#4DB546',
 'icon_mask_base_uri': 'https://maps.gstatic.com/mapfiles/place_api/icons/v2/tree_pinlet',
 'name': "Wright's Field",
 'opening_hours': {'open_now': True},
 'photos': [{'height': 3024,
   'html_attributions': ['<a href="https://maps.google.com/maps/contrib/102936517431979644747">paterjo</a>'],
   'photo_reference': 'AcYSjRgvgIXePig_e34hO9rQp0l6FvkvuS2NZj6Wut9w1lbvyipVy4MX6hQNsaB8qZ_MQORMNRLSqEnVCybMpJjxaiUErrzPJgL6alIWEYMm9Q-ADp7zW__bNgk9fiVTaq7JyBDvqlAMU4xHiRSbscSx5v96N_ZnKwmK9YQy2pxjaqF7XezM',
   'width': 4032}],
 'place_id': 'ChIJ19iiEcRg2YARKN4_P-d2D7I',
 'plus_code': {'compound_code': 'R69P+QJ Alpine, CA, USA',
  'global_code':

In [12]:
# Pull market names from our JSON
df3 = pd.DataFrame([x['name'] for x in park_list])
df3.set_axis(['name'],axis='columns', inplace=True)
df3

Unnamed: 0,name
0,Wright's Field
1,Viejas Park
2,Loveland Fishing Access
3,Sweetwater Summit Regional Park
4,Sweetwater County Park
...,...
1491,5th Marine Regiment Memorial
1492,Adventures trail
1493,San Onofre
1494,Trestles Beach Trailhead


In [13]:
# Pull lat/lon from our JSON
df4 = pd.DataFrame([x['geometry']['location'] for x in park_list])
df4

Unnamed: 0,lat,lng
0,32.819494,-116.763489
1,32.842437,-116.704128
2,32.799750,-116.760434
3,32.682508,-117.002547
4,32.682592,-117.001904
...,...,...
1491,33.422620,-117.556345
1492,33.343913,-117.515095
1493,33.389572,-117.593152
1494,33.396085,-117.591365


In [14]:
# Merge all our dataframes into one
final_park_df = pd.concat([df3, df4], axis = 1)
final_park_df

Unnamed: 0,name,lat,lng
0,Wright's Field,32.819494,-116.763489
1,Viejas Park,32.842437,-116.704128
2,Loveland Fishing Access,32.799750,-116.760434
3,Sweetwater Summit Regional Park,32.682508,-117.002547
4,Sweetwater County Park,32.682592,-117.001904
...,...,...,...
1491,5th Marine Regiment Memorial,33.422620,-117.556345
1492,Adventures trail,33.343913,-117.515095
1493,San Onofre,33.389572,-117.593152
1494,Trestles Beach Trailhead,33.396085,-117.591365


In [15]:
# Getting zipcodes for each market
import geopy
def get_zipcode(df, geolocator, lat_field, lon_field):
    location = geolocator.reverse((df[lat_field], df[lon_field]))
    return location.raw['address']

geolocator = geopy.Nominatim(user_agent='user-agent')
zipcodes = final_park_df.apply(get_zipcode, axis=1, geolocator=geolocator, lat_field='lat', lon_field='lng')

In [16]:
# Export dataframe as js file 
output_file = 'parks.js'
zipcodes.to_json(output_file, orient='records')

In [17]:
# Reading the js file and exporting to dataframe
zipsjs = pd.read_json('./parks.js')
zipsjs = pd.DataFrame(zipsjs)
zipsjs

Unnamed: 0,road,county,state,ISO3166-2-lvl4,postcode,country,country_code,amenity,house_number,tourism,...,commercial,industrial,subdivision,office,club,landuse,city_district,natural,residential,railway
0,Service Road,San Diego County,California,US-CA,91901.0,United States,us,,,,...,,,,,,,,,,
1,Willows Road,San Diego County,California,US-CA,91901.0,United States,us,Viejas Casino and Resort,5000,,...,,,,,,,,,,
2,Japatul Road,San Diego County,California,US-CA,91901.0,United States,us,,17741,,...,,,,,,,,,,
3,Summit Meadow Road,San Diego County,California,US-CA,91902.0,United States,us,,,SweetwaterCampground,...,,,,,,,,,,
4,Summit Meadow Road,San Diego County,California,US-CA,91902.0,United States,us,,,SweetwaterCampground,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1491,,San Diego County,California,US-CA,,United States,us,,,,...,,,,,,,,,,
1492,Old Pacific Highway,San Diego County,California,US-CA,,United States,us,,,,...,,,,,,,,,,
1493,Trestles Beach Trail,San Diego County,California,US-CA,92763.0,United States,us,,,,...,,,,,,,,,,
1494,Trestles Beach Trail,San Diego County,California,US-CA,92763.0,United States,us,,,,...,,,,,,,,,,


In [18]:
# Checking the columns of the new dataframe
zipsjs.columns

Index(['road', 'county', 'state', 'ISO3166-2-lvl4', 'postcode', 'country',
       'country_code', 'amenity', 'house_number', 'tourism', 'shop', 'hamlet',
       'city', 'village', 'leisure', 'neighbourhood', 'place',
       'state_district', 'military', 'town', 'suburb', 'historic', 'retail',
       'building', 'man_made', 'highway', 'emergency', 'district', 'quarter',
       'commercial', 'industrial', 'subdivision', 'office', 'club', 'landuse',
       'city_district', 'natural', 'residential', 'railway'],
      dtype='object')

In [19]:
# Keeping relevant columns
parks_zipcodes = zipsjs[['postcode', 'county']]
parks_zipcodes = pd.DataFrame(parks_zipcodes)

In [20]:
# Merge all our dataframes into one
final_parks_df = pd.concat([df3, df4, parks_zipcodes], axis = 1)
final_parks_df

Unnamed: 0,name,lat,lng,postcode,county
0,Wright's Field,32.819494,-116.763489,91901.0,San Diego County
1,Viejas Park,32.842437,-116.704128,91901.0,San Diego County
2,Loveland Fishing Access,32.799750,-116.760434,91901.0,San Diego County
3,Sweetwater Summit Regional Park,32.682508,-117.002547,91902.0,San Diego County
4,Sweetwater County Park,32.682592,-117.001904,91902.0,San Diego County
...,...,...,...,...,...
1491,5th Marine Regiment Memorial,33.422620,-117.556345,,San Diego County
1492,Adventures trail,33.343913,-117.515095,,San Diego County
1493,San Onofre,33.389572,-117.593152,92763.0,San Diego County
1494,Trestles Beach Trailhead,33.396085,-117.591365,92763.0,San Diego County


In [21]:
# Removing NaN or other non-numeric values from the postcode column
final_parks_df = final_parks_df[pd.to_numeric(final_parks_df['postcode'], errors='coerce').notnull()]
final_parks_df

Unnamed: 0,name,lat,lng,postcode,county
0,Wright's Field,32.819494,-116.763489,91901.0,San Diego County
1,Viejas Park,32.842437,-116.704128,91901.0,San Diego County
2,Loveland Fishing Access,32.799750,-116.760434,91901.0,San Diego County
3,Sweetwater Summit Regional Park,32.682508,-117.002547,91902.0,San Diego County
4,Sweetwater County Park,32.682592,-117.001904,91902.0,San Diego County
...,...,...,...,...,...
1482,Trestles Beach Trailhead,33.395758,-117.591568,92763.0,San Diego County
1483,Rock Formation,33.402802,-117.551148,92051.0,San Diego County
1493,San Onofre,33.389572,-117.593152,92763.0,San Diego County
1494,Trestles Beach Trailhead,33.396085,-117.591365,92763.0,San Diego County


In [27]:
final_parks_df.postcode = final_parks_df.postcode.apply('int64')
final_parks_df = final_parks_df.rename(columns={'postcode':'zipcode'})
final_parks_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_parks_df.postcode = final_parks_df.postcode.apply('int64')


Unnamed: 0,name,lat,lng,zipcode,county
0,Wright's Field,32.819494,-116.763489,91901,San Diego County
1,Viejas Park,32.842437,-116.704128,91901,San Diego County
2,Loveland Fishing Access,32.799750,-116.760434,91901,San Diego County
3,Sweetwater Summit Regional Park,32.682508,-117.002547,91902,San Diego County
4,Sweetwater County Park,32.682592,-117.001904,91902,San Diego County
...,...,...,...,...,...
1377,Trestles Beach Trailhead,33.395758,-117.591568,92763,San Diego County
1378,Rock Formation,33.402802,-117.551148,92051,San Diego County
1379,San Onofre,33.389572,-117.593152,92763,San Diego County
1380,Trestles Beach Trailhead,33.396085,-117.591365,92763,San Diego County


In [23]:
final_parks_df = final_parks_df[final_parks_df['county'] == 'San Diego County']
final_parks_df

Unnamed: 0,name,lat,lng,postcode,county
0,Wright's Field,32.819494,-116.763489,91901,San Diego County
1,Viejas Park,32.842437,-116.704128,91901,San Diego County
2,Loveland Fishing Access,32.799750,-116.760434,91901,San Diego County
3,Sweetwater Summit Regional Park,32.682508,-117.002547,91902,San Diego County
4,Sweetwater County Park,32.682592,-117.001904,91902,San Diego County
...,...,...,...,...,...
1482,Trestles Beach Trailhead,33.395758,-117.591568,92763,San Diego County
1483,Rock Formation,33.402802,-117.551148,92051,San Diego County
1493,San Onofre,33.389572,-117.593152,92763,San Diego County
1494,Trestles Beach Trailhead,33.396085,-117.591365,92763,San Diego County


In [25]:
final_parks_df.reset_index(inplace=True)
final_parks_df.drop(['index'],axis=1,inplace=True)
final_parks_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_parks_df.drop(['index'],axis=1,inplace=True)


Unnamed: 0,name,lat,lng,postcode,county
0,Wright's Field,32.819494,-116.763489,91901,San Diego County
1,Viejas Park,32.842437,-116.704128,91901,San Diego County
2,Loveland Fishing Access,32.799750,-116.760434,91901,San Diego County
3,Sweetwater Summit Regional Park,32.682508,-117.002547,91902,San Diego County
4,Sweetwater County Park,32.682592,-117.001904,91902,San Diego County
...,...,...,...,...,...
1377,Trestles Beach Trailhead,33.395758,-117.591568,92763,San Diego County
1378,Rock Formation,33.402802,-117.551148,92051,San Diego County
1379,San Onofre,33.389572,-117.593152,92763,San Diego County
1380,Trestles Beach Trailhead,33.396085,-117.591365,92763,San Diego County


In [28]:
new_df = final_parks_df[final_parks_df.zipcode.isin(zipcodesdf['Zip Code'])]
new_df

Unnamed: 0,name,lat,lng,zipcode,county
0,Wright's Field,32.819494,-116.763489,91901,San Diego County
1,Viejas Park,32.842437,-116.704128,91901,San Diego County
2,Loveland Fishing Access,32.799750,-116.760434,91901,San Diego County
3,Sweetwater Summit Regional Park,32.682508,-117.002547,91902,San Diego County
4,Sweetwater County Park,32.682592,-117.001904,91902,San Diego County
...,...,...,...,...,...
1371,Howard Lane Neighborhood Park,32.566448,-117.063594,92173,San Diego County
1372,Tijuana River Valley Sports Complex,32.558188,-117.075484,92154,San Diego County
1373,Tijuana River County Open Space Preserve PARK ...,32.544846,-117.074652,92154,San Diego County
1374,San Ysidro Community Park,32.554067,-117.044285,92173,San Diego County


In [29]:
new_df.set_index('zipcode',inplace=True)
new_df

Unnamed: 0_level_0,name,lat,lng,county
zipcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
91901,Wright's Field,32.819494,-116.763489,San Diego County
91901,Viejas Park,32.842437,-116.704128,San Diego County
91901,Loveland Fishing Access,32.799750,-116.760434,San Diego County
91902,Sweetwater Summit Regional Park,32.682508,-117.002547,San Diego County
91902,Sweetwater County Park,32.682592,-117.001904,San Diego County
...,...,...,...,...
92173,Howard Lane Neighborhood Park,32.566448,-117.063594,San Diego County
92154,Tijuana River Valley Sports Complex,32.558188,-117.075484,San Diego County
92154,Tijuana River County Open Space Preserve PARK ...,32.544846,-117.074652,San Diego County
92173,San Ysidro Community Park,32.554067,-117.044285,San Diego County


In [30]:
# exporting file
from pathlib import Path

filepath = Path("Resources/Clean/San_Diego_Parks.csv")
new_df.to_csv(filepath)