In [5]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
from geopy.distance import great_circle
from shapely.geometry import MultiPoint

In [66]:
# load in housing data with necessary zips
housing_data = pd.read_csv("Resources/Clean/San_Diego_Housing_Data.csv", index_col=0)
housing_data.reset_index(inplace=True)
housing_data

# get necessary zip
zipcodes_df = pd.DataFrame(housing_data['Zip Code'])
zipcodes_df.rename(columns = {'Zip Code':'zipcode'}, inplace = True)
zipcodes_df

# Create geo tables
schools = pd.read_csv("Resources/Clean/San_Diego_School_Data.csv")
businesses = pd.read_csv("Resources/Clean/San_Diego_Markets.csv")
transit = pd.read_csv("Resources/Clean/San_Diego_Transit.csv")
parks = pd.read_csv("Resources/Clean/San_Diego_Parks.csv")
hospitals = pd.read_csv("Resources/Clean/San_Diego_Hospital_Data.csv")

hospitals

Unnamed: 0,zipcode,name,address,lat,lng
0,91911,Bayview Hosp & Mental System,"330 Moss Street Chula Vista, CA 91911-2005",32.61752,-117.071364
1,91911,Sharp Chula Vista Med Ctr,"751 Medical Center Court Chula Vista, CA 9191...",32.619391,-117.022284
2,91942,Sharp Grossmont Hospital,"5555 Grossmont Center Drive La Mesa, CA 91942...",32.781653,-117.008397
3,91950,Paradise Valley Hospital,"2400 East Fourth Street National City, CA 919...",32.685113,-117.082885
4,92024,Scripps Mem Hospital-Encinitas,"354 Santa Fe Drive Encinitas, CA 92024-5182",33.038342,-117.284453
5,92025,Palomar Medical Center,"555 East Valley Parkway Escondido, CA 92025-3084",33.124859,-117.075823
6,92028,Fallbrook Hospital,"624 East Elder Street Fallbrook, CA 92028-3099",33.380736,-117.244647
7,92037,Scripps Mem Hosp-La Jolla,"9888 Genesee Avenue La Jolla, CA 92037-1200",32.885154,-117.225538
8,92037,Scripps Green Hospital,"10666 North Torrey Pines Road La Jolla, CA 92...",32.897036,-117.242773
9,92055,Naval Hospital,"NULL Camp Pendleton, CA 92055-5191",33.317842,-117.320512


In [68]:
# hospitals Geos
hospital_geos = hospitals[['name','lat','lng','zipcode']]
hospital_geos['resource'] = 'hospital'
hospital_geos = hospital_geos.set_index('zipcode')
hospital_geos

Unnamed: 0_level_0,name,lat,lng,resource
zipcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
91911,Bayview Hosp & Mental System,32.61752,-117.071364,hospital
91911,Sharp Chula Vista Med Ctr,32.619391,-117.022284,hospital
91942,Sharp Grossmont Hospital,32.781653,-117.008397,hospital
91950,Paradise Valley Hospital,32.685113,-117.082885,hospital
92024,Scripps Mem Hospital-Encinitas,33.038342,-117.284453,hospital
92025,Palomar Medical Center,33.124859,-117.075823,hospital
92028,Fallbrook Hospital,33.380736,-117.244647,hospital
92037,Scripps Mem Hosp-La Jolla,32.885154,-117.225538,hospital
92037,Scripps Green Hospital,32.897036,-117.242773,hospital
92055,Naval Hospital,33.317842,-117.320512,hospital


In [69]:
# Schools Geos
school_geos = schools[['name','lat','lng','zipcode']]
school_geos['resource'] = 'school'
school_geos = school_geos.set_index('zipcode')
school_geos

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  school_geos['resource'] = 'school'


Unnamed: 0_level_0,name,lat,lng,resource
zipcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92037,Bird Rock Elementary School,32.811186,-117.263065,school
92093,Preuss School Ucsd,32.878454,-117.229467,school
92037,La Jolla Elementary School,32.839236,-117.271699,school
92037,Torrey Pines Elementary School,32.859286,-117.242970,school
92037,La Jolla High School,32.832408,-117.264719,school
...,...,...,...,...
92024,San Dieguito High Academy,33.037340,-117.274455,school
92024,Ocean Knoll Elementary School,33.039846,-117.277131,school
92024,Paul Ecke-Central Elementary School,33.057975,-117.298047,school
92024,Oak Crest Middle School,33.043177,-117.265476,school


In [70]:
# Business Geos
business_geos = businesses[['name','lat','lng','zipcode']]
business_geos['resource'] = 'business'
business_geos = business_geos.set_index('zipcode')
business_geos

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  business_geos['resource'] = 'business'


Unnamed: 0_level_0,name,lat,lng,resource
zipcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
91910,Smart & Final Extra!,32.639000,-117.050924,business
91910,Sprouts Farmers Market,32.627807,-117.074994,business
91911,Carnival Supermarket,32.621859,-117.072892,business
91910,Target Grocery,32.653616,-117.065593,business
91910,99 Ranch Market,32.629342,-117.040931,business
...,...,...,...,...
92154,El Picador Foods,32.573865,-117.055439,business
92173,Liva Distributor,32.568932,-117.064476,business
92173,La Bodega Market,32.552844,-117.042370,business
92173,Pancho Villa Mercado,32.561686,-117.063112,business


In [73]:
# Transit Geos

transit_geos = transit[['stop_name','lat','lng','Zip Code']]
transit_geos['resource'] = 'transit'
transit_geos.rename(columns = {'Zip Code':'zipcode'}, inplace = True)
transit_geos.rename(columns = {'stop_name':'name'}, inplace = True)
transit_geos = transit_geos.set_index('zipcode')
transit_geos

Unnamed: 0_level_0,name,lat,lng,resource
zipcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
91941,Allison Av & Palm Av,32.765915,-117.019588,transit
91941,La Mesa Bl & University Av,32.767805,-117.015427,transit
91977,Sweetwater Rd & Jamacha Rd,32.714228,-117.012800,transit
91941,La Mesa Bl & Glen St,32.769344,-117.011003,transit
91977,Orville St & Brucker Av,32.709168,-117.009255,transit
...,...,...,...,...
92008,Plaza Camino Real Transit Center,33.178276,-117.336525,transit
92057,San Luis Rey Transit Center,33.254410,-117.298129,transit
92075,Solana Beach Station,32.992937,-117.271067,transit
92092,Sorrento Valley Station,32.902813,-117.225088,transit


In [75]:
# Park Geos
park_geos = parks[['name','lat','lng','postcode']]
park_geos.rename(columns = {'postcode':'zipcode'}, inplace = True)
park_geos['resource'] = 'park'
park_geos = park_geos.set_index('zipcode')
park_geos

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  park_geos.rename(columns = {'postcode':'zipcode'}, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  park_geos['resource'] = 'park'


Unnamed: 0_level_0,name,lat,lng,resource
zipcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
91901,Wright's Field,32.819494,-116.763489,park
91901,Viejas Park,32.842437,-116.704128,park
91901,Loveland Fishing Access,32.799750,-116.760434,park
91902,Sweetwater Summit Regional Park,32.682508,-117.002547,park
91902,Sweetwater County Park,32.682592,-117.001904,park
...,...,...,...,...
92763,Trestles Beach Trailhead,33.395758,-117.591568,park
92051,Rock Formation,33.402802,-117.551148,park
92763,San Onofre,33.389572,-117.593152,park
92763,Trestles Beach Trailhead,33.396085,-117.591365,park


In [76]:
clean_geos_all = pd.concat([hospital_geos,school_geos, business_geos, transit_geos, park_geos])
clean_geos_all

Unnamed: 0_level_0,name,lat,lng,resource
zipcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
91911,Bayview Hosp & Mental System,32.617520,-117.071364,hospital
91911,Sharp Chula Vista Med Ctr,32.619391,-117.022284,hospital
91942,Sharp Grossmont Hospital,32.781653,-117.008397,hospital
91950,Paradise Valley Hospital,32.685113,-117.082885,hospital
92024,Scripps Mem Hospital-Encinitas,33.038342,-117.284453,hospital
...,...,...,...,...
92763,Trestles Beach Trailhead,33.395758,-117.591568,park
92051,Rock Formation,33.402802,-117.551148,park
92763,San Onofre,33.389572,-117.593152,park
92763,Trestles Beach Trailhead,33.396085,-117.591365,park


In [77]:
coords = clean_geos_all[['lat', 'lng']].values
coords

array([[  32.6175204, -117.0713635],
       [  32.6193909, -117.0222837],
       [  32.7816526, -117.0083968],
       ...,
       [  33.3895721, -117.5931518],
       [  33.3960846, -117.5913646],
       [  33.3853075, -117.5939359]])

In [78]:
clean_geos_all.reset_index(inplace = True)
new_df = clean_geos_all[clean_geos_all.zipcode.isin(zipcodes_df.zipcode)]

In [80]:
new_df.groupby(['resource']).count()

Unnamed: 0_level_0,zipcode,name,lat,lng
resource,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
business,423,423,423,423
hospital,25,25,25,25
park,1245,1245,1245,1245
school,245,245,245,245
transit,5439,5439,5439,5439


In [None]:
fi