In [79]:
# import dependencies
import pandas as pd
import os
import glob
import requests
from config import API_KEY
pd.set_option('mode.chained_assignment', None)

In [80]:
# setting path
files = os.path.join("Resources/RAW/hospitals/ushospitalfinder*")

#list of merged files returned
files = glob.glob(files)

# joining files
hospital_df = pd.concat(map(pd.read_csv, files), ignore_index=True)
hospital_df.head(10)

Unnamed: 0,hospital_name,address,even,even 2
0,Sharp Memorial Hospital,"7901 Frost Street San Diego, CA 92123-2701",,
1,Sharp Mary Birch Hospital,"3003 Health Center Drive San Diego, CA 92123",,
2,Kaiser Foundation Hospital,"4647 Zion Avenue San Diego, CA 92120-2507",,
3,Kindred Hospital-San Diego,"1940 El Cajon Boulevard San Diego, CA 92104-1096",,
4,San Diego Hospice,"4311 Third Avenue San Diego, CA 92103-7499",,
5,Univ of CA San Diego Med Ctr,"200 West Arbor Drive San Diego, CA 92103-8970",,
6,Scripps Mercy Hospital,"4077 Fifth Avenue San Diego, CA 92103-2105",,
7,Continental Rehab Hospital,"555 Washington Street San Diego, CA 92103",,
8,Alvarado Hosp Medical Center,"6655 Alvarado Road San Diego, CA 92120-5208",,
9,Promise Hospital of San Diego,"5550 University Avenue San Diego, CA 92105-2307",,


In [81]:
#print column names
print(hospital_df.keys())

Index(['hospital_name', 'address', 'even', 'even 2'], dtype='object')


In [82]:
#dropping columns
clean_df = hospital_df.drop(['even','even 2'], axis=1)
clean_df

Unnamed: 0,hospital_name,address
0,Sharp Memorial Hospital,"7901 Frost Street San Diego, CA 92123-2701"
1,Sharp Mary Birch Hospital,"3003 Health Center Drive San Diego, CA 92123"
2,Kaiser Foundation Hospital,"4647 Zion Avenue San Diego, CA 92120-2507"
3,Kindred Hospital-San Diego,"1940 El Cajon Boulevard San Diego, CA 92104-1096"
4,San Diego Hospice,"4311 Third Avenue San Diego, CA 92103-7499"
...,...,...
88,Sharp Mary Birch Hospital,"3003 Health Center Drive San Diego, CA 92123"
89,Sharp Memorial Hospital,"7901 Frost Street San Diego, CA 92123-2701"
90,VA San Diego Healthcare System,"3350 Lajolla Village Drive San Diego, CA 9216..."
91,Scripps Mem Hosp-La Jolla,"9888 Genesee Avenue La Jolla, CA 92037-1200"


In [83]:
# dropping duplicates
hosp_df = clean_df.drop_duplicates(subset=['hospital_name'])
hosp_df

Unnamed: 0,hospital_name,address
0,Sharp Memorial Hospital,"7901 Frost Street San Diego, CA 92123-2701"
1,Sharp Mary Birch Hospital,"3003 Health Center Drive San Diego, CA 92123"
2,Kaiser Foundation Hospital,"4647 Zion Avenue San Diego, CA 92120-2507"
3,Kindred Hospital-San Diego,"1940 El Cajon Boulevard San Diego, CA 92104-1096"
4,San Diego Hospice,"4311 Third Avenue San Diego, CA 92103-7499"
5,Univ of CA San Diego Med Ctr,"200 West Arbor Drive San Diego, CA 92103-8970"
6,Scripps Mercy Hospital,"4077 Fifth Avenue San Diego, CA 92103-2105"
7,Continental Rehab Hospital,"555 Washington Street San Diego, CA 92103"
8,Alvarado Hosp Medical Center,"6655 Alvarado Road San Diego, CA 92120-5208"
9,Promise Hospital of San Diego,"5550 University Avenue San Diego, CA 92105-2307"


In [84]:
hosp_df.value_counts().sum()

35

In [85]:
# extracting zipcodes
hosp_df['zipcode'] = hosp_df['address'].str[-10:]
hosp_df

Unnamed: 0,hospital_name,address,zipcode
0,Sharp Memorial Hospital,"7901 Frost Street San Diego, CA 92123-2701",92123-2701
1,Sharp Mary Birch Hospital,"3003 Health Center Drive San Diego, CA 92123",", CA 92123"
2,Kaiser Foundation Hospital,"4647 Zion Avenue San Diego, CA 92120-2507",92120-2507
3,Kindred Hospital-San Diego,"1940 El Cajon Boulevard San Diego, CA 92104-1096",92104-1096
4,San Diego Hospice,"4311 Third Avenue San Diego, CA 92103-7499",92103-7499
5,Univ of CA San Diego Med Ctr,"200 West Arbor Drive San Diego, CA 92103-8970",92103-8970
6,Scripps Mercy Hospital,"4077 Fifth Avenue San Diego, CA 92103-2105",92103-2105
7,Continental Rehab Hospital,"555 Washington Street San Diego, CA 92103",", CA 92103"
8,Alvarado Hosp Medical Center,"6655 Alvarado Road San Diego, CA 92120-5208",92120-5208
9,Promise Hospital of San Diego,"5550 University Avenue San Diego, CA 92105-2307",92105-2307


In [86]:
# take the last block of strings by splitting on a space and taking the last part
hosp_df['zipcodes'] = [x.split(' ')[-1] for x in hosp_df['address']]
# Check if any of them contains a -
hosp_df.loc[hosp_df['zipcodes'].str.contains('-')]

Unnamed: 0,hospital_name,address,zipcode,zipcodes
0,Sharp Memorial Hospital,"7901 Frost Street San Diego, CA 92123-2701",92123-2701,92123-2701
2,Kaiser Foundation Hospital,"4647 Zion Avenue San Diego, CA 92120-2507",92120-2507,92120-2507
3,Kindred Hospital-San Diego,"1940 El Cajon Boulevard San Diego, CA 92104-1096",92104-1096,92104-1096
4,San Diego Hospice,"4311 Third Avenue San Diego, CA 92103-7499",92103-7499,92103-7499
5,Univ of CA San Diego Med Ctr,"200 West Arbor Drive San Diego, CA 92103-8970",92103-8970,92103-8970
6,Scripps Mercy Hospital,"4077 Fifth Avenue San Diego, CA 92103-2105",92103-2105,92103-2105
8,Alvarado Hosp Medical Center,"6655 Alvarado Road San Diego, CA 92120-5208",92120-5208,92120-5208
9,Promise Hospital of San Diego,"5550 University Avenue San Diego, CA 92105-2307",92105-2307,92105-2307
10,San Diego Cnty Psych Hospital,"3853 Rosecrans Street San Diego, CA 92110-3115",92110-3115,92110-3115
11,Naval Medical Center,"34800 Bob Wilson Drive San Diego, CA 92134-5000",92134-5000,92134-5000


In [87]:
hosp_df['zipcode'] = [x.split('-')[0] for x in hosp_df['zipcodes']]
hosp_df

Unnamed: 0,hospital_name,address,zipcode,zipcodes
0,Sharp Memorial Hospital,"7901 Frost Street San Diego, CA 92123-2701",92123,92123-2701
1,Sharp Mary Birch Hospital,"3003 Health Center Drive San Diego, CA 92123",92123,92123
2,Kaiser Foundation Hospital,"4647 Zion Avenue San Diego, CA 92120-2507",92120,92120-2507
3,Kindred Hospital-San Diego,"1940 El Cajon Boulevard San Diego, CA 92104-1096",92104,92104-1096
4,San Diego Hospice,"4311 Third Avenue San Diego, CA 92103-7499",92103,92103-7499
5,Univ of CA San Diego Med Ctr,"200 West Arbor Drive San Diego, CA 92103-8970",92103,92103-8970
6,Scripps Mercy Hospital,"4077 Fifth Avenue San Diego, CA 92103-2105",92103,92103-2105
7,Continental Rehab Hospital,"555 Washington Street San Diego, CA 92103",92103,92103
8,Alvarado Hosp Medical Center,"6655 Alvarado Road San Diego, CA 92120-5208",92120,92120-5208
9,Promise Hospital of San Diego,"5550 University Avenue San Diego, CA 92105-2307",92105,92105-2307


In [88]:
hosp_df.reset_index(inplace=True)
hosp_df.drop(['zipcodes', 'index'], axis=1, inplace = True)
hosp_df['zipcode'] = hosp_df['zipcode'].astype('int64')
hosp_df

Unnamed: 0,hospital_name,address,zipcode
0,Sharp Memorial Hospital,"7901 Frost Street San Diego, CA 92123-2701",92123
1,Sharp Mary Birch Hospital,"3003 Health Center Drive San Diego, CA 92123",92123
2,Kaiser Foundation Hospital,"4647 Zion Avenue San Diego, CA 92120-2507",92120
3,Kindred Hospital-San Diego,"1940 El Cajon Boulevard San Diego, CA 92104-1096",92104
4,San Diego Hospice,"4311 Third Avenue San Diego, CA 92103-7499",92103
5,Univ of CA San Diego Med Ctr,"200 West Arbor Drive San Diego, CA 92103-8970",92103
6,Scripps Mercy Hospital,"4077 Fifth Avenue San Diego, CA 92103-2105",92103
7,Continental Rehab Hospital,"555 Washington Street San Diego, CA 92103",92103
8,Alvarado Hosp Medical Center,"6655 Alvarado Road San Diego, CA 92120-5208",92120
9,Promise Hospital of San Diego,"5550 University Avenue San Diego, CA 92105-2307",92105


In [89]:
print(hosp_df.value_counts().sum())
hosp_df

35


Unnamed: 0,hospital_name,address,zipcode
0,Sharp Memorial Hospital,"7901 Frost Street San Diego, CA 92123-2701",92123
1,Sharp Mary Birch Hospital,"3003 Health Center Drive San Diego, CA 92123",92123
2,Kaiser Foundation Hospital,"4647 Zion Avenue San Diego, CA 92120-2507",92120
3,Kindred Hospital-San Diego,"1940 El Cajon Boulevard San Diego, CA 92104-1096",92104
4,San Diego Hospice,"4311 Third Avenue San Diego, CA 92103-7499",92103
5,Univ of CA San Diego Med Ctr,"200 West Arbor Drive San Diego, CA 92103-8970",92103
6,Scripps Mercy Hospital,"4077 Fifth Avenue San Diego, CA 92103-2105",92103
7,Continental Rehab Hospital,"555 Washington Street San Diego, CA 92103",92103
8,Alvarado Hosp Medical Center,"6655 Alvarado Road San Diego, CA 92120-5208",92120
9,Promise Hospital of San Diego,"5550 University Avenue San Diego, CA 92105-2307",92105


In [46]:
# link example format
# https://maps.googleapis.com/maps/api/geocode/json?address=1600+Amphitheatre+Parkway,+Mountain+View,+CA&key=YOUR_API_KEY
hosp_coords = []
key = API_KEY
addresses = hosp_df['address'].tolist()
for addy in addresses: 
    
    # cleaning addresses for URL
    new_addy = addy.replace(" ", "+")
    new_addy = new_addy.replace(",", "")
    
    # URL for google geocode API
    hosp_link = f"https://maps.googleapis.com/maps/api/geocode/json?address={new_addy}&key={key}"
    
    # Make a 'Get' request for the school location data.
    hosp_location = requests.get(hosp_link)
    
    # Get the JSON data.
    hosp_data = hosp_location.json()
    lat = hosp_data["results"][0]["geometry"]["location"]["lat"]
    lng = hosp_data["results"][0]["geometry"]["location"]["lng"]
    
    # print(new_addy)
    # print(hosp_link)
    # print(hosp_location)
    # print(lat)
    # print(lng)
    # print()
    
    hosp_coords.append({"lat": lat,
                         "lng": lng})

In [76]:
hosp_geos_df = pd.DataFrame(hosp_coords, index=None, columns = ["lat", "lng"])
hosp_geos_df['zipcode'] = hosp_df.zipcode
hosp_geos_df['zipcode'] = hosp_geos_df['zipcode'].astype('int64')
print(hosp_geos_df.value_counts().sum())
hosp_geos_df

35


Unnamed: 0,lat,lng,zipcode
0,32.799521,-117.154603,92123
1,32.798373,-117.15506,92123
2,32.791613,-117.095405,92120
3,32.75635,-117.144261,92104
4,32.758465,-117.163698,92103
5,32.75427,-117.166135,92103
6,32.751089,-117.160556,92103
7,32.749746,-117.159794,92103
8,32.776641,-117.057319,92120
9,32.749504,-117.076742,92105


In [102]:
finaldf = hosp_df.merge(hosp_geos_df, left_index=True, right_index=True)
finaldf

Unnamed: 0,hospital_name,address,zipcode_x,lat,lng,zipcode_y
0,Sharp Memorial Hospital,"7901 Frost Street San Diego, CA 92123-2701",92123,32.799521,-117.154603,92123
1,Sharp Mary Birch Hospital,"3003 Health Center Drive San Diego, CA 92123",92123,32.798373,-117.15506,92123
2,Kaiser Foundation Hospital,"4647 Zion Avenue San Diego, CA 92120-2507",92120,32.791613,-117.095405,92120
3,Kindred Hospital-San Diego,"1940 El Cajon Boulevard San Diego, CA 92104-1096",92104,32.75635,-117.144261,92104
4,San Diego Hospice,"4311 Third Avenue San Diego, CA 92103-7499",92103,32.758465,-117.163698,92103
5,Univ of CA San Diego Med Ctr,"200 West Arbor Drive San Diego, CA 92103-8970",92103,32.75427,-117.166135,92103
6,Scripps Mercy Hospital,"4077 Fifth Avenue San Diego, CA 92103-2105",92103,32.751089,-117.160556,92103
7,Continental Rehab Hospital,"555 Washington Street San Diego, CA 92103",92103,32.749746,-117.159794,92103
8,Alvarado Hosp Medical Center,"6655 Alvarado Road San Diego, CA 92120-5208",92120,32.776641,-117.057319,92120
9,Promise Hospital of San Diego,"5550 University Avenue San Diego, CA 92105-2307",92105,32.749504,-117.076742,92105


In [103]:
finaldf.drop(['zipcode_y'], axis=1, inplace=True)
finaldf.rename(columns={'zipcode_x':'zipcode','hospital_name':'name'}, inplace=True)
finaldf.set_index(['zipcode'], inplace=True)
finaldf.sort_index(inplace=True)
finaldf

Unnamed: 0_level_0,name,address,lat,lng
zipcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
91911,Bayview Hosp & Mental System,"330 Moss Street Chula Vista, CA 91911-2005",32.61752,-117.071364
91911,Sharp Chula Vista Med Ctr,"751 Medical Center Court Chula Vista, CA 9191...",32.619391,-117.022284
91942,Sharp Grossmont Hospital,"5555 Grossmont Center Drive La Mesa, CA 91942...",32.781653,-117.008397
91950,Paradise Valley Hospital,"2400 East Fourth Street National City, CA 919...",32.685113,-117.082885
92024,Scripps Mem Hospital-Encinitas,"354 Santa Fe Drive Encinitas, CA 92024-5182",33.038342,-117.284453
92025,Palomar Medical Center,"555 East Valley Parkway Escondido, CA 92025-3084",33.124859,-117.075823
92028,Fallbrook Hospital,"624 East Elder Street Fallbrook, CA 92028-3099",33.380736,-117.244647
92037,Scripps Mem Hosp-La Jolla,"9888 Genesee Avenue La Jolla, CA 92037-1200",32.885154,-117.225538
92037,Scripps Green Hospital,"10666 North Torrey Pines Road La Jolla, CA 92...",32.897036,-117.242773
92055,Naval Hospital,"NULL Camp Pendleton, CA 92055-5191",33.317842,-117.320512


In [105]:
# exporting file
from pathlib import Path

filepath = Path("Resources/Clean/San_Diego_Hospital_Data.csv")
finaldf.to_csv(filepath)