In [119]:
# import dependencies
import pandas as pd
import os
import glob
import requests

In [120]:
# setting path
files = os.path.join("Resources/RAW/hospitals/ushospitalfinder*")

#list of merged files returned
files = glob.glob(files)

# joining files
hospital_df = pd.concat(map(pd.read_csv, files), ignore_index=True)
hospital_df.head(10)

Unnamed: 0,hospital_name,address,even,even 2
0,Sharp Chula Vista Med Ctr,"751 Medical Center Court Chula Vista, CA 9191...",,
1,Bayview Hosp & Mental System,"330 Moss Street Chula Vista, CA 91911-2005",,
2,Paradise Valley Hospital,"2400 East Fourth Street National City, CA 919...",,
3,Sharp Coronado Hospital,"250 Prospect Place Coronado, CA 92118-1943",,
4,Rady Children Hosp & Hlth Ctr,"3020 Children's Way San Diego, CA 92123-4282",,
5,Naval Medical Center,"34800 Bob Wilson Drive San Diego, CA 92134-5000",,
6,Promise Hospital of San Diego,"5550 University Avenue San Diego, CA 92105-2307",,
7,Kindred Hospital-San Diego,"1940 El Cajon Boulevard San Diego, CA 92104-1096",,
8,Continental Rehab Hospital,"555 Washington Street San Diego, CA 92103",,
9,Scripps Mercy Hospital,"4077 Fifth Avenue San Diego, CA 92103-2105",,


In [121]:
#print column names
print(hospital_df.keys())

Index(['hospital_name', 'address', 'even', 'even 2'], dtype='object')


In [122]:
#dropping columns
clean_df = hospital_df.drop(['even','even 2'], axis=1)

clean_df.head()


Unnamed: 0,hospital_name,address
0,Sharp Chula Vista Med Ctr,"751 Medical Center Court Chula Vista, CA 9191..."
1,Bayview Hosp & Mental System,"330 Moss Street Chula Vista, CA 91911-2005"
2,Paradise Valley Hospital,"2400 East Fourth Street National City, CA 919..."
3,Sharp Coronado Hospital,"250 Prospect Place Coronado, CA 92118-1943"
4,Rady Children Hosp & Hlth Ctr,"3020 Children's Way San Diego, CA 92123-4282"


In [134]:
# dropping duplicates
hosp_df = clean_df.drop_duplicates(subset=['hospital_name'])

hosp_df.head()

Unnamed: 0,hospital_name,address
0,Sharp Chula Vista Med Ctr,"751 Medical Center Court Chula Vista, CA 9191..."
1,Bayview Hosp & Mental System,"330 Moss Street Chula Vista, CA 91911-2005"
2,Paradise Valley Hospital,"2400 East Fourth Street National City, CA 919..."
3,Sharp Coronado Hospital,"250 Prospect Place Coronado, CA 92118-1943"
4,Rady Children Hosp & Hlth Ctr,"3020 Children's Way San Diego, CA 92123-4282"


In [124]:
hosp_df.value_counts().sum()

35

In [125]:
# extracting zipcodes

hosp_df['zipcode'] = hosp_df['address'].str[-10:]

hosp_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hosp_df['zipcode'] = hosp_df['address'].str[-10:]


Unnamed: 0,hospital_name,address,zipcode
0,Sharp Chula Vista Med Ctr,"751 Medical Center Court Chula Vista, CA 9191...",91911-6617
1,Bayview Hosp & Mental System,"330 Moss Street Chula Vista, CA 91911-2005",91911-2005
2,Paradise Valley Hospital,"2400 East Fourth Street National City, CA 919...",91950-2099
3,Sharp Coronado Hospital,"250 Prospect Place Coronado, CA 92118-1943",92118-1943
4,Rady Children Hosp & Hlth Ctr,"3020 Children's Way San Diego, CA 92123-4282",92123-4282
5,Naval Medical Center,"34800 Bob Wilson Drive San Diego, CA 92134-5000",92134-5000
6,Promise Hospital of San Diego,"5550 University Avenue San Diego, CA 92105-2307",92105-2307
7,Kindred Hospital-San Diego,"1940 El Cajon Boulevard San Diego, CA 92104-1096",92104-1096
8,Continental Rehab Hospital,"555 Washington Street San Diego, CA 92103",", CA 92103"
9,Scripps Mercy Hospital,"4077 Fifth Avenue San Diego, CA 92103-2105",92103-2105


In [126]:
# take the last block of strings by splitting on a space and taking the last part
hosp_df['zipcodes'] = [x.split(' ')[-1] for x in hosp_df['address']]
# Check if any of them contains a -
hosp_df.loc[hosp_df['zipcodes'].str.contains('-')]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hosp_df['zipcodes'] = [x.split(' ')[-1] for x in hosp_df['address']]


Unnamed: 0,hospital_name,address,zipcode,zipcodes
0,Sharp Chula Vista Med Ctr,"751 Medical Center Court Chula Vista, CA 9191...",91911-6617,91911-6617
1,Bayview Hosp & Mental System,"330 Moss Street Chula Vista, CA 91911-2005",91911-2005,91911-2005
2,Paradise Valley Hospital,"2400 East Fourth Street National City, CA 919...",91950-2099,91950-2099
3,Sharp Coronado Hospital,"250 Prospect Place Coronado, CA 92118-1943",92118-1943,92118-1943
4,Rady Children Hosp & Hlth Ctr,"3020 Children's Way San Diego, CA 92123-4282",92123-4282,92123-4282
5,Naval Medical Center,"34800 Bob Wilson Drive San Diego, CA 92134-5000",92134-5000,92134-5000
6,Promise Hospital of San Diego,"5550 University Avenue San Diego, CA 92105-2307",92105-2307,92105-2307
7,Kindred Hospital-San Diego,"1940 El Cajon Boulevard San Diego, CA 92104-1096",92104-1096,92104-1096
9,Scripps Mercy Hospital,"4077 Fifth Avenue San Diego, CA 92103-2105",92103-2105,92103-2105
10,Alvarado Hosp Medical Center,"6655 Alvarado Road San Diego, CA 92120-5208",92120-5208,92120-5208


In [127]:
hosp_df['zipcode'] = [x.split('-')[0] for x in hosp_df['zipcodes']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hosp_df['zipcode'] = [x.split('-')[0] for x in hosp_df['zipcodes']]


In [128]:
hosp_df.set_index('zipcode',inplace=True)
hosp_df.sort_index(inplace=True)
hosp_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hosp_df.sort_index(inplace=True)


Unnamed: 0_level_0,hospital_name,address,zipcodes
zipcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
91911,Sharp Chula Vista Med Ctr,"751 Medical Center Court Chula Vista, CA 9191...",91911-6617
91911,Bayview Hosp & Mental System,"330 Moss Street Chula Vista, CA 91911-2005",91911-2005
91942,Sharp Grossmont Hospital,"5555 Grossmont Center Drive La Mesa, CA 91942...",91942-3019
91950,Paradise Valley Hospital,"2400 East Fourth Street National City, CA 919...",91950-2099
92024,Scripps Mem Hospital-Encinitas,"354 Santa Fe Drive Encinitas, CA 92024-5182",92024-5182
92025,Palomar Medical Center,"555 East Valley Parkway Escondido, CA 92025-3084",92025-3084
92028,Fallbrook Hospital,"624 East Elder Street Fallbrook, CA 92028-3099",92028-3099
92037,Scripps Mem Hosp-La Jolla,"9888 Genesee Avenue La Jolla, CA 92037-1200",92037-1200
92037,Scripps Green Hospital,"10666 North Torrey Pines Road La Jolla, CA 92...",92037-1093
92055,Naval Hospital,"NULL Camp Pendleton, CA 92055-5191",92055-5191


In [129]:
hosp_df.drop(['zipcodes'], axis=1, inplace = True)
hosp_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hosp_df.drop(['zipcodes'], axis=1, inplace = True)


Unnamed: 0_level_0,hospital_name,address
zipcode,Unnamed: 1_level_1,Unnamed: 2_level_1
91911,Sharp Chula Vista Med Ctr,"751 Medical Center Court Chula Vista, CA 9191..."
91911,Bayview Hosp & Mental System,"330 Moss Street Chula Vista, CA 91911-2005"
91942,Sharp Grossmont Hospital,"5555 Grossmont Center Drive La Mesa, CA 91942..."
91950,Paradise Valley Hospital,"2400 East Fourth Street National City, CA 919..."
92024,Scripps Mem Hospital-Encinitas,"354 Santa Fe Drive Encinitas, CA 92024-5182"
92025,Palomar Medical Center,"555 East Valley Parkway Escondido, CA 92025-3084"
92028,Fallbrook Hospital,"624 East Elder Street Fallbrook, CA 92028-3099"
92037,Scripps Mem Hosp-La Jolla,"9888 Genesee Avenue La Jolla, CA 92037-1200"
92037,Scripps Green Hospital,"10666 North Torrey Pines Road La Jolla, CA 92..."
92055,Naval Hospital,"NULL Camp Pendleton, CA 92055-5191"


In [130]:
hosp_df.value_counts().sum()

35

In [131]:
# link example format
# https://maps.googleapis.com/maps/api/geocode/json?address=1600+Amphitheatre+Parkway,+Mountain+View,+CA&key=YOUR_API_KEY
hosp_coords = []
key = 'AIzaSyA8r48O8blIHmLRGnsGi7zPLQJGn-Wbi1w'
addresses = hosp_df['address'].tolist()
for addy in addresses: 
    
    # cleaning addresses for URL
    new_addy = addy.replace(" ", "+")
    new_addy = new_addy.replace(",", "")
    
    # URL for google geocode API
    hosp_link = f"https://maps.googleapis.com/maps/api/geocode/json?address={new_addy}&key={key}"
    
    # Make a 'Get' request for the school location data.
    hosp_location = requests.get(hosp_link)
    
    # Get the JSON data.
    hosp_data = hosp_location.json()
    lat = hosp_data["results"][0]["geometry"]["location"]["lat"]
    lng = hosp_data["results"][0]["geometry"]["location"]["lng"]
    
    print(new_addy)
    print(hosp_link)
    print(hosp_location)
    print(lat)
    print(lng)
    print()
    
    hosp_coords.append({"lat": lat,
                         "lng": lng})

751+Medical+Center+Court++Chula+Vista+CA+91911-6617
https://maps.googleapis.com/maps/api/geocode/json?address=751+Medical+Center+Court++Chula+Vista+CA+91911-6617&key=AIzaSyA8r48O8blIHmLRGnsGi7zPLQJGn-Wbi1w
<Response [200]>
32.6193909
-117.0222837

330+Moss+Street++Chula+Vista+CA+91911-2005
https://maps.googleapis.com/maps/api/geocode/json?address=330+Moss+Street++Chula+Vista+CA+91911-2005&key=AIzaSyA8r48O8blIHmLRGnsGi7zPLQJGn-Wbi1w
<Response [200]>
32.6175204
-117.0713635

5555+Grossmont+Center+Drive++La+Mesa+CA+91942-3019
https://maps.googleapis.com/maps/api/geocode/json?address=5555+Grossmont+Center+Drive++La+Mesa+CA+91942-3019&key=AIzaSyA8r48O8blIHmLRGnsGi7zPLQJGn-Wbi1w
<Response [200]>
32.7816526
-117.0083968

2400+East+Fourth+Street++National+City+CA+91950-2099
https://maps.googleapis.com/maps/api/geocode/json?address=2400+East+Fourth+Street++National+City+CA+91950-2099&key=AIzaSyA8r48O8blIHmLRGnsGi7zPLQJGn-Wbi1w
<Response [200]>
32.6851132
-117.0828852

354+Santa+Fe+Drive++Encini

In [132]:
hosp_geos_df = pd.DataFrame(hosp_coords, index=None, columns = ["lat", "lng"])
hosp_geos_df['zipcode'] = hosp_df.index
hosp_geos_df.set_index(['zipcode'],inplace=True)
hosp_geos_df.value_counts().sum()

35

In [133]:
full_hosp_df = pd.merge(hosp_df, hosp_geos_df, left_index=True, right_index=True)
full_hosp_df

Unnamed: 0_level_0,hospital_name,address,lat,lng
zipcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
91911,Sharp Chula Vista Med Ctr,"751 Medical Center Court Chula Vista, CA 9191...",32.619391,-117.022284
91911,Sharp Chula Vista Med Ctr,"751 Medical Center Court Chula Vista, CA 9191...",32.617520,-117.071364
91911,Bayview Hosp & Mental System,"330 Moss Street Chula Vista, CA 91911-2005",32.619391,-117.022284
91911,Bayview Hosp & Mental System,"330 Moss Street Chula Vista, CA 91911-2005",32.617520,-117.071364
91942,Sharp Grossmont Hospital,"5555 Grossmont Center Drive La Mesa, CA 91942...",32.781653,-117.008397
...,...,...,...,...
92673,Saddleback Mem Medical Center,"654 Camino De Los Mares San Clemente, CA 9267...",33.457484,-117.649924
92691,Children's Hospital at Mission,"27700 Medical Center Road Mission Viejo, CA 9...",33.561015,-117.665394
92691,Children's Hospital at Mission,"27700 Medical Center Road Mission Viejo, CA 9...",33.561015,-117.665394
92691,Mission Hospital,"27700 Medical Center Road Mission Viejo, CA 9...",33.561015,-117.665394


In [10]:
# exporting file
from pathlib import Path

filepath = Path("Resources/Clean/San_Diego_Hospital_Data.csv")
hosp_df.to_csv(filepath)