## ETL Project: USA HOSPITALS
#### Dependencies and Setup

In [79]:
import requests
import psycopg2
import pandas as pd
import config as creds
import sqlalchemy as sqlalchemy_package

#### Extracting Data via API interaction 
* ESRI Dataset: Definitive Healthcare: USA Hospital Beds

In [8]:
# Query URL
url = f"https://services7.arcgis.com/LXCny1HyhQCUSueu/arcgis/rest/services/Definitive_Healthcare_USA_Hospital_Beds/FeatureServer/0/query?where=1%3D1&outFields=*&outSR=4326&f=json"

In [30]:
# Get Hospitals JSON data
response = requests.get(url).json()
hospitals_data = response['features']
hospitals_data[1]['attributes']

{'OBJECTID': 2,
 'HOSPITAL_NAME': 'Southern Arizona VA Health Care System',
 'HOSPITAL_TYPE': 'VA Hospital',
 'HQ_ADDRESS': '3601 S 6th Ave',
 'HQ_ADDRESS1': None,
 'HQ_CITY': 'Tucson',
 'HQ_STATE': 'AZ',
 'HQ_ZIP_CODE': '85723',
 'COUNTY_NAME': 'Pima',
 'STATE_NAME': 'Arizona',
 'STATE_FIPS': '04',
 'CNTY_FIPS': '019',
 'FIPS': '04019',
 'NUM_LICENSED_BEDS': 295,
 'NUM_STAFFED_BEDS': 295,
 'NUM_ICU_BEDS': 2,
 'ADULT_ICU_BEDS': 2,
 'PEDI_ICU_BEDS': None,
 'BED_UTILIZATION': None,
 'Potential_Increase_In_Bed_Capac': 0,
 'AVG_VENTILATOR_USAGE': 2}

In [95]:
# Create Lists of Data Dictionaries
hospitals = []
hospitals_beds = []
hospitals_location = []
hospitals_geometry = []
number = 1
for record in hospitals_data:
    hospital_id = str(number) + "-" + record['attributes']['FIPS']
    number += 1
    # create hospitals dataset
    row = {}
    row['id'] = hospital_id
    row['hospital_name'] = record['attributes']['HOSPITAL_NAME']
    hospitals.append(row)
    # create hospitals beds dataset
    row = {}
    row['id'] = hospital_id
    row['licensed_beds'] = record['attributes']['NUM_LICENSED_BEDS']
    row['icu_beds'] = record['attributes']['NUM_ICU_BEDS']
    row['ventilator_usage'] = record['attributes']['AVG_VENTILATOR_USAGE']
    hospitals_beds.append(row)
    # create hospitals location dataset
    row = {}
    row['id'] = hospital_id
    row['hospital_type'] = record['attributes']['HOSPITAL_TYPE']
    row['hospital_hq_address'] = record['attributes']['HQ_ADDRESS']
    row['hospital_hq_city'] = record['attributes']['HQ_CITY']
    row['hospital_hq_state'] = record['attributes']['HQ_STATE']
    row['hospital_hq_zip_code'] = record['attributes']['HQ_ZIP_CODE']
    row['state_name'] = record['attributes']['STATE_NAME']
    hospitals_location.append(row)
    # create hospitals geometry
    row = {}
    row['id'] = hospital_id
    row['lat'] = record['geometry']['x']
    row['lng'] = record['geometry']['y']
    hospitals_geometry.append(row)    

#### Transformation Data
* Converting lists of dictionaries into DataFrames

In [96]:
# Creating DataFrame for Hospitals 
hospitals_geometry_pd = pd.DataFrame(hospitals_geometry)
hospitals_geometry_pd.set_index("id", inplace=True)
hospitals_geometry_pd.head(5)

Unnamed: 0_level_0,lat,lng
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1-04013,-112.066157,33.495498
2-04019,-110.965885,32.181263
3-06019,-119.779742,36.773324
4-09009,-72.95761,41.2844
5-10003,-75.606532,39.740206


In [90]:
# Creating DataFrame for Hospitals Location Data
hospitals_location_pd = pd.DataFrame(hospitals_location)
hospitals_location_pd.set_index("id", inplace=True)
hospitals_location_pd.head(5)

Unnamed: 0_level_0,hospital_type,hospital_hq_address,hospital_hq_city,hospital_hq_state,hospital_hq_zip_code,state_name
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1-04013,VA Hospital,650 E Indian School Rd,Phoenix,AZ,85012,Arizona
2-04019,VA Hospital,3601 S 6th Ave,Tucson,AZ,85723,Arizona
3-06019,VA Hospital,2615 E Clinton Ave,Fresno,CA,93703,California
4-09009,VA Hospital,950 Campbell Ave,West Haven,CT,6516,Connecticut
5-10003,VA Hospital,1601 Kirkwood Hwy,Wilmington,DE,19805,Delaware


In [88]:
# Creating DataFrame for Hospitals 
hospitals_beds_pd = pd.DataFrame(hospitals_beds)
hospitals_beds_pd.set_index("id", inplace=True)
hospitals_beds_pd.head(5)

Unnamed: 0_level_0,licensed_beds,icu_beds,ventilator_usage
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1-04013,62.0,0,0
2-04019,295.0,2,2
3-06019,54.0,2,2
4-09009,216.0,1,2
5-10003,62.0,0,1


In [77]:
# Creating DataFrame for Hospitals 
hospitals_pd = pd.DataFrame(hospitals)
hospitals_pd.set_index("id", inplace=True)
hospitals_pd.head(5)

Unnamed: 0_level_0,hospital_name
id,Unnamed: 1_level_1
1-04013,Phoenix VA Health Care System (AKA Carl T Hayd...
2-04019,Southern Arizona VA Health Care System
3-06019,VA Central California Health Care System
4-09009,VA Connecticut Healthcare System - West Haven ...
5-10003,Wilmington VA Medical Center


### Create database connection

In [82]:
# Create PostgreSQL database connection using credentials form config.sys
database_url = f"postgres://{creds.PGUSER}:{creds.PGPASSWORD}@{creds.PGHOST}:5432/{creds.PGDATABASE}"
engine = sqlalchemy_package.create_engine(database_url,
                connect_args={'options': '-csearch_path={}'.format(creds.DBSCHEMA)})

In [85]:
# Confirm tables
engine.table_names()

['us_hospitals', 'hospitals_location', 'hospitals_beds', 'hospitals_geometry']

### Load DataFrames into database

In [84]:
# Load US hospitals list
hospitals_pd.to_sql(name='us_hospitals', con=engine, if_exists='append', index=True)

In [89]:
# Load US hospitals beds
hospitals_beds_pd.to_sql(name='hospitals_beds', con=engine, if_exists='append', index=True)

In [93]:
# Load US hospitals location
hospitals_location_pd.to_sql(name='hospitals_location', con=engine, if_exists='append', index=True)

In [97]:
# Load US hospitals geometry
hospitals_geometry_pd.to_sql(name='hospitals_geometry', con=engine, if_exists='append', index=True)

### Query Analysis

In [99]:
def load_data(schema='public', table='us_hospitals', query=None):
    if query == None:
        sql_command = "SELECT * FROM {}.{};".format(str(schema), str(table))
    else: sql_command = query
    print ('Query:',sql_command)
    # Load the data
    data = pd.read_sql(sql_command, conn)
    print('Data shape:',data.shape)
    return (data)

In [100]:
hospitals_request = """
    SELECT h.hospital_name, l.hospital_type, l.state_name, b.licensed_beds
    FROM hospitals AS h
    INNER JOIN hospitals_beds AS b USING (id)
    INNER JOIN hospitals_location AS l USING (id);
    """
hosp_data = load_data(query=hospitals_request)
hosp_data.head()

Query: 
    SELECT h.hospital_name, l.hospital_type, l.state_name, b.licensed_beds
    FROM hospitals AS h
    INNER JOIN hospitals_beds AS b USING (id)
    INNER JOIN hospitals_location AS l USING (id);
    


NameError: name 'conn' is not defined