# Health Professional Shortage Areas - Exploratory Data Analysis
---
## Goal:
### Which states/counties in the U.S. are facing the most severe shortages of healthcare professionals?
---

In [1]:
import pandas as pd
import seaborn as sns

In [2]:
# Read the raw HPSA workbook into df; df itself is left unmodified by the cells below
df = pd.read_excel('raw_data/BCD_HPSA_FCT_DET_PC.xlsx')

# Work on an independent (deep) copy named hpsa so every filter/transform
# is applied to the copy rather than to the freshly loaded frame
hpsa = df.copy(deep=True)

#### Subset data to geographic-area HPSAs that cover a single county and have an HPSA status of Designated or Proposed For Withdrawal

In [56]:
# Scope check before filtering: the analysis covers geographic-area HPSAs only
# (NOT population groups or facilities), so tally the rows per population type
hpsa.loc[:, 'HPSA Population Type'].value_counts()

Geographic Population                                          27604
Low Income Population HPSA                                     22282
Medicaid Eligible Population HPSA                               5745
Low Income Migrant Farmworker Population HPSA                    669
Other Population HPSA                                            557
Low Income Homeless Population HPSA                              520
Low Income Homeless Migrant Farmworker Population HPSA           450
Homeless Population HPSA                                         443
Native American Population HPSA                                  327
Migrant Seasonal Worker Population HPSA                          180
Low Income Migrant Seasonal Worker Population HPSA                54
Low Income Homeless Migrant Seasonal Worker Population HPSA       44
Migrant Farmworker Population HPSA                                24
Name: HPSA Population Type, dtype: int64

In [58]:
# Restrict hpsa to the rows whose population type is 'Geographic Population'
hpsa = hpsa.loc[hpsa['HPSA Population Type'].eq('Geographic Population')]

# Display (rows, columns) to confirm the filter was applied
hpsa.shape

(27604, 65)

In [64]:
# Only current HPSAs (those that have NOT been withdrawn) are of interest.
# Tally the status values present before filtering on them.
hpsa.loc[:, 'HPSA Status'].value_counts()

Withdrawn                  20950
Designated                  3646
Proposed For Withdrawal     3008
Name: HPSA Status, dtype: int64

In [65]:
# Keep only the rows that have a status of Designated or Proposed For Withdrawal.
# An explicit allow-list is used instead of `!= 'Withdrawn'` so the code matches
# the stated intent: with the inequality, rows with a missing status
# (NaN != 'Withdrawn' evaluates to True) or with any status value not listed
# here would silently pass the filter.
hpsa = hpsa[hpsa['HPSA Status'].isin(['Designated', 'Proposed For Withdrawal'])]

# Check shape to make sure indexing worked
hpsa.shape

(6654, 65)

In [67]:
# The analysis is limited to single counties, so first tally the component
# types (i.e. the type of geography each row describes)
hpsa.loc[:, 'HPSA Component Type Description'].value_counts()

Census Tract          3891
County Subdivision    1651
Single County         1112
Name: HPSA Component Type Description, dtype: int64

In [69]:
# Retain only the rows whose component type is 'Single County'
hpsa = hpsa.loc[hpsa['HPSA Component Type Description'].eq('Single County')]

# Display (rows, columns) to confirm the filter was applied
hpsa.shape

(1112, 65)

#### Drop unnecessary columns

In [None]:
# Names of the columns that are to be dropped
cols_to_drop = [
    'HPSA Discipline Class',
    'PC MCTA Score',
]

In [76]:
# List the distinct primary state abbreviations that remain in hpsa
hpsa.loc[:, 'Primary State Abbreviation'].unique()

array(['TX', 'OH', 'WI', 'WA', 'SD', 'OK', 'NC', 'MH', 'FM', 'AS', 'WY',
       'VA', 'UT', 'PA', 'NV', 'MS', 'MN', 'MI', 'KS', 'IA', 'IN', 'MO',
       'LA', 'NY', 'GA', 'CO', 'AK', 'IL', 'HI', 'AR', 'FL', 'PW', 'MP',
       'TN', 'OR', 'ND', 'WV', 'SC', 'MT', 'NM', 'NE', 'KY', 'MA', 'CA',
       'AL', 'ID', 'MD', 'VI'], dtype=object)