In [1]:
import sys
import os
from prefect import task, Flow

abspath = os.path.dirname(os.path.normpath(os.path.abspath(os.path.dirname(''))))

sys.path.append(abspath)

driver_path = os.path.join(abspath, 'drivers', 'chromedriver.exe')

raw_download_path = os.path.normpath(os.path.join(abspath, 'data', 'raw'))

curate_download_path = os.path.normpath(os.path.join(abspath, 'data', 'curated'))

from process.raw import *

from process.curate import *


In [2]:
hospital_urls = get_source_urls(driver_path, abspath)

In [None]:
""" Curated Standard Charge Data """
## Duke
df_duke = get_duke(hospital_urls, raw_download_path)

curate_duke_df = curate_duke(df_duke, curate_download_path)

## Cone
df_cone = get_cone(hospital_urls, raw_download_path)

curate_cone_df = curate_cone(df_cone, curate_download_path)

## North Carolina Baptist
df_ncb = get_ncb(hospital_urls, raw_download_path)

curate_ncb_df = curate_ncb(df_ncb, curate_download_path)

## Novant
df_novant = get_novant(hospital_urls, raw_download_path)

curate_novant_df = curate_novant(df_novant, curate_download_path)

## App Regional
df_cdm_app, df_drg_app, df_shop_app = get_app(hospital_urls, raw_download_path)

curate_app_df = curate_app_shoppable(df_shop_app, curate_download_path)

## NHRMC
df_nhrmc_op, df_nhrmc_ip = get_nhrmc(hospital_urls, raw_download_path)

curate_nhrmc_df = curate_nhrmc(df_nhrmc_op, df_nhrmc_ip, curate_download_path)

## Catawba
df_catawba = get_catawba(hospital_urls,  raw_download_path)

curate_catawba_df = curate_catawba(df_catawba, curate_download_path)

## Northern 
df_northern = get_northern(hospital_urls, raw_download_path)

curated_north_df = curate_northern(df_northern, curate_download_path)

## WakeMed 
df_wakemed = get_wakemed(hospital_urls, raw_download_path, driver_path)

curated_wakemed = curate_wakemed(df_wakemed, curate_download_path)

## First Health 
df_first =  get_first(hospital_urls, raw_download_path) #need to pivot data on payer

curate_first_df = curate_first(df_first, curate_download_path)



In [13]:
## Atrium (!!! Need Curate)

def get_atrium(hospital_urls: dict, raw_download_path: str, hospital_id='atrium-health') -> pd.DataFrame:

	"""Get Atrium Health data from url"""

	url_list = hospital_urls[hospital_id]

	download_path = os.path.join(raw_download_path, hospital_id)
	
	create_directory(download_path)

	df_list = []

	for url in url_list:

		filename = url.split('/')[-1].replace('.json', '.csv')

		# download the file

		response = get_url_data(url, is_request=True)

		# create pandas dataframe from response

		df = pd.read_json(response.data)

		df.replace(r'', np.nan, inplace=True)
		
		df['Filename'] = filename

		try:
			df = df[["Procedure", "Code Type", "Code", "Rev Code", "Procedure Description", "Min /Max", " Inpatient Gross Charge ", " Inpatient Negotiated Charge ", " Outpatient Gross Charge ", " Outpatient Negotiated Charge ", "TabName", "Quantity", "Payer", "Plan(s)",  " Inpatient Discounted Charge ", " Outpatient Discounted Charge ", "Plan", "Product", " Gross Charge - Facility ", " Negotiated Charge - Facility ", " Gross Charge - Non-Facility ", " Negotiated Charge - Non-Facility "]]
		except Exception as e:
			print(filename)
			print(e)

		# df.to_csv(os.path.join(download_path, filename), index=False)


		# df_list.append(df)

	
	return None

df_atrium = get_atrium(hospital_urls, raw_download_path)


56-1398929_AtriumHealthBehavioralHealth_StandardCharges.csv
"[' Outpatient Negotiated Charge ', 'Plan', ' Inpatient Negotiated Charge ', 'Min /Max'] not in index"
56-0529945_AtriumHealthUniversity_StandardCharges.csv
"['Plan'] not in index"
56-0529945_AtriumHealthAnson_StandardCharges.csv
"['Plan'] not in index"
56-0529945_AtriumHealthCleveland_StandardCharges.csv
"['Plan'] not in index"
56-0529945_AtriumHealthLincoln_StandardCharges.csv
"['Plan'] not in index"


In [None]:
df_atrium.head(10)

In [None]:
##  Vidant (!!! Need Curate)
# df_vidant = get_vidant(hospital_urls, raw_download_path)


In [None]:
""" Below only contain Standard Charges per Payor so we will only curate the raw """

## Iredell
# df_cdm_iredell, df_drg_iredell, df_drg_internet =  get_iredell(hospital_urls, raw_download_path) # Get's charge master but not standard charges

## Mission
# df_mission = get_mission(hospital_urls, raw_download_path) #Get's charge master but not standard charges

## Cateret 
# df_comp_cateret, df_desc_cateret = get_cateret(hospital_urls,  raw_download_path) #Get's charge master not standard charges
