In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
from pathlib import Path


In [2]:
# Input files (update the file names here if the contents of Resources/ change).
resources_dir = Path("Resources")
census_csv = resources_dir / "usethisfile_ACSDP5YSPT2021.DP03-Data.csv"
readmissions_csv = resources_dir / "USETHIS_cms_readmission.csv"


In [3]:
# Load the readmission CSV, give three columns display-friendly names,
# and index the rows by state.
readmission_column_labels = {'area_type': 'Area', 'primary_race': 'Race', 'value': '% Readmission'}
readmission_data = (
    pd.read_csv(readmissions_csv)
    .rename(columns=readmission_column_labels)
    .set_index("state")
)

# Preview the first 15 rows.
readmission_data.head(15)

Unnamed: 0_level_0,year,county,Area,Race,% Readmission
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ALABAMA,2020,Autauga County,Urban,White,15
ALABAMA,2020,Baldwin County,Rural,White,16
ALABAMA,2020,Barbour County,Rural,White,16
ALABAMA,2020,Bibb County,Urban,White,15
ALABAMA,2020,Bullock County,Rural,White,16
ALABAMA,2020,Butler County,Rural,White,15
ALABAMA,2020,Calhoun County,Urban,White,15
ALABAMA,2020,Cherokee County,Rural,White,15
ALABAMA,2020,Choctaw County,Rural,White,15
ALABAMA,2020,Clarke County,Rural,White,16


In [4]:
# Mean "% Readmission" for every (state, Race) pair. "state" is the index
# of readmission_data and "Race" is a regular column; groupby accepts both.
avg_percentage_readmission = (
    readmission_data
    .groupby(["state", "Race"])["% Readmission"]
    .mean()
)

# to_csv does not create missing directories, so make sure the output folder
# exists first; otherwise a fresh checkout without Output/ raises OSError.
readmission_output_dir = Path("Output")
readmission_output_dir.mkdir(parents=True, exist_ok=True)
avg_percentage_readmission.to_csv(
    readmission_output_dir / "Average_Readmission_Clean.csv", header=True
)

# Display the grouped averages.
avg_percentage_readmission

state    Race                         
ALABAMA  American Indian/Alaska native    15.767442
         Asian/Pacific islander           15.480000
         Black                            15.403689
         Hispanic                         15.300654
         Other                            15.555556
                                            ...    
WYOMING  Asian/Pacific islander           15.181818
         Black                            15.000000
         Hispanic                         14.988764
         Other                            15.062500
         White                            15.242424
Name: % Readmission, Length: 308, dtype: float64

In [5]:
# Raw census table, kept under its own name because later cells select from it.
census_data = pd.read_csv(filepath_or_buffer=census_csv)

# Preview the first five rows.
census_data.head(5)

Unnamed: 0,GEO_ID,NAME,POPGROUP,POPGROUP_LABEL,DP03_0001E,DP03_0001M,DP03_0002E,DP03_0002M,DP03_0003E,DP03_0003M,...,DP03_0133PE,DP03_0133PM,DP03_0134PE,DP03_0134PM,DP03_0135PE,DP03_0135PM,DP03_0136PE,DP03_0136PM,DP03_0137PE,DP03_0137PM
0,Geography,Geographic Area Name,Race/Ethnic Group,Population Groups,Estimate!!EMPLOYMENT STATUS!!Population 16 yea...,Margin of Error!!EMPLOYMENT STATUS!!Population...,Estimate!!EMPLOYMENT STATUS!!Population 16 yea...,Margin of Error!!EMPLOYMENT STATUS!!Population...,Estimate!!EMPLOYMENT STATUS!!Population 16 yea...,Margin of Error!!EMPLOYMENT STATUS!!Population...,...,Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHO...,Percent Margin of Error!!PERCENTAGE OF FAMILIE...,Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHO...,Percent Margin of Error!!PERCENTAGE OF FAMILIE...,Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHO...,Percent Margin of Error!!PERCENTAGE OF FAMILIE...,Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHO...,Percent Margin of Error!!PERCENTAGE OF FAMILIE...,Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHO...,Percent Margin of Error!!PERCENTAGE OF FAMILIE...
1,0400000US02,Alaska,2,White alone,375399,1419,254239,2268,239424,2236,...,7.1,0.4,7.2,0.4,6.5,0.8,4.5,0.4,16.4,1
2,0400000US02,Alaska,4,Black or African American alone,19205,698,14351,778,12108,844,...,8.9,2.1,9.2,2.4,6.3,3.6,9.7,4,16.2,4.6
3,0400000US02,Alaska,6,American Indian and Alaska Native alone,76502,1244,42802,1147,42595,1137,...,21.5,1.1,22.9,1.3,13,2.2,20.6,1.3,38.6,2.6
4,0400000US02,Alaska,12,Asian alone,38486,747,28061,906,27570,951,...,8.7,1.7,8.7,1.9,8.7,3.4,7.1,2.7,19.8,3.5


In [6]:
# Raw census column codes kept for the income analysis, mapped to readable names.
# Dict order defines the column order of the resulting frame.
census_column_labels = {
    'NAME': 'State',
    'POPGROUP_LABEL': 'Race',
    'DP03_0051E': 'Total Household',
    'DP03_0052E': '<10k',
    'DP03_0053E': '10k to 15k',
    'DP03_0054E': '15k to 25k',
    'DP03_0055E': '25k to 35k',
    'DP03_0056E': '35k to 50k',
    'DP03_0057E': '50k to 75k',
    'DP03_0058E': '75k to 100k',
    'DP03_0059E': '100k to 150k',
    'DP03_0060E': '150k to 200k',
    'DP03_0061E': '200k+',
}
# Every renamed column except the two text identifiers holds a household count.
household_count_columns = [
    label for label in census_column_labels.values() if label not in ('State', 'Race')
]

income_poverty_df = (
    census_data[list(census_column_labels)]
    .rename(columns=census_column_labels)
    .iloc[1:]  # row 0 of the raw file is a text description of each column, not data
    .copy()
)

# Because of that description row, pandas reads every column as text. Convert
# the counts to numbers so arithmetic on them works; any value that is not a
# number becomes NaN instead of raising.
income_poverty_df[household_count_columns] = (
    income_poverty_df[household_count_columns].apply(pd.to_numeric, errors="coerce")
)

# to_csv does not create missing directories, so make sure the output folder
# exists first; otherwise a fresh checkout without Output/ raises OSError.
census_output_dir = Path("Output")
census_output_dir.mkdir(parents=True, exist_ok=True)
income_poverty_df.to_csv(census_output_dir / "Clean_Census.csv", index=False)

# Preview the first 20 cleaned rows.
income_poverty_df.head(20)



Unnamed: 0,State,Race,Total Household,<10k,10k to 15k,15k to 25k,25k to 35k,35k to 50k,50k to 75k,75k to 100k,100k to 150k,150k to 200k,200k+
1,Alaska,White alone,185456,6136,4307,9692,10808,17267,30886,25917,38157,19931,22355
2,Alaska,Black or African American alone,8458,380,211,541,715,895,2309,1105,1062,685,555
3,Alaska,American Indian and Alaska Native alone,28524,2363,1553,3627,2603,3395,5251,3148,3560,1636,1388
4,Alaska,Asian alone,12898,473,370,782,697,1137,2747,1980,2520,1387,805
5,Alaska,Native Hawaiian and Other Pacific Islander alone,2267,21,6,106,95,295,669,285,460,223,107
6,Alaska,Some Other Race alone,4239,206,152,264,270,645,731,632,885,291,163
7,Alaska,Hispanic or Latino (of any race),15760,664,441,920,1190,2387,3346,1985,2874,1022,931
8,District of Columbia,White alone,138443,4233,1294,2352,2965,4845,11908,14892,26509,20163,49282
9,District of Columbia,Black or African American alone,132384,17935,8850,11799,12113,13561,20097,15590,16105,7627,8707
10,District of Columbia,American Indian and Alaska Native alone,1198,116,195,59,43,74,424,95,87,20,85
