In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
from pathlib import Path

In [None]:
# Read the cleaned average-readmission CSV from the Output folder and show it.
average_readmission_clean_csv = Path("Output/Average_Readmission_Clean.csv")
average_readmission_clean = pd.read_csv(filepath_or_buffer=average_readmission_clean_csv)
average_readmission_clean

In [None]:
# Wrap the loaded table in a DataFrame object under the name the later cells use
# (read_csv already returned a DataFrame, so no data conversion happens here).
average_readmission_clean_df = pd.DataFrame(average_readmission_clean)

# Mean '% Readmission' per state; this Series is what the later cells rank to
# pick the highest- and lowest-readmission states.
avg_readmissions_by_state = (
    average_readmission_clean_df
    .groupby('state')['% Readmission']
    .mean()
)

# Same values as a one-column frame (indexed by state) for display and plotting.
avg_read_state_df = avg_readmissions_by_state.to_frame()
avg_read_state_df.head(10)

In [None]:
# Bar chart of the mean % Readmission for every state in avg_read_state_df.
fig, ax = plt.subplots(figsize=(12, 5))
ax.bar(
    x=avg_read_state_df.index,
    height=avg_read_state_df['% Readmission'],
    width=0.6,
    color='skyblue',
)
ax.set_xlabel('State')
ax.set_ylabel('% Readmission')
ax.set_title('All States by Average Readmission Percentage')
# Vertical state names so the full list of labels stays readable.
plt.xticks(rotation=90, ha='right')
fig.tight_layout()
plt.show()

In [None]:
# Rank states from highest to lowest mean % Readmission.
top_sorted_states = avg_readmissions_by_state.sort_values(ascending=False)

# Keep the four states with the highest readmission percentage.
top_4_states = top_sorted_states.iloc[:4]
top_4_states_df = top_4_states.to_frame()
top_4_states_df

In [None]:
# Rank states from lowest to highest mean % Readmission.
bottom_sorted_states = avg_readmissions_by_state.sort_values(ascending=True)

# Keep the four states with the lowest readmission percentage.
bottom_4_states = bottom_sorted_states.iloc[:4]
bottom_4_states_df = bottom_4_states.to_frame()
bottom_4_states_df

In [None]:
# One chart comparing the four lowest-readmission states (red bars) with the
# four highest-readmission states (sky-blue bars).
fig, ax = plt.subplots(figsize=(12, 6))

# (frame, bar colour, legend entry) for each group, drawn in this order.
bar_groups = (
    (bottom_4_states_df, 'red', 'Best 4 States (%Readmission)'),
    (top_4_states_df, 'skyblue', 'Worst 4 States (%Readmission)'),
)
for group_df, bar_color, legend_label in bar_groups:
    ax.bar(group_df.index, group_df['% Readmission'], color=bar_color, width=0.8, label=legend_label)

# Axis labels, title and legend
ax.set_xlabel('State')
ax.set_ylabel('% Readmission')
ax.set_title('Readmission Percentage by State (Worst 4 vs Best 4)')
plt.xticks(rotation=45, ha='right')
ax.legend()  # one entry per group of bars
fig.tight_layout()
plt.show()

In [None]:
# Hard-coded names of the four lowest-readmission states, upper-cased so the
# comparison below does not depend on the casing used in the 'state' column.
# Note: this rebinds bottom_4_states (previously a Series) to a plain list.
bottom_4_states = ['IDAHO', 'HAWAII', 'ALASKA', 'UTAH']

# Row mask: True where the upper-cased state name is one of the four above.
in_bottom_4 = average_readmission_clean_df['state'].str.upper().isin(bottom_4_states)
filtered_df1 = average_readmission_clean_df[in_bottom_4]
filtered_df1

In [None]:
# Grouped bar chart: mean % Readmission for each race within each state of
# filtered_df1 (the four lowest-readmission states).
states = filtered_df1['state'].dropna().unique()
races = filtered_df1['Race'].dropna().unique()

# Average only the plotted column. Calling .mean() on the whole frame raises in
# pandas 2.x as soon as any non-key column is non-numeric.
grouped_df = filtered_df1.groupby(['state', 'Race'], as_index=False)['% Readmission'].mean()

# State x race table, explicitly ordered like `states` / `races`. groupby sorts
# its keys while unique() keeps appearance order, so without this reindex a bar
# could end up under the wrong state label. Missing (state, race) pairs become NaN.
readmission_table = (
    grouped_df
    .pivot(index='state', columns='Race', values='% Readmission')
    .reindex(index=states, columns=races)
)

# Create the chart
fig, ax = plt.subplots(figsize=(12, 8))
bar_width = 0.1
index = np.arange(len(states))
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']

# One bar series per race, shifted right by one bar width per race
for i, race in enumerate(races):
    heights = readmission_table[race].to_numpy(dtype=float)
    has_value = ~np.isnan(heights)  # skip states with no rows for this race
    bar_positions = index + i * bar_width
    ax.bar(
        bar_positions[has_value],
        heights[has_value],
        bar_width,
        label=race,
        color=colors[i % len(colors)],  # cycle the palette instead of overflowing it
    )
    # Value label just above each bar
    for x_pos, percentage in zip(bar_positions[has_value], heights[has_value]):
        ax.text(x_pos, percentage + 0.2, f'{percentage:.2f}%', ha='center', va='bottom', rotation=90)

ax.set_xlabel('States')
ax.set_ylabel('% Readmission')
ax.set_title('Readmission Percentage by Race (Best 4 States)')
# Centre each state label under its cluster: the cluster spans len(races) bars.
ax.set_xticks(index + bar_width * (len(races) - 1) / 2)
ax.set_xticklabels(states, rotation=45)
ax.legend(loc='lower right')
ax.set_ylim(0, 17.5)
fig.tight_layout()
plt.show()

In [None]:
# Hard-coded names of the four highest-readmission states, upper-cased so the
# comparison below does not depend on the casing used in the 'state' column.
# Note: this rebinds top_4_states (previously a Series) to a plain list.
top_4_states = ['DISTRICT OF COLUMBIA', 'MASSACHUSETTS', 'NEW JERSEY', 'NEW YORK']

# Row mask: True where the upper-cased state name is one of the four above.
in_top_4 = average_readmission_clean_df['state'].str.upper().isin(top_4_states)
filtered_df2 = average_readmission_clean_df[in_top_4]
filtered_df2

In [None]:
# Grouped bar chart: mean % Readmission for each race within each state of
# filtered_df2 (the four highest-readmission states).
states = filtered_df2['state'].dropna().unique()
races = filtered_df2['Race'].dropna().unique()

# Average only the plotted column. Calling .mean() on the whole frame raises in
# pandas 2.x as soon as any non-key column is non-numeric.
grouped_df = filtered_df2.groupby(['state', 'Race'], as_index=False)['% Readmission'].mean()

# State x race table, explicitly ordered like `states` / `races`. groupby sorts
# its keys while unique() keeps appearance order, so without this reindex a bar
# could end up under the wrong state label. Missing (state, race) pairs become NaN.
readmission_table = (
    grouped_df
    .pivot(index='state', columns='Race', values='% Readmission')
    .reindex(index=states, columns=races)
)

# Create bar chart
fig, ax = plt.subplots(figsize=(12, 8))
bar_width = 0.1
index = np.arange(len(states))
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']

# One bar series per race, shifted right by one bar width per race
for i, race in enumerate(races):
    heights = readmission_table[race].to_numpy(dtype=float)
    has_value = ~np.isnan(heights)  # skip states with no rows for this race
    bar_positions = index + i * bar_width
    ax.bar(
        bar_positions[has_value],
        heights[has_value],
        bar_width,
        label=race,
        color=colors[i % len(colors)],  # cycle the palette instead of overflowing it
    )
    # Value label just above each bar
    for x_pos, percentage in zip(bar_positions[has_value], heights[has_value]):
        ax.text(x_pos, percentage + 0.2, f'{percentage:.2f}%', ha='center', va='bottom', rotation=90)

ax.set_xlabel('States')
ax.set_ylabel('% Readmission')
ax.set_title('Readmission Percentage by Race (Worst 4 States)')
# Centre each state label under its cluster: the cluster spans len(races) bars.
ax.set_xticks(index + bar_width * (len(races) - 1) / 2)
ax.set_xticklabels(states, rotation=45)
ax.legend(loc='lower right')
ax.set_ylim(0, 21)
fig.tight_layout()
plt.show()

In [None]:
# Stacked bar chart: for each state of filtered_df2, the per-race mean
# % Readmission values are stacked on top of one another in one bar.
states = filtered_df2['state'].dropna().unique()
races = filtered_df2['Race'].dropna().unique()

# Average only the plotted column. Calling .mean() on the whole frame raises in
# pandas 2.x as soon as any non-key column is non-numeric.
grouped_df = filtered_df2.groupby(['state', 'Race'], as_index=False)['% Readmission'].mean()

# State x race table, explicitly ordered like `states` / `races`. groupby sorts
# its keys while unique() keeps appearance order, so without this reindex a
# segment could end up in the wrong state's bar. Missing pairs become NaN.
readmission_table = (
    grouped_df
    .pivot(index='state', columns='Race', values='% Readmission')
    .reindex(index=states, columns=races)
)

# Create chart
fig, ax = plt.subplots(figsize=(12, 8))

bar_width = 0.5
index = np.arange(len(states))
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']
bottom = np.zeros(len(states))  # running top of the stack for every state

# Add one segment per race to every state's bar
for i, race in enumerate(races):
    race_percentages = readmission_table[race].to_numpy(dtype=float)
    has_value = ~np.isnan(race_percentages)
    # A missing (state, race) pair contributes a zero-height segment so the
    # stack for that state is not shifted and other states stay aligned.
    segment_heights = np.where(has_value, race_percentages, 0.0)
    ax.bar(index, segment_heights, bar_width, label=race, color=colors[i % len(colors)], bottom=bottom)
    # Value label centred inside each real segment
    for j in np.flatnonzero(has_value):
        percentage = segment_heights[j]
        ax.text(index[j], bottom[j] + percentage / 2, f'{percentage:.2f}%', ha='center', va='center', color='white')
    bottom = bottom + segment_heights

ax.set_xlabel('States')
ax.set_ylabel('% Readmission')
ax.set_title('Readmission Percentages by Race (Worst 4 States)')
ax.set_xticks(index)
ax.set_xticklabels(states, rotation=45)
ax.legend(loc='best')
ax.set_ylim(0, 132)
fig.tight_layout()
plt.show()

In [None]:
# Read the selected-states CSV from the Output folder. Later cells use its
# 'State' and 'Race' columns and treat every column from the fourth onward as
# an income bracket.
state_census_clean_csv = Path("Output/Seleceted_States.csv")
state_census_clean = pd.read_csv(filepath_or_buffer=state_census_clean_csv)
# state_census_clean

In [None]:
# States shown in this figure, one subplot each
states_to_plot = ["Alaska", "Utah", "Hawaii", "Idaho"]

# Rows of the selected-states table belonging to those states
state_data = state_census_clean[state_census_clean['State'].isin(states_to_plot)]

# Every column from the fourth onward is plotted as an income bracket
income_brackets = state_census_clean.columns[3:]

# state -> {race -> {income bracket -> value}}
state_data_dict = {
    state: state_data[state_data['State'] == state].set_index('Race')[income_brackets].T.to_dict()
    for state in states_to_plot
}

# Grid of subplots: two rows, as many columns as needed for all states
num_states = len(states_to_plot)
num_rows = 2
num_cols = (num_states + 1) // num_rows

fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 8))

# Shared bar geometry: one x slot per income bracket
bar_width = 0.2
index = np.arange(len(income_brackets))

for i, state in enumerate(states_to_plot):
    grid_row, grid_col = divmod(i, num_cols)
    ax = axes[grid_row, grid_col]

    households_by_race = state_data_dict[state]
    races = list(households_by_race)
    num_races = len(races)
    # Shift that centres the whole cluster of race bars on the bracket's tick
    centering_shift = bar_width * (num_races - 1) / 2

    for j, race in enumerate(races):
        bar_positions = index + j * bar_width - centering_shift
        race_data = list(households_by_race[race].values())
        ax.bar(bar_positions, race_data, bar_width, label=race)

    ax.set(title=state, xlabel="Income Bracket", ylabel="Total Households")
    ax.set_xticks(index)
    ax.set_xticklabels(income_brackets, rotation=45)
    ax.legend()

# Avoid overlapping subplot labels, then render
plt.tight_layout()
plt.show()


In [None]:
# Per-state subplots of household totals by income bracket, one bar series per
# race. Totals are summed per (State, Race), so repeated rows for the same
# pair are added together.

# Choose specific states to plot
states_to_plot = ["Alaska", "Utah", "Hawaii", "Idaho"]

# Filter data for the chosen states
state_data = state_census_clean[state_census_clean['State'].isin(states_to_plot)]

# Group data by state and race. The name is kept as-is; note that these four
# states are the ones charted as 'Best 4 States' earlier in the notebook.
top_readmission_data = state_data.groupby(['State', 'Race'])

# Every column from the fourth onward is treated as an income bracket
income_brackets = state_census_clean.columns[3:]

# One row per (State, Race) holding the summed value of every income bracket
households_by_state_race = top_readmission_data[list(income_brackets)].sum()

# Plot bar charts for the chosen states
num_states = len(states_to_plot)
num_rows = 2
num_cols = (num_states + 1) // num_rows

fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 8))

bracket_positions = np.arange(len(income_brackets))

for state, state_totals in households_by_state_race.groupby(level='State'):
    panel = states_to_plot.index(state)
    ax = axes[panel // num_cols, panel % num_cols]

    races = state_totals.index.get_level_values('Race')
    num_races = len(races)
    # Previously every race was drawn at the same x positions, so bars painted
    # over each other. Give each race its own slot inside the bracket, and
    # shrink the bars when there are too many races to fit at width 0.2.
    bar_width = min(0.2, 0.8 / num_races)

    for j, race in enumerate(races):
        bar_positions = bracket_positions + (j - (num_races - 1) / 2) * bar_width
        total_households = state_totals.iloc[j].to_numpy()
        ax.bar(bar_positions, total_households, bar_width, label=race)

    ax.set_title(f"{state}")
    ax.set_xlabel("Income Bracket")
    ax.set_ylabel("Total Households")
    ax.set_xticks(bracket_positions)
    ax.set_xticklabels(income_brackets, rotation=45)
    ax.legend()

# Adjust layout
plt.tight_layout()
plt.show()


In [None]:
# States shown in this figure, one subplot each
states_to_plot = ["New York", "New Jersey", "District of Columbia", "Massachusetts"]

# Rows of the selected-states table belonging to those states
state_data = state_census_clean[state_census_clean['State'].isin(states_to_plot)]

# Every column from the fourth onward is plotted as an income bracket
income_brackets = state_census_clean.columns[3:]

# state -> {race -> {income bracket -> value}}
state_data_dict = {
    state: state_data[state_data['State'] == state].set_index('Race')[income_brackets].T.to_dict()
    for state in states_to_plot
}

# Grid of subplots: two rows, as many columns as needed for all states
num_states = len(states_to_plot)
num_rows = 2
num_cols = (num_states + 1) // num_rows

fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 8))

# Shared bar geometry: one x slot per income bracket
bar_width = 0.2
index = np.arange(len(income_brackets))

for i, state in enumerate(states_to_plot):
    grid_row, grid_col = divmod(i, num_cols)
    ax = axes[grid_row, grid_col]

    households_by_race = state_data_dict[state]
    races = list(households_by_race)
    num_races = len(races)
    # Shift that centres the whole cluster of race bars on the bracket's tick
    centering_shift = bar_width * (num_races - 1) / 2

    for j, race in enumerate(races):
        bar_positions = index + j * bar_width - centering_shift
        race_data = list(households_by_race[race].values())
        ax.bar(bar_positions, race_data, bar_width, label=race)

    ax.set(title=state, xlabel="Income Bracket", ylabel="Total Households")
    ax.set_xticks(index)
    ax.set_xticklabels(income_brackets, rotation=45)
    ax.legend()

# Avoid overlapping subplot labels, then render
plt.tight_layout()
plt.show()


In [None]:
# Summary statistics for state_data. When the cells run top to bottom this is
# the New York / New Jersey / District of Columbia / Massachusetts subset built
# in the previous cell; an out-of-order run may leave a different subset here.
state_data.describe()