## Nursing Homes Across US

In this script, we are retrieving data regarding the Nursing Home Providers in the United States. 
This contains functions to read the data, clean up the data and extract the information required for the analysis. 

In [12]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import scipy.stats as sts
import pandas as pd
import requests
# Disable pandas' chained-assignment (SettingWithCopy) warning for the whole session.
pd.set_option('mode.chained_assignment', None)

### Data Retrieval and clean up

In [13]:
def getCleanProviderInfo(csv_path="../Resources/ProviderInfo.csv"):
    """Load the provider CSV and return a cleaned, renamed subset of it.

    Args:
        csv_path: Path (or file-like object) handed to ``pandas.read_csv``.
            Defaults to the ProviderInfo.csv file used by this notebook.

    Returns:
        A new DataFrame holding the ten columns used by the analysis, renamed
        to readable labels, without the rows whose 'Overall Rating' is
        missing. The remaining rows keep their original index labels.

    Raises:
        KeyError: If any of the expected columns is absent from the CSV.
    """
    # Source column -> label used throughout the analysis.
    column_labels = {
        'PROVNUM': "Prov No",
        'PROVNAME': 'Prov Name',
        'ADDRESS': 'Address',
        'CITY': 'City',
        'STATE': 'State',
        'ZIP': 'Zip',
        'County_name': 'County',
        'Overall_Rating': 'Overall Rating',
        'BEDCERT': 'No of cert beds',
        'RESTOT': 'Avg no of residents per day',
    }

    raw_data = pd.read_csv(csv_path)

    # Every step returns a new frame, so nothing is modified in place on a
    # subset of the raw data and no chained-assignment warning is involved.
    providerinfo_data = (
        raw_data[list(column_labels)]
        .rename(columns=column_labels)
        .dropna(subset=['Overall Rating'])
    )
    return providerinfo_data

In [14]:
# def drawBar_CountAllStates(provider_df):
#     prov_groupby_state = provider_df.groupby("State")['Prov Name'].count().reset_index(name="count by state")
#     prov_groupby_state = prov_groupby_state.sort_values('count by state', ascending=False)
#     x_values = prov_groupby_state['State']
#     y_values = prov_groupby_state['count by state']
#     plt.figure(figsize=(32,12))
#     plt.bar(x_values, y_values) 
#     plt.title("Nursing Homes Count in United States", fontsize=24)
#     plt.xlabel("States", fontsize=20)
#     plt.ylabel("Number of Nursing Homes", fontsize=20)
#     plt.style.use('seaborn')    
#     plt.show()   

###### Get the top ten best rated nursing homes in US

In [15]:
def getToptenNames(provider_df):
    """Return the ten states with the most providers, with their top-rated share.

    Args:
        provider_df: Provider table containing at least the 'State',
            'Prov Name' and 'Overall Rating' columns.

    Returns:
        A DataFrame of at most ten rows with the columns 'State',
        'Top Rated Total' (providers rated 4 or above), 'Total' (all
        providers) and 'Percentage' (top-rated share of the total, 0-100).
        Rows are ordered by 'Total', then 'Top Rated Total', both descending,
        and the index is reset to 0..n-1.
    """
    # Number of providers per state.
    total_by_state = (
        provider_df.groupby("State")["Prov Name"].count().reset_index(name="Total")
    )

    # Number of providers per state with an overall rating of 4 or above.
    top_rated = provider_df.loc[provider_df["Overall Rating"] >= 4]
    top_rated_by_state = (
        top_rated.groupby("State")["Prov Name"]
        .count()
        .reset_index(name="Top Rated Total")
    )

    # A right merge keeps every state from the per-state totals. The default
    # inner merge would silently drop states without any top-rated provider.
    merged = pd.merge(top_rated_by_state, total_by_state, on="State", how="right")
    merged["Top Rated Total"] = merged["Top Rated Total"].fillna(0).astype("int64")

    # Percentage of each state's providers that are top rated.
    merged["Percentage"] = (merged["Top Rated Total"] / merged["Total"]) * 100

    # Largest states first; ties are broken by the number of top-rated providers.
    ranked = merged.sort_values(["Total", "Top Rated Total"], ascending=False)

    return ranked.head(10).reset_index(drop=True)

In [16]:
# import import_ipynb

# from statewise import getCleanProviderInfo
# provider_df
# df2=getToptenNames()
# df2

In [17]:
# def drawBar_TopTenAllStates(provider_info):
#     # Retrieve the data for the top ten providers
#     nh_rated_topten = getToptenNames(provider_info)        
#     nh_rated_topten.plot(x='State',  y=["Total", "Top Rated Total"], kind='bar', stacked=True)
#     plt.xlabel("States")
#     plt.ylabel("Number of Nursing Homes")
#     plt.legend(["Total Nursing Homes", "Best Rated Nursing Homes"])
#     plt.style.use('bmh')
#     plt.show()

##### Get the Best Rated Nursing homes in Pennsylvania

In [18]:
def getProvidersBestRatedinPA(provider_df=None):
    """Return the Pennsylvania providers with an overall rating of 4 or above.

    Args:
        provider_df: Optional provider table with at least the 'State',
            'Prov Name', 'County' and 'Overall Rating' columns. When omitted,
            the table is loaded through getCleanProviderInfo().

    Returns:
        A new DataFrame with the columns 'Prov Name', 'County' and
        'Overall Rating', sorted by rating in descending order and indexed
        0..n-1. The input table is left unmodified.
    """
    if provider_df is None:
        provider_df = getCleanProviderInfo()

    in_pennsylvania = provider_df['State'] == 'PA'
    rated_four_plus = provider_df['Overall Rating'] >= 4

    # Non-mutating chain: select, sort and re-index without touching a slice
    # of the source frame in place.
    return (
        provider_df.loc[in_pennsylvania & rated_four_plus,
                        ['Prov Name', 'County', 'Overall Rating']]
        .sort_values('Overall Rating', ascending=False)
        .reset_index(drop=True)
    )

In [19]:
# Build the best-rated Pennsylvania table; the bare `df` below is the cell's displayed value.
df=getProvidersBestRatedinPA()
df


Unnamed: 0,Prov Name,County,Overall Rating
0,PASSAVANT RETIREMENT AND HEALT,Butler,5.0
1,MANORCARE HEALTH SERVICES-OXFORD VALLEY,Bucks,5.0
2,WILLOWBROOKE COURT SKILLED CARE CENTER AT BRIT...,Montgomery,5.0
3,FELLOWSHIP MANOR,Lehigh,5.0
4,LAKESIDE AT WILLOW VALLEY,Lancaster,5.0
...,...,...,...
321,"VALLEY VIEW HAVEN, INC",Mifflin,4.0
322,SUNNYVIEW NURSING AND REHABILITATION CENTER,Butler,4.0
323,SENECA PLACE,Allegheny,4.0
324,"LAFAYETTE MANOR, INC",Fayette,4.0


##### Get the Lowest Rated Nursing homes in Pennsylvania

In [20]:
def getProvidersLowRatedinPA(provider_df=None):
    """Return the Pennsylvania providers with an overall rating of 3 or below.

    Args:
        provider_df: Optional provider table with at least the 'State',
            'Prov Name', 'County' and 'Overall Rating' columns. When omitted,
            the table is loaded through getCleanProviderInfo().

    Returns:
        A new DataFrame with the columns 'Prov Name', 'County' and
        'Overall Rating', sorted by rating in descending order and indexed
        0..n-1. The input table is left unmodified.
    """
    if provider_df is None:
        provider_df = getCleanProviderInfo()

    in_pennsylvania = provider_df['State'] == 'PA'
    rated_three_or_less = provider_df['Overall Rating'] <= 3

    # Non-mutating chain: select, sort and re-index without touching a slice
    # of the source frame in place.
    return (
        provider_df.loc[in_pennsylvania & rated_three_or_less,
                        ['Prov Name', 'County', 'Overall Rating']]
        .sort_values('Overall Rating', ascending=False)
        .reset_index(drop=True)
    )

##### Get the Quality Ratings by Category

In [21]:
# Analyse how the various ratings contribute to the nursing home quality rating
def getProviderRatings(csv_path="../Resources/ProviderInfo.csv"):
    """Load the rating columns of the provider CSV, keeping complete rows only.

    Args:
        csv_path: Path (or file-like object) handed to ``pandas.read_csv``.
            Defaults to the ProviderInfo.csv file used by this notebook.

    Returns:
        A DataFrame with the seven rating columns, without any row that has a
        missing value in one of them. The remaining rows keep their original
        index labels.

    Raises:
        KeyError: If any of the rating columns is absent from the CSV.
    """
    rating_columns = [
        'Overall_Rating', 'SURVEY_RATING', 'Quality_Rating', 'LS_Quality_Rating',
        'SS_Quality_Rating', 'Staffing_Rating', 'RN_staffing_rating',
    ]

    # Retrieve the different ratings for the nursing homes
    provider_ratings = pd.read_csv(csv_path)[rating_columns]

    # Drop every row with a missing rating from the dataset
    return provider_ratings.dropna(axis=0)

In [22]:
# def drawOverallvsOtherRatings():
#     providerinfo_data = getProviderRatings()
#     groupby_rating = providerinfo_data.groupby('Overall_Rating').mean().reset_index()
#     groupby_rating

#     x_axis = groupby_rating['Overall_Rating']

#     y_axis_1 = groupby_rating['SURVEY_RATING']
#     y_axis_2 = groupby_rating['Quality_Rating']
#     y_axis_3 = groupby_rating['Staffing_Rating']
#     y_axis_4 = groupby_rating['RN_staffing_rating']
#     y_axis_5 = groupby_rating['LS_Quality_Rating']
#     y_axis_6 = groupby_rating['SS_Quality_Rating']

#     ratings = ["Survey Rating", "Quality Rating", "Staffing_Rating", "RN_staffing_rating", 'LS_Quality_Rating', 'SS_Quality_Rating']

#     plt.plot(x_axis, y_axis_1, linewidth=1, marker="o", color="r")
#     plt.plot(x_axis, y_axis_2, linewidth=1, marker="s", color="g")
#     plt.plot(x_axis, y_axis_3, linewidth=1, marker="*", color="b")
#     plt.plot(x_axis, y_axis_4, linewidth=1, marker="^", color="y")
#     plt.plot(x_axis, y_axis_5, linewidth=1, marker="^", color="m")
#     plt.plot(x_axis, y_axis_6, linewidth=1, marker="^", color="c")
#     plt.xlabel("Overall Ratings")
#     plt.ylabel("Other Rating")
#     plt.legend(ratings)
#     plt.style.use('bmh')
#     plt.show()