#### Information about Nursing Home Residents in Pennsylvania
In this script we perform an age-wise and gender-wise analysis of the residents in Pennsylvania nursing homes.

In [19]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import scipy.stats as sts
import pandas as pd
import requests
import import_ipynb
from statewise import getCleanProviderInfo
from statewise import getProvidersBestRatedinPA

#### Data Retrieval and clean up

In [20]:
# Retrieve the Resident Census data for Pennsylvania State Nursing Homes
# And clean up the dataset
def getPAResidentCensus():
    """Load and clean the Pennsylvania nursing home resident census.

    Reads the "Nursing Home Report 5" sheet of
    ../Resources/Nursing_Home_Report_2018_2019.xlsx (skipping the first six
    rows of the sheet), renames the long column headers to short ones, drops
    incomplete rows and converts the resident counts to integers.

    Returns:
        pandas.DataFrame: the rows that have no missing value and whose
        count columns are all numeric. The first two columns are left as
        read; every column from the third onwards is converted to ``int``.
    """
    # Read the fifth report sheet from the PA state excel file
    read_pennstateinfo = pd.read_excel(
        "../Resources/Nursing_Home_Report_2018_2019.xlsx",
        sheet_name="Nursing Home Report 5",
        skiprows=6,
    )

    # Rename the columns with shorter names.
    # NOTE(review): 'Men 99 to 99' and 'Women 99 to 99' look like typos for
    # '95 to 99' (the 'Resident 95 to 99' entry uses that form). They are kept
    # unchanged because other cells may already reference these names --
    # confirm before renaming.
    pa_resident_census = read_pennstateinfo.rename(columns={
        'Facility Name' : 'Prov Name',
        'Men under 18 years of age' : 'Men below 18',
        'Men between 18 and 44 years of age' : 'Men 18 to 44',
        'Men between 45 and 59 years of age' : 'Men 45 to 59',
        'Men between 60 and 64 years of age' : 'Men 60 to 64',
        'Men between 65 and 69 years of age' : 'Men 65 to 69',
        'Men between 70 and 74 years of age' : 'Men 70 to 74',
        'Men between 75 and 79 years of age' : 'Men 75 to 79',
        'Men between 80 and 84 years of age' : 'Men 80 to 84',
        'Men between 85 and 89 years of age' : 'Men 85 to 89',
        'Men between 90 and 94 years of age' : 'Men 90 to 94',
        'Men between 95 and 99 years of age' : 'Men 99 to 99',
        'Men 100 years of age and older' : 'Men above 100',
        'Total Male Resident Census' : 'Men total',
        'Women under 18 years of age' : 'Women below 18',
        'Women between 18 and 44 years of age' : 'Women 18 to 44',
        'Women between 45 and 59 years of age' : 'Women 45 to 59',
        'Women between 60 and 64 years of age' : 'Women 60 to 64',
        'Women between 65 and 69 years of age' : 'Women 65 to 69',
        'Women between 70 and 74 years of age' : 'Women 70 to 74',
        'Women between 75 and 79 years of age' : 'Women 75 to 79',
        'Women between 80 and 84 years of age' : 'Women 80 to 84',
        'Women between 85 and 89 years of age' : 'Women 85 to 89',
        'Women between 90 and 94 years of age' : 'Women 90 to 94',
        'Women between 95 and 99 years of age' : 'Women 99 to 99',
        'Women 100 years of age and older' : 'Women above 100',
        'Total Female Resident Census' : 'Women total',
        'Facility Resident Total under 18 years of age' : 'Resident below 18',
        'Facility Resident Total between 18 and 44 years of age' : 'Resident 18 to 44',
        'Facility Resident Total between 45 and 59 years of age' : 'Resident 45 to 59',
        'Facility Resident Total between 60 and 64 years of age' : 'Resident 60 to 64',
        'Facility Resident Total between 65 and 69 years of age' : 'Resident 65 to 69',
        'Facility Resident Total between 70 and 74 years of age' : 'Resident 70 to 74',
        'Facility Resident Total between 75 and 79 years of age' : 'Resident 75 to 79',
        'Facility Resident Total between 80 and 84 years of age' : 'Resident 80 to 84',
        'Facility Resident Total between 85 and 89 years of age' : 'Resident 85 to 89',
        'Facility Resident Total between 90 and 94 years of age' : 'Resident 90 to 94',
        'Facility Resident Total between 95 and 99 years of age' : 'Resident 95 to 99',
        'Facility Resident Total 100 years of age and older' : 'Resident above 100',
        'Total Facility Resident Census' : 'Resident total'})

    # Drop every row that has a missing value in any column
    pa_resident_census = pa_resident_census.dropna(axis=0)

    # Everything from the third column onwards is treated as a resident count;
    # the first two columns are left untouched.
    count_columns = list(pa_resident_census.columns[2:])

    # The counts may be stored as strings: convert them to numbers and turn
    # anything that is not numeric into NaN.
    for col_name in count_columns:
        pa_resident_census[col_name] = pd.to_numeric(pa_resident_census[col_name], errors='coerce')

    # Delete the rows where a count could not be converted
    pa_resident_census = pa_resident_census.dropna(axis=0)

    # No NaN is left at this point, so the float counts can be cast to integer
    for col_name in count_columns:
        pa_resident_census[col_name] = pa_resident_census[col_name].astype(int)

    return pa_resident_census

### Combining the data

In [22]:
# Merge PA nursing home residents census,  and PA provider info
def mergePAProviderResidentCensus(pennstate_census_data, prov_all_ratings):
    """Join the PA resident census with the overall rating of each provider.

    Args:
        pennstate_census_data: DataFrame with 'County' and 'Prov Name'
            columns plus the resident count columns. Its 'County' and
            'Prov Name' columns are rewritten in place (title case and
            upper case respectively).
        prov_all_ratings: DataFrame with 'County', 'Prov Name',
            'Overall Rating' and 'State' columns.

    Returns:
        pandas.DataFrame: the census columns summed per
        ('County', 'Prov Name'), inner-joined on those two keys with the
        'Overall Rating' of the providers whose 'State' is 'PA'.
    """
    join_keys = ['County', 'Prov Name']

    # Normalise the case of the join keys -- presumably to match the
    # spelling used in the provider data (verify against that dataset).
    pennstate_census_data['County'] = pennstate_census_data['County'].str.title()
    pennstate_census_data['Prov Name'] = pennstate_census_data['Prov Name'].str.upper()

    # Total the resident counts for every county / nursing home pair
    residents_per_home = pennstate_census_data.groupby(join_keys).sum().reset_index()

    # Keep only the Pennsylvania providers and the columns needed here
    in_pennsylvania = prov_all_ratings['State'] == 'PA'
    pa_providers = prov_all_ratings.loc[in_pennsylvania, join_keys + ['Overall Rating', 'State']]

    # Collapse to one row per (county, home, rating); the 'State' count that
    # the aggregation produces is not needed afterwards.
    pa_ratings = (
        pa_providers.groupby(join_keys + ['Overall Rating'])
        .count()
        .reset_index()
        .drop(columns='State')
    )

    # Merge the two datasets
    return pd.merge(residents_per_home, pa_ratings, on=join_keys, how='inner')

#### Gender wise Analysis of Best Rated Nursing Homes in PA

In [1]:
# Draw pie chart showing the number of men and women in the best rated (rated 4 & 5) nursing homes in PA
def drawPie_GenderwiseBestrated(pa_merged_data):
    """Draw, save and show a pie chart of men vs. women in best rated homes.

    "Best rated" means an 'Overall Rating' of 4 or higher. The chart is
    written to ../Output/genderbr.png and then displayed.

    Args:
        pa_merged_data: DataFrame with 'Overall Rating', 'Men total' and
            'Women total' columns.

    Returns:
        None.
    """
    # Select the style before anything is drawn: a style sheet only affects
    # artists created after it is applied. As before, the style stays active
    # for later plots because plt.style.use changes the global settings.
    plt.style.use('grayscale')

    bestrated_df = pa_merged_data.loc[pa_merged_data['Overall Rating'] >= 4]

    # int(round(...)) works for numpy and plain Python numbers alike
    br_total_men = int(round(bestrated_df['Men total'].sum()))
    br_total_women = int(round(bestrated_df['Women total'].sum()))

    br_resident_count = [br_total_men, br_total_women]
    labels = [f'Male : {br_total_men}', f'Female : {br_total_women}']
    colors = ['blue', 'magenta']

    plt.title("Men vs Women in Best rated PA Nursing Homes")
    # The second wedge (women) is pulled slightly out of the pie
    plt.pie(br_resident_count, labels=labels, colors=colors, autopct="%1.1f%%", startangle=110, explode=(0, 0.06), shadow=True)
    plt.axis("equal")
    plt.savefig("../Output/genderbr.png")
    plt.show()

#### Gender wise Analysis of Low Rated Nursing Homes in PA

In [28]:
# Draw pie chart showing the number of men and women in the low rated (rated <= 3) nursing homes in PA
def drawPie_GenderwiseLowrated(pa_resident_data):
    """Draw, save and show a pie chart of men vs. women in low rated homes.

    "Low rated" means an 'Overall Rating' below 4. The chart is written to
    ../Output/genderlr.png and then displayed.

    Args:
        pa_resident_data: DataFrame with 'Overall Rating', 'Men total' and
            'Women total' columns.

    Returns:
        None.
    """
    # Select the style before anything is drawn: a style sheet only affects
    # artists created after it is applied. As before, the style stays active
    # for later plots because plt.style.use changes the global settings.
    plt.style.use('fivethirtyeight')

    lowrated_df = pa_resident_data.loc[pa_resident_data['Overall Rating'] < 4]

    # Whole-number totals, same as in the best rated chart
    lr_total_men = int(round(lowrated_df['Men total'].sum()))
    lr_total_women = int(round(lowrated_df['Women total'].sum()))

    lr_resident_count = [lr_total_men, lr_total_women]
    labels = [f'Male : {lr_total_men}', f'Female : {lr_total_women}']
    colors = ['teal', 'coral']

    # The second wedge (women) is pulled slightly out of the pie
    plt.pie(lr_resident_count, labels=labels, colors=colors, explode=(0, 0.06), autopct='%1.1f%%', shadow=True, startangle=140)
    plt.title("Men vs Women in Low rated PA Nursing Homes")
    plt.axis("equal")
    plt.savefig("../Output/genderlr.png")
    plt.show()