# EPIC 4 - Become a Carer - T29.1 - Carer Status by Sex, Age
This Jupyter Notebook prepares the 'Number of primary carers, by age and sex, 2018' CSV file for use in the Mo-Buddy website solution.
1. Read Raw Data
2. Clean Raw Data
3. Export Clean Data

- Table_29.1 - Carer status by sex, age
- Table_30.1 - Carer status, recipient, disability status, age, sex
- Table_31.1 - Carer status, by geographic location, age, sex
- Table_32.1 - 15-.. yo, carer status, sex 
- Table_33.1 - Employed 15-64 yo, carer status, sex 
- Table_34.1 - Primary Carer, recipient, age, sex
- Table_35.1 - Carer and recipient living in or other households, age
- Table_36.1 - Primary Carer, Time spending in care, sex  ****
- Table_37.1 - Primary Carer, age, time spending in care, disability status, ***** 
- Table_38.1 - Primary Carer, time spending in care, select recipient   ****
- Table_39.1 - Primary Carer, reason for taking a carer, sex   ****
- Table_40.1 - Primary Carer, reason for taking a carer, age of recipient ****
- Table_41.1 - Primary Carer, satisfaction of service received, sex, age  ****
- Table_42.1 - Primary Carer, social community participation with recipient, time spending in care, age  ****
- Table_43.1 - Primary Carer, social community participation without recipient, time spending in care, age  ****

In [29]:
# Import Packages
import pandas as pd
import itertools
import re

In [30]:
# Show every column when a dataframe is displayed (no truncation of wide frames)
pd.options.display.max_columns = None

## 1. Read in Raw Data from an XLS file

In [31]:
# Helper that loads one worksheet of an XLS workbook
def read_in_data(file_path, sheet_name):
    """
    Load a single worksheet of an XLS workbook into a dataframe.

    Inputs:
        - file_path, type: string, desc: path of the XLS workbook
        - sheet_name, type: string, desc: name of the worksheet to load
    Outputs:
        - type: dataframe, desc: the worksheet content exactly as parsed by
          pandas.read_excel with its default settings (no cleaning applied)
    """
    return pd.read_excel(io=file_path, sheet_name=sheet_name)

In [32]:
# Load the 'Table_29.1' worksheet of the source workbook into a raw dataframe
filepath_raw_data = 'DataBases/44300do030.xls'
sheet_name = 'Table_29.1'
df_raw_carer_29 = read_in_data(file_path=filepath_raw_data, sheet_name=sheet_name)

In [33]:
# Display the raw dataframe to see what it looks like before cleaning
df_raw_carer_29

Unnamed: 0,Australian Bureau of Statistics,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5
0,"44300DO030_2018 Disability, Ageing and Carers,...",,,,,
1,Released at 11.30am (Canberra time) Thurs 24 O...,,,,,
2,"Table 29.1 All persons, living in households, ...",,,,,
3,Age group (years),Primary carer,"Carer, but not a primary carer",Total carers,Not a carer,Total
4,"ESTIMATE ('000, years)",,,,,
5,Males,,,,,
6,Less than 15,,20.2,20.2,2372.5,2393.2
7,15-24,7,86.8,95.4,1502.8,1597.7
8,25-34,14.4,98.9,116.5,1700.1,1814.5
9,35-44,22.6,110.1,132.2,1486,1618.6


## 2. Clean up Raw Data

In [34]:
# Drop the rows that carry no data and label the columns.
# Raw rows 0-2 are title lines and rows 4-5 are section labels; the remaining
# positions in the list are presumably the matching section labels / footer
# rows further down the sheet (TODO confirm against the workbook).
# Raw row 3 holds the column headers: it is used as the column labels and is
# deliberately NOT dropped here - it stays in the frame as row 0.
col_names = df_raw_carer_29.iloc[3, :]
df_carer_29 = (
    df_raw_carer_29
    .drop(index=[0, 1, 2, 4, 5, 16, 17, 28, 29, 40, 41, 42])
    .reset_index(drop=True)
)
df_carer_29.columns = col_names
df_carer_29.columns.names = ['']
df_carer_29

Unnamed: 0,Age group (years),Primary carer,"Carer, but not a primary carer",Total carers,Not a carer,Total
0,Age group (years),Primary carer,"Carer, but not a primary carer",Total carers,Not a carer,Total
1,Less than 15,,20.2,20.2,2372.5,2393.2
2,15-24,7,86.8,95.4,1502.8,1597.7
3,25-34,14.4,98.9,116.5,1700.1,1814.5
4,35-44,22.6,110.1,132.2,1486,1618.6
5,45-54,50.1,164.5,218.9,1318.1,1536.6
6,55-64,57,170.6,228.5,1162.5,1390.2
7,65-74,46.5,138,185.4,888.6,1074.5
8,75 and over,42.7,92.7,132.4,561,696.5
9,Total,241.9,887.6,1128.6,10992.7,12123.1


In [35]:
# Rename the first column to 'Index' and move it into the row index.
# Note: this cell is not re-runnable - on a second run the first column is
# 'Primary carer', which would then be renamed and moved instead.
df_carer_29 = (
    df_carer_29
    .rename(columns={df_carer_29.columns[0]: 'Index'})
    .set_index('Index')
)
df_carer_29

Unnamed: 0_level_0,Primary carer,"Carer, but not a primary carer",Total carers,Not a carer,Total
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Age group (years),Primary carer,"Carer, but not a primary carer",Total carers,Not a carer,Total
Less than 15,,20.2,20.2,2372.5,2393.2
15-24,7,86.8,95.4,1502.8,1597.7
25-34,14.4,98.9,116.5,1700.1,1814.5
35-44,22.6,110.1,132.2,1486,1618.6
45-54,50.1,164.5,218.9,1318.1,1536.6
55-64,57,170.6,228.5,1162.5,1390.2
65-74,46.5,138,185.4,888.6,1074.5
75 and over,42.7,92.7,132.4,561,696.5
Total,241.9,887.6,1128.6,10992.7,12123.1


In [36]:
# Move 'Index' back out of the row index into an ordinary column.
# (The Sex/Age MultiIndex is attached to the rows in a later cell rather than
# to the columns, so no column MultiIndex is built here.)
df_carer_29 = df_carer_29.reset_index()
df_carer_29

Unnamed: 0,Index,Primary carer,"Carer, but not a primary carer",Total carers,Not a carer,Total
0,Age group (years),Primary carer,"Carer, but not a primary carer",Total carers,Not a carer,Total
1,Less than 15,,20.2,20.2,2372.5,2393.2
2,15-24,7,86.8,95.4,1502.8,1597.7
3,25-34,14.4,98.9,116.5,1700.1,1814.5
4,35-44,22.6,110.1,132.2,1486,1618.6
5,45-54,50.1,164.5,218.9,1318.1,1536.6
6,55-64,57,170.6,228.5,1162.5,1390.2
7,65-74,46.5,138,185.4,888.6,1074.5
8,75 and over,42.7,92.7,132.4,561,696.5
9,Total,241.9,887.6,1128.6,10992.7,12123.1


In [37]:
# Build a separate frame without row 0, which only repeats the column headers,
# and renumber the remaining rows from 0
df_carer_29_1 = df_carer_29.drop(index=0).reset_index(drop=True)
df_carer_29_1

Unnamed: 0,Index,Primary carer,"Carer, but not a primary carer",Total carers,Not a carer,Total
0,Less than 15,,20.2,20.2,2372.5,2393.2
1,15-24,7.0,86.8,95.4,1502.8,1597.7
2,25-34,14.4,98.9,116.5,1700.1,1814.5
3,35-44,22.6,110.1,132.2,1486.0,1618.6
4,45-54,50.1,164.5,218.9,1318.1,1536.6
5,55-64,57.0,170.6,228.5,1162.5,1390.2
6,65-74,46.5,138.0,185.4,888.6,1074.5
7,75 and over,42.7,92.7,132.4,561.0,696.5
8,Total,241.9,887.6,1128.6,10992.7,12123.1
9,Average age (years),58.4,50.4,52.1,36.0,37.5


In [38]:
# Distinct row labels found in the 'Index' column, in order of first appearance
levels_names = df_carer_29_1['Index'].unique()
levels_names

array(['Less than 15', '15-24', '25-34', '35-44', '45-54', '55-64',
       '65-74', '75 and over', 'Total', 'Average age (years)'],
      dtype=object)

In [39]:
# Outer ('Sex') level of the row MultiIndex.
# NOTE(review): assumes the sheet lists its blocks in exactly this order; only
# the 'Males' block is visible in the preview above - confirm against the workbook.
first_level = ['Males', 'Females', 'All persons']
first_level

['Males', 'Females', 'All persons']

In [40]:
# Inner ('Age') level of the row MultiIndex: the labels collected from the 'Index' column
second_level = levels_names
second_level

array(['Less than 15', '15-24', '25-34', '35-44', '45-54', '55-64',
       '65-74', '75 and over', 'Total', 'Average age (years)'],
      dtype=object)

In [41]:
# Adding multiIndex (in index)
# Every (sex, age label) pair, with the sex varying slowest, so the pairs are
# expected to line up one-to-one with the rows of the frame.
levels_all = list(itertools.product(first_level, second_level))
multi_index = pd.MultiIndex.from_tuples(levels_all, names=['Sex', 'Age'])

# The new index is attached purely by position. Check it against the real row
# labels BEFORE the 'Index' column is discarded, so that a shifted or reordered
# sheet raises an error instead of silently mislabelling the figures.
if len(multi_index) != len(df_carer_29_1):
    raise ValueError(
        'Expected %d rows (sex x age label) but the frame has %d'
        % (len(multi_index), len(df_carer_29_1))
    )
if not pd.Index(df_carer_29_1['Index']).equals(multi_index.get_level_values('Age')):
    raise ValueError("Row labels in 'Index' do not match the expected Sex/Age layout")

# Replace the positional index with the (Sex, Age) MultiIndex and drop the
# now redundant label column.
df_carer_29_1 = df_carer_29_1.set_index(keys=multi_index, drop=True).drop(columns='Index')
df_carer_29_1

Unnamed: 0_level_0,Unnamed: 1_level_0,Primary carer,"Carer, but not a primary carer",Total carers,Not a carer,Total
Sex,Age,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Males,Less than 15,,20.2,20.2,2372.5,2393.2
Males,15-24,7.0,86.8,95.4,1502.8,1597.7
Males,25-34,14.4,98.9,116.5,1700.1,1814.5
Males,35-44,22.6,110.1,132.2,1486.0,1618.6
Males,45-54,50.1,164.5,218.9,1318.1,1536.6
Males,55-64,57.0,170.6,228.5,1162.5,1390.2
Males,65-74,46.5,138.0,185.4,888.6,1074.5
Males,75 and over,42.7,92.7,132.4,561.0,696.5
Males,Total,241.9,887.6,1128.6,10992.7,12123.1
Males,Average age (years),58.4,50.4,52.1,36.0,37.5


In [42]:
# Impute missing values: a blank 'Primary carer' estimate is replaced with 0.
# NOTE(review): in the rows shown above only 'Less than 15' is blank; confirm
# with the data source that a blank means zero rather than "not published".
nan_index = df_carer_29_1['Primary carer'].isna()  # boolean mask of rows without a value
df_carer_29_1.loc[nan_index, 'Primary carer'] = 0
df_carer_29_1

Unnamed: 0_level_0,Unnamed: 1_level_0,Primary carer,"Carer, but not a primary carer",Total carers,Not a carer,Total
Sex,Age,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Males,Less than 15,0.0,20.2,20.2,2372.5,2393.2
Males,15-24,7.0,86.8,95.4,1502.8,1597.7
Males,25-34,14.4,98.9,116.5,1700.1,1814.5
Males,35-44,22.6,110.1,132.2,1486.0,1618.6
Males,45-54,50.1,164.5,218.9,1318.1,1536.6
Males,55-64,57.0,170.6,228.5,1162.5,1390.2
Males,65-74,46.5,138.0,185.4,888.6,1074.5
Males,75 and over,42.7,92.7,132.4,561.0,696.5
Males,Total,241.9,887.6,1128.6,10992.7,12123.1
Males,Average age (years),58.4,50.4,52.1,36.0,37.5


## 3. Export Clean Data to a CSV file

In [43]:
# Export full version (the Sex/Age row index is written as the first two columns).
# A forward slash is used as the path separator: it works on Windows as well as
# on POSIX systems, and avoids the invalid '\E' escape sequence that a
# backslash would introduce in a normal (non-raw) string literal.
# The 'Output' directory must already exist; to_csv does not create it.
df_carer_29_1.to_csv('Output/EPIC4_T29_1_V1.csv', index=True)