# EPIC 4 - Become a Carer - T34.1 - Primary Carer, Relationship Recipient, Age, Sex
This Jupyter Notebook prepares the 'Number of primary carers, by age and sex, 2018' XLS file for use in the Mo-Buddy Website Solution.
1. Read Raw Data
2. Clean Raw Data
3. Export Clean Data

- Table_29.1 - Carer status by sex, age
- Table_30.1 - Carer status, recipient, disability status, age, sex
- Table_31.1 - Carer status, by geographic location, age, sex
- Table_32.1 - 15-.. yo, carer status, sex 
- Table_33.1 - Employed 15-64 yo, carer status, sex 
- Table_34.1 - Primary Carer, recipient, age, sex
- Table_35.1 - Carer and recipient living in or other households, age
- Table_36.1 - Primary Carer, Time spending in care, sex  ****
- Table_37.1 - Primary Carer, age, time spending in care, disability status, ***** 
- Table_38.1 - Primary Carer, time spending in care, select recipient   ****
- Table_39.1 - Primary Carer, reason for taking a carer, sex   ****
- Table_40.1 - Primary Carer, reason for taking a carer, age of recipient ****
- Table_41.1 - Primary Carer, satisfaction of service received, sex, age  ****
- Table_42.1 - Primary Carer, social community participation with recipient, time spending in care, age  ****
- Table_43.1 - Primary Carer, social community participation without recipient, time spending in care, age  ****

In [1]:
# Import Packages
import pandas as pd
import itertools
# import re

In [2]:
# Show every column when a dataframe is displayed (no column truncation)
pd.options.display.max_columns = None

## 1. Read in Raw Data from an XLS file

In [3]:
# Helper that loads one worksheet of an XLS workbook
def read_in_data(file_path, sheet_name):
    """
    Load a single worksheet of an XLS file into a dataframe.
    Inputs:
        - file_path, type: string, desc: path of the XLS file
        - sheet_name, type: string, desc: name of the worksheet to load
    Outputs:
        - type: dataframe, desc: the worksheet exactly as parsed by
          pandas.read_excel (no cleaning is applied here)
    """
    return pd.read_excel(file_path, sheet_name=sheet_name)

In [4]:
# Load the 'Table_34.1' worksheet of the source workbook as the raw dataframe
filepath_raw_data = 'DataBases/44300do030.xls'
sheet_name = 'Table_34.1'
df_raw_carer_34 = read_in_data(file_path=filepath_raw_data, sheet_name=sheet_name)

In [5]:
# Check what the raw dataframe looks like
df_raw_carer_34

Unnamed: 0,Australian Bureau of Statistics,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5
0,"44300DO030_2018 Disability, Ageing and Carers,...",,,,,
1,Released at 11.30am (Canberra time) Thurs 24 O...,,,,,
2,"Table 34.1 Primary carers, relationship of car...",,,,,
3,,Relationship of carer to main recipient of care,,,,
4,,Partner of recipient,Child of recipient,Parent of recipient,Other relationship to recipient,Total
5,ESTIMATE ('000),,,,,
6,Male primary carers,,,,,
7,15–24 years,0,4.4,0,0,7
8,25–44 years,11.8,17.6,6.4,3.8,39
9,45–64 years,46.2,36.6,15.8,11.4,108.2


## 2. Clean up Raw Data

In [6]:
# Keep only the data rows and promote the header row to column labels.
# Row 4 of the raw sheet carries the column headers (relationship types and the total).
col_names = df_raw_carer_34.iloc[4, :]
# Labels 0-3, 5 and 6 are the title lines and section headings visible in the raw preview;
# NOTE(review): 12, 13, 19, 20, 26, 27, 28 are presumably further headings/footers - verify against the sheet.
rows_to_discard = [0, 1, 2, 3, 5, 6, 12, 13, 19, 20, 26, 27, 28]
df_carer_34 = df_raw_carer_34.drop(index=rows_to_discard).reset_index(drop=True)
df_carer_34.columns = col_names
# The raw header has a trailing space ('Total '); normalise it to 'total'
df_carer_34 = df_carer_34.rename(columns={'Total ': 'total'})
df_carer_34.columns.names = ['']
# The header row itself (label 4) is not dropped here, so it is still the first row of the result
df_carer_34

Unnamed: 0,NaN,Partner of recipient,Child of recipient,Parent of recipient,Other relationship to recipient,total
0,,Partner of recipient,Child of recipient,Parent of recipient,Other relationship to recipient,Total
1,15–24 years,0,4.4,0,0,7
2,25–44 years,11.8,17.6,6.4,3.8,39
3,45–64 years,46.2,36.6,15.8,11.4,108.2
4,65 years and over,73.4,6.1,5.2,6.4,89.4
5,Total,129.6,62.7,27.1,22.1,241.9
6,15–24 years,1.3,2.4,1.2,8,13.7
7,25–44 years,20.7,31.5,99.8,16.9,165.6
8,45–64 years,64.2,110.8,88.3,34.4,298.2
9,65 years and over,99,15.6,18.1,7.7,139.3


In [7]:
# Label the unnamed first column 'Index' and move it into the row index,
# so that only the value columns remain when the column MultiIndex is attached
first_col_label = df_carer_34.columns[0]
df_carer_34 = df_carer_34.rename(columns={first_col_label: 'Index'})
df_carer_34 = df_carer_34.set_index('Index')
df_carer_34

Unnamed: 0_level_0,Partner of recipient,Child of recipient,Parent of recipient,Other relationship to recipient,total
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
,Partner of recipient,Child of recipient,Parent of recipient,Other relationship to recipient,Total
15–24 years,0,4.4,0,0,7
25–44 years,11.8,17.6,6.4,3.8,39
45–64 years,46.2,36.6,15.8,11.4,108.2
65 years and over,73.4,6.1,5.2,6.4,89.4
Total,129.6,62.7,27.1,22.1,241.9
15–24 years,1.3,2.4,1.2,8,13.7
25–44 years,20.7,31.5,99.8,16.9,165.6
45–64 years,64.2,110.8,88.3,34.4,298.2
65 years and over,99,15.6,18.1,7.7,139.3


In [8]:
# Adding a two-level MultiIndex to the columns ("Relation" / "Type")
first_level = ['Relationship of carer to main recipient of care', 'total']
# Header labels without the leading (unnamed) entry and the trailing total
second_level = list(col_names)[1:-1]
# Pair the shared top-level heading with each of the four relationship columns
levels_1_1 = [(first_level[0], relation) for relation in second_level[0:4]]
# The total column gets its own top-level entry
levels_1_2 = [(first_level[1], 'total')]
levels = levels_1_1 + levels_1_2
df_carer_34.columns = pd.MultiIndex.from_tuples(levels, names=["Relation", "Type"])
# Move 'Index' back out of the row index so it becomes a regular column again
df_carer_34 = df_carer_34.reset_index()
df_carer_34

Relation,Index,Relationship of carer to main recipient of care,Relationship of carer to main recipient of care,Relationship of carer to main recipient of care,Relationship of carer to main recipient of care,total
Type,Unnamed: 1_level_1,Partner of recipient,Child of recipient,Parent of recipient,Other relationship to recipient,total
0,,Partner of recipient,Child of recipient,Parent of recipient,Other relationship to recipient,Total
1,15–24 years,0,4.4,0,0,7
2,25–44 years,11.8,17.6,6.4,3.8,39
3,45–64 years,46.2,36.6,15.8,11.4,108.2
4,65 years and over,73.4,6.1,5.2,6.4,89.4
5,Total,129.6,62.7,27.1,22.1,241.9
6,15–24 years,1.3,2.4,1.2,8,13.7
7,25–44 years,20.7,31.5,99.8,16.9,165.6
8,45–64 years,64.2,110.8,88.3,34.4,298.2
9,65 years and over,99,15.6,18.1,7.7,139.3


In [9]:
# Renumber the rows with a default integer index, discarding the current index.
# NOTE(review): the preceding reset_index already leaves a default integer index,
# so this step looks redundant - confirm before removing it.
df_carer_34 = df_carer_34.reset_index(drop=True)
df_carer_34

Relation,Index,Relationship of carer to main recipient of care,Relationship of carer to main recipient of care,Relationship of carer to main recipient of care,Relationship of carer to main recipient of care,total
Type,Unnamed: 1_level_1,Partner of recipient,Child of recipient,Parent of recipient,Other relationship to recipient,total
0,,Partner of recipient,Child of recipient,Parent of recipient,Other relationship to recipient,Total
1,15–24 years,0,4.4,0,0,7
2,25–44 years,11.8,17.6,6.4,3.8,39
3,45–64 years,46.2,36.6,15.8,11.4,108.2
4,65 years and over,73.4,6.1,5.2,6.4,89.4
5,Total,129.6,62.7,27.1,22.1,241.9
6,15–24 years,1.3,2.4,1.2,8,13.7
7,25–44 years,20.7,31.5,99.8,16.9,165.6
8,45–64 years,64.2,110.8,88.3,34.4,298.2
9,65 years and over,99,15.6,18.1,7.7,139.3


In [10]:
# Remove the leftover header row (row label 0) and renumber the remaining rows;
# df_carer_34 itself is left unchanged
df_carer_34_1 = df_carer_34.drop(index=0).reset_index(drop=True)
df_carer_34_1

Relation,Index,Relationship of carer to main recipient of care,Relationship of carer to main recipient of care,Relationship of carer to main recipient of care,Relationship of carer to main recipient of care,total
Type,Unnamed: 1_level_1,Partner of recipient,Child of recipient,Parent of recipient,Other relationship to recipient,total
0,15–24 years,0.0,4.4,0.0,0.0,7.0
1,25–44 years,11.8,17.6,6.4,3.8,39.0
2,45–64 years,46.2,36.6,15.8,11.4,108.2
3,65 years and over,73.4,6.1,5.2,6.4,89.4
4,Total,129.6,62.7,27.1,22.1,241.9
5,15–24 years,1.3,2.4,1.2,8.0,13.7
6,25–44 years,20.7,31.5,99.8,16.9,165.6
7,45–64 years,64.2,110.8,88.3,34.4,298.2
8,65 years and over,99.0,15.6,18.1,7.7,139.3
9,Total,185.7,161.4,205.6,65.2,618.8


In [11]:
# Collect the distinct row labels of the 'Index' column, in order of first appearance
index_labels = df_carer_34_1['Index']
levels_names = index_labels.unique()
levels_names

array(['15–24 years', '25–44 years', '45–64 years', '65 years and over',
       'Total'], dtype=object)

In [12]:
# First level of the row MultiIndex: one label per carer group
first_level = [
    'Male primary carers',
    'Female primary carers',
    'All primary carers',
]
first_level

['Male primary carers', 'Female primary carers', 'All primary carers']

In [13]:
# Second level of the row MultiIndex: the distinct labels of the 'Index'
# column (the age groups plus 'Total')
second_level = levels_names
second_level

array(['15–24 years', '25–44 years', '45–64 years', '65 years and over',
       'Total'], dtype=object)

In [14]:
# Adding a two-level MultiIndex ('Gender' / 'Age') to the rows.
# The Cartesian product pairs every carer group with every age label in
# group-major order, so the frame must have exactly
# len(first_level) * len(second_level) rows laid out in that order.
levels_all = list(itertools.product(first_level, second_level))
multi_index = pd.MultiIndex.from_tuples(levels_all, names=['Gender', 'Age'])
df_carer_34_1.set_index(keys=multi_index, drop=True, inplace=True)
# The age labels now live in the row index, so the helper 'Index' column can go.
# level=0 targets the top level of the column MultiIndex explicitly; dropping
# without a level on these non-lexsorted columns emits a PerformanceWarning.
df_carer_34_1.drop(columns='Index', level=0, inplace=True)
df_carer_34_1

  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Unnamed: 0_level_0,Relation,Relationship of carer to main recipient of care,Relationship of carer to main recipient of care,Relationship of carer to main recipient of care,Relationship of carer to main recipient of care,total
Unnamed: 0_level_1,Type,Partner of recipient,Child of recipient,Parent of recipient,Other relationship to recipient,total
Gender,Age,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Male primary carers,15–24 years,0.0,4.4,0.0,0.0,7.0
Male primary carers,25–44 years,11.8,17.6,6.4,3.8,39.0
Male primary carers,45–64 years,46.2,36.6,15.8,11.4,108.2
Male primary carers,65 years and over,73.4,6.1,5.2,6.4,89.4
Male primary carers,Total,129.6,62.7,27.1,22.1,241.9
Female primary carers,15–24 years,1.3,2.4,1.2,8.0,13.7
Female primary carers,25–44 years,20.7,31.5,99.8,16.9,165.6
Female primary carers,45–64 years,64.2,110.8,88.3,34.4,298.2
Female primary carers,65 years and over,99.0,15.6,18.1,7.7,139.3
Female primary carers,Total,185.7,161.4,205.6,65.2,618.8


## 3. Export Clean Data to a CSV file

In [15]:
# Export full version; index=True writes the row MultiIndex alongside the values.
# A forward slash is used as the separator: a backslash here would form the
# invalid escape sequence '\E' and only acts as a path separator on Windows.
df_carer_34_1.to_csv('Output/EPIC4_T34_1_V1.csv', index=True)