# EPIC 4 - Become a Carer - T33.1 - Employed persons, Carer status, Employment Characteristics
This Jupyter Notebook prepares the 'Number of primary carers, by age and sex, 2018' XLS file for use in the Mo-Buddy Website Solution.
1. Read Raw Data
2. Clean Raw Data
3. Export Clean Data

- Table_29.1 - Carer status by sex, age
- Table_30.1 - Carer status, recipient, disability status, age, sex
- Table_31.1 - Carer status, by geographic location, age, sex
- Table_32.1 - 15-.. yo, carer status, sex 
- Table_33.1 - Employed 15-64 yo, carer status, sex 
- Table_34.1 - Primary Carer, recipient, age, sex
- Table_35.1 - Carer and recipient living in or other households, age
- Table_36.1 - Primary Carer, Time spending in care, sex  ****
- Table_37.1 - Primary Carer, age, time spending in care, disability status, ***** 
- Table_38.1 - Primary Carer, time spending in care, select recipient   ****
- Table_39.1 - Primary Carer, reason for taking a carer, sex   ****
- Table_40.1 - Primary Carer, reason for taking a carer, age of recipient ****
- Table_41.1 - Primary Carer, satisfaction of service received, sex, age  ****
- Table_42.1 - Primary Carer, social community participation with recipient, time spending in care, age  ****
- Table_43.1 - Primary Carer, social community participation without recipient, time spending in care, age  ****

In [1]:
# Import Packages
import pandas as pd
import itertools
# import re

In [2]:
# Lift pandas' cap on displayed columns so wide dataframes are shown in full
pd.options.display.max_columns = None

## 1. Read in Raw Data from a XLS file

In [3]:
# Function for reading in raw data from a XLS file
def read_in_data(file_path, sheet_name):
    """
    Load one worksheet of an XLS workbook into a dataframe.

    Inputs:
        - file_path, type: string, desc: path of the XLS file to open
        - sheet_name, type: string, desc: name of the worksheet to load
    Outputs:
        - type: dataframe, desc: the worksheet contents, exactly as
          pd.read_excel returns them with its default settings
    """
    return pd.read_excel(file_path, sheet_name=sheet_name)

In [4]:
# Load worksheet 'Table_33.1' of the source workbook into the raw dataframe
sheet_name = 'Table_33.1'
filepath_raw_data = 'DataBases/44300do030.xls'
df_raw_carer_33 = read_in_data(file_path=filepath_raw_data, sheet_name=sheet_name)

In [5]:
# Display the raw dataframe to check what it looks like before cleaning
df_raw_carer_33

Unnamed: 0,Australian Bureau of Statistics,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5
0,"44300DO030_2018 Disability, Ageing and Carers,...",,,,,
1,Released at 11.30am (Canberra time) Thurs 24 O...,,,,,
2,"Table 33.1 Employed persons aged 15-64, living...",,,,,
3,Employment characteristics,Primary carer,"Carer, but not a primary carer",Total carers,Not a carer,Total
4,ESTIMATE ('000),,,,,
5,Occupation of main job,,,,,
6,Managers,41.1,123.4,165.5,1345.2,1507.9
7,Professionals,107.6,230.4,339.8,2650.9,2992.1
8,Technicians and Trades Workers,26.7,125.6,153.2,1610,1764.2
9,Community and Personal Service Workers,42.1,94.8,136.8,1185.5,1322.8


## 2. Clean up Raw Data

In [6]:
# Row 3 of the raw sheet holds the real column titles; keep it for the header
col_names = df_raw_carer_33.iloc[3, :]
# Row labels to discard: 0-2 are the workbook title/release lines and 4-5 are
# section headings; the remaining labels are presumably further headings,
# blank rows or footnotes (not visible in the preview above - verify against
# the sheet). Row 3 itself is kept here.
rows_to_discard = [0, 1, 2, 4, 5, 16, 17, 39, 40, 44, 46, 47, 48]
df_carer_33 = df_raw_carer_33.drop(index=rows_to_discard).reset_index(drop=True)
# Replace the placeholder headers with the real titles, lower-casing 'Total'
df_carer_33.columns = col_names
df_carer_33 = df_carer_33.rename(columns={'Total': 'total'})
# Blank out the name the column index inherited from the title row
df_carer_33.columns.names = ['']
df_carer_33

Unnamed: 0,Employment characteristics,Primary carer,"Carer, but not a primary carer",Total carers,Not a carer,total
0,Employment characteristics,Primary carer,"Carer, but not a primary carer",Total carers,Not a carer,Total
1,Managers,41.1,123.4,165.5,1345.2,1507.9
2,Professionals,107.6,230.4,339.8,2650.9,2992.1
3,Technicians and Trades Workers,26.7,125.6,153.2,1610,1764.2
4,Community and Personal Service Workers,42.1,94.8,136.8,1185.5,1322.8
5,Clerical and Administrative Workers,63.2,141.9,205.1,1455.8,1660.5
6,Sales Workers,28.5,78.3,106.3,970.9,1077.2
7,Machinery Operators and Drivers,13.5,63.3,78.2,712.6,788
8,Labourers,26.8,86.9,111,997.8,1112.1
9,Inadequately described,1.2,2.3,1.7,13.2,16.5


In [7]:
# Rename the first column (the row labels) to 'Index' and make it the row
# index, so that only the five value columns remain as columns
label_col = df_carer_33.columns[0]
df_carer_33 = df_carer_33.rename(columns={label_col: 'Index'}).set_index('Index')
df_carer_33

Unnamed: 0_level_0,Primary carer,"Carer, but not a primary carer",Total carers,Not a carer,total
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Employment characteristics,Primary carer,"Carer, but not a primary carer",Total carers,Not a carer,Total
Managers,41.1,123.4,165.5,1345.2,1507.9
Professionals,107.6,230.4,339.8,2650.9,2992.1
Technicians and Trades Workers,26.7,125.6,153.2,1610,1764.2
Community and Personal Service Workers,42.1,94.8,136.8,1185.5,1322.8
Clerical and Administrative Workers,63.2,141.9,205.1,1455.8,1660.5
Sales Workers,28.5,78.3,106.3,970.9,1077.2
Machinery Operators and Drivers,13.5,63.3,78.2,712.6,788
Labourers,26.8,86.9,111,997.8,1112.1
Inadequately described,1.2,2.3,1.7,13.2,16.5


In [8]:
# Give the columns a two-level header ("Carer", "Type"): the four carer-status
# columns sit under 'Carer Status' and the grand-total column under 'total'
first_level = ['Carer Status', 'total']
# Column titles without the first (row labels) and the last (total) entry
second_level = list(col_names)[1:-1]
levels_1_1 = [(first_level[0], status) for status in second_level[0:4]]
levels_1_2 = [(first_level[1], 'total')]
levels = [*levels_1_1, *levels_1_2]
df_carer_33.columns = pd.MultiIndex.from_tuples(levels, names=["Carer", "Type"])
# Move 'Index' back from the row index to an ordinary column
df_carer_33 = df_carer_33.reset_index()
df_carer_33

Carer,Index,Carer Status,Carer Status,Carer Status,Carer Status,total
Type,Unnamed: 1_level_1,Primary carer,"Carer, but not a primary carer",Total carers,Not a carer,total
0,Employment characteristics,Primary carer,"Carer, but not a primary carer",Total carers,Not a carer,Total
1,Managers,41.1,123.4,165.5,1345.2,1507.9
2,Professionals,107.6,230.4,339.8,2650.9,2992.1
3,Technicians and Trades Workers,26.7,125.6,153.2,1610,1764.2
4,Community and Personal Service Workers,42.1,94.8,136.8,1185.5,1322.8
5,Clerical and Administrative Workers,63.2,141.9,205.1,1455.8,1660.5
6,Sales Workers,28.5,78.3,106.3,970.9,1077.2
7,Machinery Operators and Drivers,13.5,63.3,78.2,712.6,788
8,Labourers,26.8,86.9,111,997.8,1112.1
9,Inadequately described,1.2,2.3,1.7,13.2,16.5


In [9]:
# Renumber the rows from 0, discarding the current row index
df_carer_33 = df_carer_33.reset_index(drop=True)
df_carer_33

Carer,Index,Carer Status,Carer Status,Carer Status,Carer Status,total
Type,Unnamed: 1_level_1,Primary carer,"Carer, but not a primary carer",Total carers,Not a carer,total
0,Employment characteristics,Primary carer,"Carer, but not a primary carer",Total carers,Not a carer,Total
1,Managers,41.1,123.4,165.5,1345.2,1507.9
2,Professionals,107.6,230.4,339.8,2650.9,2992.1
3,Technicians and Trades Workers,26.7,125.6,153.2,1610,1764.2
4,Community and Personal Service Workers,42.1,94.8,136.8,1185.5,1322.8
5,Clerical and Administrative Workers,63.2,141.9,205.1,1455.8,1660.5
6,Sales Workers,28.5,78.3,106.3,970.9,1077.2
7,Machinery Operators and Drivers,13.5,63.3,78.2,712.6,788
8,Labourers,26.8,86.9,111,997.8,1112.1
9,Inadequately described,1.2,2.3,1.7,13.2,16.5


In [10]:
# Continue on a separate dataframe without row 0 (the repeated column-title
# row), renumbering the remaining rows from 0
df_carer_33_1 = df_carer_33.drop(index=0).reset_index(drop=True)
df_carer_33_1

Carer,Index,Carer Status,Carer Status,Carer Status,Carer Status,total
Type,Unnamed: 1_level_1,Primary carer,"Carer, but not a primary carer",Total carers,Not a carer,total
0,Managers,41.1,123.4,165.5,1345.2,1507.9
1,Professionals,107.6,230.4,339.8,2650.9,2992.1
2,Technicians and Trades Workers,26.7,125.6,153.2,1610.0,1764.2
3,Community and Personal Service Workers,42.1,94.8,136.8,1185.5,1322.8
4,Clerical and Administrative Workers,63.2,141.9,205.1,1455.8,1660.5
5,Sales Workers,28.5,78.3,106.3,970.9,1077.2
6,Machinery Operators and Drivers,13.5,63.3,78.2,712.6,788.0
7,Labourers,26.8,86.9,111.0,997.8,1112.1
8,Inadequately described,1.2,2.3,1.7,13.2,16.5
9,All occupations,350.8,947.4,1298.2,10943.4,12239.8


In [11]:
# Collect the row labels held in the 'Index' column as a plain Python list
levels_names = df_carer_33_1['Index'].tolist()
levels_names

['Managers',
 'Professionals',
 'Technicians and Trades Workers',
 'Community and Personal Service Workers',
 'Clerical and Administrative Workers',
 'Sales Workers',
 'Machinery Operators and Drivers',
 'Labourers',
 'Inadequately described',
 'All occupations',
 'Agriculture, forestry and fishing',
 'Mining',
 'Manufacturing',
 'Electricity, gas, water and waste services',
 'Construction',
 'Wholesale trade',
 'Retail trade',
 'Accommodation and food services',
 'Transport, postal and warehousing',
 'Information media and telecommunications',
 'Financial and insurance services',
 'Rental, hiring and real estate services',
 'Professional, scientific and technical services',
 'Administrative and support services',
 'Public administration and safety',
 'Education and training',
 'Health care and social assistance',
 'Arts and recreation services',
 'Other services',
 'Inadequately described',
 'All industries',
 'Government',
 'Private',
 'Not known',
 'Total']

In [12]:
# Names of the three groups that the row labels fall into
first_level = [
    'Occupation on main job',
    'Industry of main job',
    'Sector on employment of main job',
]
first_level

['Occupation on main job',
 'Industry of main job',
 'Sector on employment of main job']

In [13]:
# Use the row labels as the second (inner) level; this is the same list object
# as levels_names, not a copy
second_level = levels_names
second_level

['Managers',
 'Professionals',
 'Technicians and Trades Workers',
 'Community and Personal Service Workers',
 'Clerical and Administrative Workers',
 'Sales Workers',
 'Machinery Operators and Drivers',
 'Labourers',
 'Inadequately described',
 'All occupations',
 'Agriculture, forestry and fishing',
 'Mining',
 'Manufacturing',
 'Electricity, gas, water and waste services',
 'Construction',
 'Wholesale trade',
 'Retail trade',
 'Accommodation and food services',
 'Transport, postal and warehousing',
 'Information media and telecommunications',
 'Financial and insurance services',
 'Rental, hiring and real estate services',
 'Professional, scientific and technical services',
 'Administrative and support services',
 'Public administration and safety',
 'Education and training',
 'Health care and social assistance',
 'Arts and recreation services',
 'Other services',
 'Inadequately described',
 'All industries',
 'Government',
 'Private',
 'Not known',
 'Total']

In [14]:
# Build a two-level row index ('Employment', 'Characteristics') from the row
# labels: positions 0-9 are the occupations, 10-30 the industries, 31 up to
# the second-to-last the sectors, and the last label is the grand total.
# NOTE(review): the source sheet heading reads 'Occupation of main job'; the
# group names below say 'on' - confirm which spelling downstream consumers expect.
first_level = ['Occupation on main job', 'Industry of main job', 'Sector on employment of main job', 'Total']
levels_1_1 = [(first_level[0], label) for label in second_level[0:10]]
levels_1_2 = [(first_level[1], label) for label in second_level[10:31]]
levels_1_3 = [(first_level[2], label) for label in second_level[31:-1]]
levels_1_4 = [(first_level[3], second_level[-1])]
levels = [*levels_1_1, *levels_1_2, *levels_1_3, *levels_1_4]
multi_index = pd.MultiIndex.from_tuples(levels, names=['Employment', 'Characteristics'])
df_carer_33_1 = df_carer_33_1.set_index(keys=multi_index)
# The labels now live in the row index, so the 'Index' column is redundant
df_carer_33_1 = df_carer_33_1.drop(columns='Index')
df_carer_33_1

  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Unnamed: 0_level_0,Carer,Carer Status,Carer Status,Carer Status,Carer Status,total
Unnamed: 0_level_1,Type,Primary carer,"Carer, but not a primary carer",Total carers,Not a carer,total
Employment,Characteristics,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Occupation on main job,Managers,41.1,123.4,165.5,1345.2,1507.9
Occupation on main job,Professionals,107.6,230.4,339.8,2650.9,2992.1
Occupation on main job,Technicians and Trades Workers,26.7,125.6,153.2,1610.0,1764.2
Occupation on main job,Community and Personal Service Workers,42.1,94.8,136.8,1185.5,1322.8
Occupation on main job,Clerical and Administrative Workers,63.2,141.9,205.1,1455.8,1660.5
Occupation on main job,Sales Workers,28.5,78.3,106.3,970.9,1077.2
Occupation on main job,Machinery Operators and Drivers,13.5,63.3,78.2,712.6,788.0
Occupation on main job,Labourers,26.8,86.9,111.0,997.8,1112.1
Occupation on main job,Inadequately described,1.2,2.3,1.7,13.2,16.5
Occupation on main job,All occupations,350.8,947.4,1298.2,10943.4,12239.8


## 3. Export Clean Data to a CSV file

In [15]:
# Export full version; index=True writes the two-level row index to the CSV.
# The path uses a forward slash: in 'Output\E...' the '\E' is an invalid
# escape sequence (a warning on recent Python versions), and a backslash is
# only a path separator on Windows, so elsewhere the file would not be
# written into the Output directory.
df_carer_33_1.to_csv('Output/EPIC4_T33_1_V1.csv', index=True)