# Labour Market Statistics in Canada

In [5]:
import os
import pandas as pd
from datetime import datetime
import re
from dotenv import dotenv_values


# Look up the data directory in the local .env file, then read the CSV stored there.
censored_dict = dotenv_values(".env")
directory = censored_dict['FILE_DIRECTORY']
filename = "2020-2024_labour_force_stats.csv"
csv_path = os.path.join(directory, filename)
input_df = pd.read_csv(csv_path)

In [6]:
# Preview the first five rows of the freshly loaded table.
input_df.head(n=5)

Unnamed: 0,Labour force characteristics,Data type,20-Mar,20-Apr,20-May,20-Jun,20-Jul,20-Aug,20-Sep,20-Oct,...,23-Jun,23-Jul,23-Aug,23-Sep,23-Oct,23-Nov,23-Dec,24-Jan,24-Feb,24-Mar
0,Population 6 7,"Seasonally adjusted (x 1,000)",24530.10,24528.70,24526.20,24531.20,24536.10,24538.80,24542.70,24546.80,...,25189.70,25249.00,25323.60,25384.50,25448.60,25506.10,25562.60,25662.60,25727.90,25800.20
1,,Trend-cycle 8,..,..,..,..,..,..,..,..,...,..,..,..,..,..,..,..,..,..,..
2,Labour force 9,"Seasonally adjusted (x 1,000)",18812.00,17791.80,18257.20,18971.60,19154.80,19266.90,19399.40,19457.30,...,20257.70,20270.30,20282.10,20344.80,20385.30,20416.20,20435.80,20461.80,20553.60,20626.30
3,,Trend-cycle 8,..,..,..,..,..,..,..,..,...,..,..,..,..,..,..,..,..,..,..
4,Employment 10,"Seasonally adjusted (x 1,000)",17227.00,15305.60,15599.10,16603.40,16993.20,17233.50,17673.10,17744.10,...,19135.60,19131.00,19153.00,19203.40,19216.80,19234.30,19256.20,19264.90,19332.60,19336.70


In [7]:
# Keep only the rows that carry a 'Labour force characteristics' label;
# rows where that column is NaN are discarded.
input_df = input_df.dropna(subset=['Labour force characteristics'])

In [8]:
# Preview the first five rows that remain after filtering.
input_df.head(n=5)

Unnamed: 0,Labour force characteristics,Data type,20-Mar,20-Apr,20-May,20-Jun,20-Jul,20-Aug,20-Sep,20-Oct,...,23-Jun,23-Jul,23-Aug,23-Sep,23-Oct,23-Nov,23-Dec,24-Jan,24-Feb,24-Mar
0,Population 6 7,"Seasonally adjusted (x 1,000)",24530.1,24528.7,24526.2,24531.2,24536.1,24538.8,24542.7,24546.8,...,25189.7,25249.0,25323.6,25384.5,25448.6,25506.1,25562.6,25662.6,25727.9,25800.2
2,Labour force 9,"Seasonally adjusted (x 1,000)",18812.0,17791.8,18257.2,18971.6,19154.8,19266.9,19399.4,19457.3,...,20257.7,20270.3,20282.1,20344.8,20385.3,20416.2,20435.8,20461.8,20553.6,20626.3
4,Employment 10,"Seasonally adjusted (x 1,000)",17227.0,15305.6,15599.1,16603.4,16993.2,17233.5,17673.1,17744.1,...,19135.6,19131.0,19153.0,19203.4,19216.8,19234.3,19256.2,19264.9,19332.6,19336.7
6,Full-time employment 11,"Seasonally adjusted (x 1,000)",14553.2,13138.1,13358.0,13898.2,13961.5,14108.0,14529.4,14534.9,...,15947.9,15920.3,15948.5,15964.5,15989.9,16028.4,16027.8,16029.4,16099.5,16110.0
8,Part-time employment 12,"Seasonally adjusted (x 1,000)",2673.7,2167.4,2241.1,2705.2,3031.7,3125.5,3143.8,3209.3,...,3187.7,3210.6,3204.4,3239.0,3226.9,3205.9,3228.4,3235.4,3233.1,3226.7


In [9]:
def change_date_format(col_name):
    '''
        Convert a YY-Mon column label to the standard YYYY-MM-DD format.

        Arg:
            col_name (str): column label, e.g. "20-Feb"
        Return:
            str: the first day of that month
                 ex) 20-Feb -> 2020-02-01
                 any label that is not a YY-Mon date (including hyphenated
                 text such as "Full-time") is returned unchanged
    '''
    try:
        # NOTE: %b matches abbreviated month names in the current locale.
        date_obj = datetime.strptime(col_name, "%y-%b")
    except (TypeError, ValueError):
        # Not a YY-Mon label (or not a string at all): keep the original
        # label instead of aborting the whole rename.
        return col_name
    return date_obj.strftime("%Y-%m-%d")

# Drop the 'Data type' column first so it does not end up in the rename mapping.
input_df = input_df.drop(columns='Data type')

# Old-label -> new-label mapping used to rename the columns: every label goes
# through change_date_format, then the characteristics column is renamed 'Date'.
not_month_cols = {'Labour force characteristics': 'Date'}
rename_dict = {
    **{col_name: change_date_format(col_name) for col_name in input_df.columns},
    **not_month_cols,
}
input_df = input_df.rename(columns=rename_dict)

In [10]:
def remove_numbers(text):
    '''
        Remove numeric values from a string and tidy the leftover whitespace.

        Arg:
            text (str): label that may contain digits, e.g. "Employment 12"
        Return:
            str: the label with every run of digits removed, surrounding
                 whitespace trimmed and repeated inner whitespace collapsed
                 ex) Employment 12 -> Employment
                     Population 6 7 -> Population
    '''
    without_digits = re.sub(r'\d+', '', text)
    # Deleting the digits leaves the spaces around them behind (e.g.
    # "Employment "); split/join trims both ends and collapses inner runs.
    return ' '.join(without_digits.split())

# Run every label in the 'Date' column through remove_numbers.
input_df['Date'] = input_df['Date'].map(remove_numbers)

In [11]:
# Swap rows and columns: each original column label becomes a row label.
cleaned_df = input_df.T

In [12]:
# Preview the first five rows of the transposed table.
cleaned_df.head(n=5)

Unnamed: 0,0,2,4,6,8,10,13,15,17
Date,Population,Labour force,Employment,Full-time employment,Part-time employment,Unemployment,Unemployment rate,Participation rate,Employment rate
2020-03-01,24530.10,18812.00,17227.00,14553.20,2673.70,1585.00,8.4,76.7,70.2
2020-04-01,24528.70,17791.80,15305.60,13138.10,2167.40,2486.20,14,72.5,62.4
2020-05-01,24526.20,18257.20,15599.10,13358.00,2241.10,2658.10,14.6,74.4,63.6
2020-06-01,24531.20,18971.60,16603.40,13898.20,2705.20,2368.20,12.5,77.3,67.7
