# Job Stress

In [22]:
import pandas as pd
import numpy as np

df = pd.read_excel("./data/job stress.xlsx")

In [23]:
# Work Schedule Variability
df["work_schedule_variability"] = (
    df["Percent of workers that had work schedule variability required"] / 100
)


df["work_schedule_variability_logit"] = np.log(
    df["work_schedule_variability"] / 
    (1 - df["work_schedule_variability"] + 1e-6)
)


# Pause Work
df["pause_work"] = (
    df["Percent of workers that had the ability to pause work"] / 100
)

df["pause_work_logit"] = np.log(
    df["pause_work"] / 
    (1 - df["pause_work"] + 1e-6)
)

df.head()

Unnamed: 0,occupation1,occupation2,Percent of workers that had work schedule variability required,Percent of workers that did not have work schedule variability required,Percent of workers that had the ability to pause work,Percent of workers that did not have the ability to pause work,"Percent of workers that had a consistent, generally fast work pace","Percent of workers that had a consistent, generally slow work pace",Percent of workers that had a varying work pace,work_schedule_variability,work_schedule_variability_logit,pause_work,pause_work_logit
0,IT,Computer and mathematical occupations,27.6,72.4,94.4,5.6,61.1,3.0,35.9,0.276,-0.964392,0.944,2.824757
1,HR,Human resources managers,7.8,92.2,99.5,0.5,70.8,6.4,22.8,0.078,-2.469837,0.995,5.293105
2,Legal,Legal occupations,37.9,62.1,93.4,6.6,47.7,4.7,47.5,0.379,-0.493796,0.934,2.649807
3,Finance,Business and financial operations occupations,27.9,72.1,97.7,2.3,59.6,4.7,35.7,0.279,-0.949429,0.977,3.748949
4,Operations,General and operations managers,34.1,65.9,95.7,4.3,65.6,7.8,26.6,0.341,-0.658843,0.957,3.10258


In [24]:
epsilon = 1e-8

fast = df["Percent of workers that had a consistent, generally fast work pace"] / 100
slow = df["Percent of workers that had a consistent, generally slow work pace"] / 100
varying = df["Percent of workers that had a varying work pace"] / 100

df["work_pace"] = np.sqrt(2/3) * np.log(
    (fast + epsilon) / np.sqrt((slow + epsilon) * (varying + epsilon))
)

print(df["work_pace"].describe())


count    8.000000
mean     2.110797
std      2.382275
min      0.674643
25%      1.165337
50%      1.345026
75%      1.574623
max      7.932585
Name: work_pace, dtype: float64


In [25]:
df.head()

Unnamed: 0,occupation1,occupation2,Percent of workers that had work schedule variability required,Percent of workers that did not have work schedule variability required,Percent of workers that had the ability to pause work,Percent of workers that did not have the ability to pause work,"Percent of workers that had a consistent, generally fast work pace","Percent of workers that had a consistent, generally slow work pace",Percent of workers that had a varying work pace,work_schedule_variability,work_schedule_variability_logit,pause_work,pause_work_logit,work_pace
0,IT,Computer and mathematical occupations,27.6,72.4,94.4,5.6,61.1,3.0,35.9,0.276,-0.964392,0.944,2.824757,1.447515
1,HR,Human resources managers,7.8,92.2,99.5,0.5,70.8,6.4,22.8,0.078,-2.469837,0.995,5.293105,1.443835
2,Legal,Legal occupations,37.9,62.1,93.4,6.6,47.7,4.7,47.5,0.379,-0.493796,0.934,2.649807,0.947777
3,Finance,Business and financial operations occupations,27.9,72.1,97.7,2.3,59.6,4.7,35.7,0.279,-0.949429,0.977,3.748949,1.246218
4,Operations,General and operations managers,34.1,65.9,95.7,4.3,65.6,7.8,26.6,0.341,-0.658843,0.957,3.10258,1.237856


In [27]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

fast = fast.replace(0, epsilon)
slow = slow.replace(0, epsilon)
varying = varying.replace(0, epsilon)

gmean = (fast * slow * varying) ** (1/3)

df["clr_fast"] = np.log((fast + epsilon) / gmean)
df["clr_slow"] = np.log((slow + epsilon) / gmean)
df["clr_varying"] = np.log((varying + epsilon) / gmean)

clr_cols = ["clr_fast", "clr_slow", "clr_varying"]

X = StandardScaler().fit_transform(df[clr_cols])

pca = PCA(n_components=1)
df["work_pace_pca"] = pca.fit_transform(X)

print("Explained variance ratio:", pca.explained_variance_ratio_)

Explained variance ratio: [0.98340421]


In [28]:
df.head()

Unnamed: 0,occupation1,occupation2,Percent of workers that had work schedule variability required,Percent of workers that did not have work schedule variability required,Percent of workers that had the ability to pause work,Percent of workers that did not have the ability to pause work,"Percent of workers that had a consistent, generally fast work pace","Percent of workers that had a consistent, generally slow work pace",Percent of workers that had a varying work pace,work_schedule_variability,work_schedule_variability_logit,pause_work,pause_work_logit,work_pace,clr_fast,clr_slow,clr_varying,work_pace_pca
0,IT,Computer and mathematical occupations,27.6,72.4,94.4,5.6,61.1,3.0,35.9,0.276,-0.964392,0.944,2.824757,1.447515,1.181891,-1.832008,0.650117,-0.577181
1,HR,Human resources managers,7.8,92.2,99.5,0.5,70.8,6.4,22.8,0.078,-2.469837,0.995,5.293105,1.443835,1.178887,-1.224674,0.045788,-0.913157
2,Legal,Legal occupations,37.9,62.1,93.4,6.6,47.7,4.7,47.5,0.379,-0.493796,0.934,2.649807,0.947777,0.773857,-1.543512,0.769655,-0.716181
3,Finance,Business and financial operations occupations,27.9,72.1,97.7,2.3,59.6,4.7,35.7,0.279,-0.949429,0.977,3.748949,1.246218,1.017533,-1.52256,0.505028,-0.740119
4,Operations,General and operations managers,34.1,65.9,95.7,4.3,65.6,7.8,26.6,0.341,-0.658843,0.957,3.10258,1.237856,1.010705,-1.118746,0.108041,-0.963264


In [30]:
df.to_csv("./data/job_stress_merge.csv", index=False)
