# WellSAP

## Workplace WellBeing Work_Life_Balance Training

### Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
import sklearn
import joblib

### Data Loading

In [2]:
attrition_data = pd.read_csv('./../dataset/attrition_data.csv', header=0, index_col='Employee_ID')
lifestyle_data = pd.read_csv('./../dataset/lifestyle_data.csv', header=0, index_col='Employee_ID')

### Data Pre-Processing

#### Lifestyle Data Pre-Processing

In [3]:
input_features_int_int = lifestyle_data.loc[:,['TODO_COMPLETED','LOST_VACATION']]
input_features_int_float = lifestyle_data.loc[:,['TIME_FOR_PASSION','FLOW']]

In [4]:
input_features_int_int = input_features_int_int.groupby('Employee_ID').mean().apply(np.ceil).astype(int)
input_features_int_float = input_features_int_float.groupby('Employee_ID').mean()

In [5]:
input_features = pd.concat([input_features_int_int, input_features_int_float], axis=1)
input_features

Unnamed: 0_level_0,TODO_COMPLETED,LOST_VACATION,TIME_FOR_PASSION,FLOW
Employee_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1.0,5,2,3.36,3.24
2.0,6,5,2.68,3.16
3.0,6,3,3.40,3.16
4.0,6,4,4.44,2.76
5.0,6,5,2.24,2.24
...,...,...,...,...
307.0,7,2,3.60,4.28
308.0,7,3,3.84,4.12
309.0,7,2,3.40,3.24
310.0,7,3,3.72,3.36


#### Attrition Data Pre-Processing

In [6]:
gender_encoder = preprocessing.LabelEncoder()
attrition_data['Gender'] = gender_encoder.fit_transform(attrition_data['Gender'])
joblib.dump(gender_encoder,'./../scaler_and _encoder/work_balance_gender_encoder.pkl')
age_encoder = preprocessing.LabelEncoder()
attrition_data['Age_Group'] = age_encoder.fit_transform(attrition_data['Age_Group'])
joblib.dump(age_encoder,'./../scaler_and _encoder/work_balance_age_encoder.pkl')

['./../scaler_and _encoder/work_balance_age_encoder.pkl']

In [7]:
input_features = pd.concat([input_features, attrition_data['Age_Group'], attrition_data["Gender"]], axis=1)
input_features

Unnamed: 0_level_0,TODO_COMPLETED,LOST_VACATION,TIME_FOR_PASSION,FLOW,Age_Group,Gender
Employee_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1.0,5,2,3.36,3.24,0,1
2.0,6,5,2.68,3.16,1,1
3.0,6,3,3.40,3.16,0,0
4.0,6,4,4.44,2.76,0,0
5.0,6,5,2.24,2.24,0,0
...,...,...,...,...,...,...
307.0,7,2,3.60,4.28,0,1
308.0,7,3,3.84,4.12,0,0
309.0,7,2,3.40,3.24,1,0
310.0,7,3,3.72,3.36,1,0


In [8]:
input_features[['Age_Group', 'Gender']] = input_features[['Age_Group', 'Gender']].astype('category')
input_features.info()

<class 'pandas.core.frame.DataFrame'>
Float64Index: 311 entries, 1.0 to 311.0
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype   
---  ------            --------------  -----   
 0   TODO_COMPLETED    311 non-null    int64   
 1   LOST_VACATION     311 non-null    int64   
 2   TIME_FOR_PASSION  311 non-null    float64 
 3   FLOW              311 non-null    float64 
 4   Age_Group         311 non-null    category
 5   Gender            311 non-null    category
dtypes: category(2), float64(2), int64(2)
memory usage: 13.0 KB


### Output Data Pre-Processing

In [9]:
output_features = attrition_data["WorkLifeBalance"].astype('category')
output_features

  for val, m in zip(values.ravel(), mask.ravel())


Employee_ID
116.0    3.0
211.0    3.0
18.0     2.0
258.0    3.0
238.0    3.0
        ... 
246.0    3.0
74.0     3.0
249.0    4.0
286.0    3.0
158.0    2.0
Name: WorkLifeBalance, Length: 311, dtype: category
Categories (4, float64): [1.0, 2.0, 3.0, 4.0]

### Modelling

In [10]:
from sklearn.model_selection import train_test_split
input_train, input_test, ouput_train, output_test = train_test_split(input_features, output_features, test_size = 0.2, random_state=42)

In [11]:
from sklearn.svm import SVC
svm_classification = SVC(kernel='linear')
svm_classification.fit(input_train, ouput_train)
print(f"Train Score: {svm_classification.score(input_train, ouput_train)}")
print(f"Test Score: {svm_classification.score(input_test, output_test)}")
joblib.dump(svm_classification,'./../models/workplace_wellbeing_balance_model.pkl')

Train Score: 0.625
Test Score: 0.6349206349206349


['./../models/workplace_wellbeing_balance_model.pkl']