# Project 4 - Identifiez les causes d'attrition au sein d'une ESN

### Feature engineering

In [61]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Prétraitement et modélisation
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split



In [62]:
# Data: read the CSV into `df` (relative path, resolved against the working directory).
# NOTE(review): the file name suggests categorical columns are already encoded — confirm upstream.
df = pd.read_csv("../data/df_central_encode.csv")

In [63]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,age,revenu_mensuel,nombre_experiences_precedentes,annee_experience_totale,annees_dans_l_entreprise,annees_dans_le_poste_actuel,satisfaction_employee_environnement,note_evaluation_precedente,satisfaction_employee_nature_travail,satisfaction_employee_equipe,...,poste_Manager,poste_Représentant Commercial,poste_Ressources Humaines,poste_Senior Manager,poste_Tech Lead,domaine_etude_Entrepreunariat,domaine_etude_Infra & Cloud,domaine_etude_Marketing,domaine_etude_Ressources Humaines,domaine_etude_Transformation Digitale
0,41,5993,8,8,6,4,2,3,4,1,...,0,0,0,0,0,0,1,0,0,0
1,49,5130,1,10,10,7,3,2,2,4,...,0,0,0,0,0,0,1,0,0,0
2,37,2090,6,7,0,0,4,2,3,2,...,0,0,0,0,0,0,0,0,0,0
3,33,2909,1,8,8,7,4,3,3,3,...,0,0,0,0,0,0,1,0,0,0
4,27,3468,9,6,2,2,1,3,2,4,...,0,0,0,0,0,0,0,0,0,1


In [64]:
# Print the column names, non-null counts and dtypes of the frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1470 entries, 0 to 1469
Data columns (total 40 columns):
 #   Column                                     Non-Null Count  Dtype
---  ------                                     --------------  -----
 0   age                                        1470 non-null   int64
 1   revenu_mensuel                             1470 non-null   int64
 2   nombre_experiences_precedentes             1470 non-null   int64
 3   annee_experience_totale                    1470 non-null   int64
 4   annees_dans_l_entreprise                   1470 non-null   int64
 5   annees_dans_le_poste_actuel                1470 non-null   int64
 6   satisfaction_employee_environnement        1470 non-null   int64
 7   note_evaluation_precedente                 1470 non-null   int64
 8   satisfaction_employee_nature_travail       1470 non-null   int64
 9   satisfaction_employee_equipe               1470 non-null   int64
 10  satisfaction_employee_equilibre_pro_perso  1470 

### Création des features

**Satisfaction** “au travail” au sens large `satisfaction_globale` : moyenne des 4 dimensions (environnement, nature du travail, équipe, équilibre pro/perso).

In [65]:
# Columns holding the individual satisfaction scores that get aggregated.
sats = [
    "satisfaction_employee_environnement",
    "satisfaction_employee_nature_travail",
    "satisfaction_employee_equipe",
    "satisfaction_employee_equilibre_pro_perso",
]

# Row-wise mean of the four scores: one overall satisfaction value per row.
df["satisfaction_globale"] = df.loc[:, sats].mean(axis="columns")


In [66]:
# Display the newly created column to eyeball the computed means.
df["satisfaction_globale"]

0       2.00
1       3.00
2       3.00
3       3.25
4       2.50
        ... 
1465    3.25
1466    2.25
1467    2.25
1468    3.00
1469    2.50
Name: satisfaction_globale, Length: 1470, dtype: float64

In [67]:
# Binary flag (stored as uint8): 1 when BOTH total experience and time in the
# company are strictly below 3 years, 0 otherwise.
df["exp_moins_3_years"] = (
    df["annee_experience_totale"].lt(3) & df["annees_dans_l_entreprise"].lt(3)
).astype("uint8")

In [68]:
# Show how many rows fall into each value of the new binary flag.
print(df["exp_moins_3_years"].value_counts())


exp_moins_3_years
0    1347
1     123
Name: count, dtype: int64


In [70]:
# Drop the raw columns from which `satisfaction_globale` and
# `exp_moins_3_years` were derived, keeping only the engineered features.
# The result is reassigned rather than using `inplace=True`: pandas discourages
# in-place mutation, and reassignment keeps the transformation explicit and
# chainable.
df = df.drop(
    columns=[
        # Inputs of `satisfaction_globale`
        "satisfaction_employee_environnement",
        "satisfaction_employee_nature_travail",
        "satisfaction_employee_equipe",
        "satisfaction_employee_equilibre_pro_perso",
        # Inputs of `exp_moins_3_years`
        "annee_experience_totale",
        "annees_dans_l_entreprise",
    ]
)

In [72]:
# Inspect the first 20 rows of the frame in its current state (engineered columns appear last).
df.head(20)

Unnamed: 0,age,revenu_mensuel,nombre_experiences_precedentes,annees_dans_le_poste_actuel,note_evaluation_precedente,note_evaluation_actuelle,heure_supplementaires,augementation_salaire_precedente,attrition,nombre_participation_pee,...,poste_Ressources Humaines,poste_Senior Manager,poste_Tech Lead,domaine_etude_Entrepreunariat,domaine_etude_Infra & Cloud,domaine_etude_Marketing,domaine_etude_Ressources Humaines,domaine_etude_Transformation Digitale,satisfaction_globale,exp_moins_3_years
0,41,5993,8,4,3,3,1,11,1,0,...,0,0,0,0,1,0,0,0,2.0,0
1,49,5130,1,7,2,4,0,23,0,1,...,0,0,0,0,1,0,0,0,3.0,0
2,37,2090,6,0,2,3,1,15,1,0,...,0,0,0,0,0,0,0,0,3.0,0
3,33,2909,1,7,3,3,1,11,0,0,...,0,0,0,0,1,0,0,0,3.25,0
4,27,3468,9,2,3,3,0,12,0,1,...,0,0,0,0,0,0,0,1,2.5,0
5,32,3068,0,7,3,3,0,13,0,0,...,0,0,0,0,1,0,0,0,3.25,0
6,59,2670,4,0,4,4,1,20,0,3,...,0,0,0,0,0,0,0,1,1.75,0
7,30,2693,1,0,3,4,0,22,0,1,...,0,0,0,0,1,0,0,0,3.0,1
8,38,9526,0,7,2,4,0,21,0,0,...,0,0,1,0,1,0,0,0,3.0,0
9,36,5237,6,7,3,3,0,13,0,2,...,0,0,0,0,0,0,0,1,2.5,0
