<a href="https://colab.research.google.com/github/yogithamekala/Reproducing_deep_learning_model/blob/main/Employee_Attrition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

In [3]:
import numpy as np

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.utils import class_weight
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping


In [8]:
def load_data(Employee_Attrition):
    data = pd.read_csv(Employee_Attrition)
    return data

In [9]:
def preprocess_data(data):
    # Convert Attrition to binary (1 for Yes, 0 for No)
    data['Attrition'] = data['Attrition'].apply(lambda x: 1 if x == 'Yes' else 0)

    # Drop unnecessary columns
    data = data.drop(['EmployeeNumber', 'Over18', 'EmployeeCount', 'StandardHours'], axis=1)

    # One-Hot encode categorical features
    data = pd.get_dummies(data)

    # Split features and target
    X = data.drop('Attrition', axis=1)
    y = data['Attrition']

    return X, y

In [10]:
def split_and_normalize(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Standardize numerical features
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test

In [11]:
def build_model(input_dim):
    model = Sequential([
        Dense(64, activation='relu', input_dim=input_dim),
        Dropout(0.3),
        Dense(32, activation='relu'),
        Dropout(0.3),
        Dense(1, activation='sigmoid')  # Sigmoid for binary classification
    ])

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [26]:
def train_model(model, X_train, y_train, X_test, y_test):
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # Calculate class weights to handle imbalanced data
    class_weights = class_weight.compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)

    # Ensure class indices are 0 and 1
    class_weights_dict = {0: class_weights[0], 1: class_weights[1]}

    # Train the model
    history = model.fit(X_train, y_train, epochs=50, batch_size=64,
                        validation_data=(X_test, y_test),
                        class_weight=class_weights_dict,
                        callbacks=[early_stopping])

    return history

In [20]:
!pip install -U scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading scikit_learn-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.4/13.4 MB[0m [31m34.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.3.2
    Uninstalling scikit-learn-1.3.2:
      Successfully uninstalled scikit-learn-1.3.2
Successfully installed scikit-learn-1.5.1


In [28]:
if __name__ == '__main__':
    # Step 1: Load the data
    data = load_data('Employee_Attrition.csv')

    # Step 2: Preprocess the data
    X, y = preprocess_data(data)

    # Convert y to categorical
    y = pd.Categorical(y)

    # Step 3: Split and normalize the data
    X_train, X_test, y_train, y_test = split_and_normalize(X, y)

    # Step 4: Build the model
    model = build_model(input_dim=X_train.shape[1])

    # Step 5: Train the model
    history = train_model(model, X_train, y_train, X_test, y_test)

    # Step 6: Evaluate the model
    test_loss, test_acc = model.evaluate(X_test, y_test)
    print(f'Test Accuracy: {test_acc}')


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 17ms/step - accuracy: 0.4887 - loss: 0.7647 - val_accuracy: 0.6293 - val_loss: 0.6459
Epoch 2/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5840 - loss: 0.6345 - val_accuracy: 0.7245 - val_loss: 0.5858
Epoch 3/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6432 - loss: 0.6498 - val_accuracy: 0.7517 - val_loss: 0.5593
Epoch 4/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6846 - loss: 0.6034 - val_accuracy: 0.7551 - val_loss: 0.5493
Epoch 5/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7122 - loss: 0.5570 - val_accuracy: 0.7653 - val_loss: 0.5382
Epoch 6/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6926 - loss: 0.5767 - val_accuracy: 0.7619 - val_loss: 0.5256
Epoch 7/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━

In [30]:
def load_data(Employee_Attrition):
    data = pd.read_csv(Employee_Attrition)
    return data

In [31]:
def preprocess_data(data):
    # Convert Attrition to binary
    data['Attrition'] = data['Attrition'].apply(lambda x: 1 if x == 'Yes' else 0)

    # Drop unnecessary columns
    data = data.drop(['EmployeeNumber', 'Over18', 'EmployeeCount', 'StandardHours'], axis=1)

    # One-Hot encode categorical features
    data = pd.get_dummies(data)

    return data

In [32]:
def split_and_normalize(X, y):
    from sklearn.model_selection import train_test_split

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Standardize numerical features
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test

In [34]:
!git clone https://github.com/yogithamekala/Reproducing_deep_learning_model
%cd Reproducing_deep_learning_model


Cloning into 'Reproducing_deep_learning_model'...
remote: Enumerating objects: 4, done.[K
remote: Counting objects: 100% (4/4), done.[K
remote: Compressing objects: 100% (3/3), done.[K
remote: Total 4 (delta 0), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (4/4), done.
/content/Reproducing_deep_learning_model


In [39]:
# Use a string literal to represent the file name.
'README.md'              # Explain the project
# requirements.txt        # Python dependencies
# train_model.py          # Main script
# dataset/                # Dataset (or provide a link in the README)
# results/                # Output results (optional)
# utils/                  # Any utility scripts (optional)

'README.md'

In [40]:
pip freeze > requirements.txt