In [1]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# Sample dataset (replace with your actual dataset)
data = {
    'Age': [25, 45, 22, 35, 29, 34, 50, 28, 40, 32],
    'Department': ['Sales', 'HR', 'IT', 'Sales', 'IT', 'HR', 'IT', 'Sales', 'HR', 'IT'],
    'YearsAtCompany': [1, 10, 2, 5, 3, 7, 15, 2, 8, 4],
    'Salary': [40000, 80000, 45000, 60000, 50000, 75000, 90000, 42000, 70000, 62000],
    'LeftCompany': [0, 1, 0, 0, 0, 1, 1, 0, 1, 0]  # Target variable: 1 = Left, 0 = Stayed
}

# Convert to a DataFrame
df = pd.DataFrame(data)

# Inspect the dataset
print("Dataset Overview:")
print(df.head())

# Preprocess the dataset
# Encode categorical variables
encoder = LabelEncoder()
df['Department'] = encoder.fit_transform(df['Department'])

# Separate features and target
X = df.drop('LeftCompany', axis=1)
y = df['LeftCompany']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale numerical features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train a Random Forest Classifier
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Train a Logistic Regression Classifier
lr_model = LogisticRegression(random_state=42)
lr_model.fit(X_train, y_train)

# Evaluate both models
def evaluate_model(model, X_test, y_test, model_name):
    y_pred = model.predict(X_test)
    print(f"Evaluation Metrics for {model_name}:")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
    print(f"Precision: {precision_score(y_test, y_pred):.2f}")
    print(f"Recall: {recall_score(y_test, y_pred):.2f}")
    print(f"F1 Score: {f1_score(y_test, y_pred):.2f}")
    print(classification_report(y_test, y_pred))

# Evaluate Random Forest
evaluate_model(rf_model, X_test, y_test, "Random Forest")

# Evaluate Logistic Regression
evaluate_model(lr_model, X_test, y_test, "Logistic Regression")
import pandas as pd

Dataset Overview:
   Age Department  YearsAtCompany  Salary  LeftCompany
0   25      Sales               1   40000            0
1   45         HR              10   80000            1
2   22         IT               2   45000            0
3   35      Sales               5   60000            0
4   29         IT               3   50000            0
Evaluation Metrics for Random Forest:
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1 Score: 1.00
              precision    recall  f1-score   support

           1       1.00      1.00      1.00         2

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

Evaluation Metrics for Logistic Regression:
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1 Score: 1.00
              precision    recall  f1-score   support

           1       1.00      1.00      1.00         2

    accuracy                           1.00         2
   macro avg   