In [None]:
# Import required libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Load the Employee Attrition Dataset
# Replace 'employee_attrition.csv' with the actual path to your dataset
df = pd.read_csv('employee_attrition.csv')

# Display the first few rows of the dataset
print("Dataset Preview:")
print(df.head())

# Preprocessing
# Convert categorical features to numerical using Label Encoding
categorical_columns = ['JobRole', 'Overtime', 'Department', 'Attrition']

# Fail early, with the available names listed, if a column is misspelled or
# absent; otherwise the KeyError only appears part-way through the encoding loop.
missing_columns = [col for col in categorical_columns if col not in df.columns]
if missing_columns:
    raise KeyError(
        f"Columns not found in dataset: {missing_columns}. "
        f"Available columns: {list(df.columns)}"
    )

# Keep one fitted encoder per column. Refitting a single shared encoder would
# overwrite its classes_ on every iteration, so the integer codes of all but
# the last column could no longer be mapped back to their original labels.
label_encoders = {}
for col in categorical_columns:
    label_encoders[col] = LabelEncoder()
    df[col] = label_encoders[col].fit_transform(df[col])

# `le` refers to the encoder of the last listed column ('Attrition'), so
# le.inverse_transform(...) turns predicted codes back into target labels.
le = label_encoders[categorical_columns[-1]]

# Define features (X) and target (y)
# NOTE(review): every column left in X must already be numeric for the tree to
# fit; only the columns listed above are encoded -- confirm against the dataset.
X = df.drop(columns=['Attrition'])
y = df['Attrition']

# Split the data into training and testing sets (80% train / 20% test, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Decision Tree model with default hyperparameters
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)

# Make predictions
y_pred = dt_model.predict(X_test)

# Evaluate the model
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))

# Hyperparameter tuning
# Example: Adjusting `max_depth` and `min_samples_split`
tuned_model = DecisionTreeClassifier(max_depth=5, min_samples_split=10, criterion='entropy', random_state=42)
tuned_model.fit(X_train, y_train)

# Make predictions with the tuned model
y_tuned_pred = tuned_model.predict(X_test)

# Evaluate the tuned model on the same held-out split as the baseline
print("\nTuned Model Classification Report:")
print(classification_report(y_test, y_tuned_pred))
print("Tuned Model Accuracy:", accuracy_score(y_test, y_tuned_pred))

In [None]:
# Compare the split-quality criteria supported by DecisionTreeClassifier.


# NOTE(review): evaluate_model is called by the hyperparameter sweeps but no
# definition is visible before its first use, so it is defined here. If another
# definition exists elsewhere in the notebook, keep only one of them.
def evaluate_model(model):
    """Fit ``model`` on the training split and return its test-set accuracy.

    Uses the notebook-level ``X_train``, ``X_test``, ``y_train`` and ``y_test``
    created by the train/test split earlier in the notebook.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``
        Fitted in place on the training split.

    Returns
    -------
    float
        Accuracy of the fitted model on the test split, as computed by
        ``accuracy_score``.
    """
    model.fit(X_train, y_train)
    return accuracy_score(y_test, model.predict(X_test))


criteria = ['gini', 'entropy']
results_criterion = []  # (criterion, accuracy) pairs, in the order tested

for crit in criteria:
    print(f"Testing criterion: {crit}")
    tree = DecisionTreeClassifier(criterion=crit, random_state=42)
    acc = evaluate_model(tree)
    results_criterion.append((crit, acc))

# Plot criterion results
plt.bar([x[0] for x in results_criterion], [x[1] for x in results_criterion], color=['red', 'green'])
plt.title("Criterion Comparison")
plt.ylabel("Accuracy")
plt.show()

In [None]:
# Analyze 'max_depth' hyperparameter
# Depth limits to sweep; None places no limit on the depth of the tree.
depths = [3, 5, 10, None]
results_depth = []  # (max_depth, accuracy) pairs, in the order tested

for depth in depths:
    print(f"Testing max_depth: {depth}")
    tree = DecisionTreeClassifier(random_state=42, max_depth=depth)
    acc = evaluate_model(tree)
    results_depth.append((depth, acc))

# Plot max_depth results
# String labels give a categorical x-axis; str() renders the None entry as "None".
depth_labels = [str(d) for d in depths]
depth_accuracies = [accuracy for _, accuracy in results_depth]

plt.plot(depth_labels, depth_accuracies, marker='o')
plt.xlabel("Max Depth")
plt.ylabel("Accuracy")
plt.title("Max Depth Comparison")
plt.show()