In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Load the 'bank-full.csv' dataset
file_path = 'data/bank-full.csv'
bank_full_data = pd.read_csv(file_path, delimiter=';')

# Encode categorical variables using LabelEncoder
label_encoders = {}
categorical_columns = ['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', 'poutcome', 'y']

# Apply LabelEncoder to each categorical column
for col in categorical_columns:
    le = LabelEncoder()
    bank_full_data[col] = le.fit_transform(bank_full_data[col])
    label_encoders[col] = le

# Split the data into features (X) and target (y)
X = bank_full_data.drop('y', axis=1)  # Features
y = bank_full_data['y']               # Target

# Split data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the data (necessary for KNN and SVM)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize the classifiers
knn = KNeighborsClassifier()
log_reg = LogisticRegression(max_iter=1000)
decision_tree = DecisionTreeClassifier()
svm = SVC()

# Dictionary to store the results
results = {}

# Function to train and evaluate a model
def train_and_evaluate(model, model_name):
    model.fit(X_train_scaled, y_train)  # Train the model
    y_pred = model.predict(X_test_scaled)  # Predict on test data
    accuracy = accuracy_score(y_test, y_pred)  # Calculate accuracy
    report = classification_report(y_test, y_pred)  # Classification report
    results[model_name] = {'accuracy': accuracy, 'report': report}

# Train and evaluate each model
train_and_evaluate(knn, "KNN")
train_and_evaluate(log_reg, "Logistic Regression")
train_and_evaluate(decision_tree, "Decision Tree")
train_and_evaluate(svm, "SVM")

# Display the results
for model_name, result in results.items():
    print(f"Model: {model_name}")
    print(f"Accuracy: {result['accuracy']}")
    print(f"Classification Report:\n{result['report']}")
    print("-" * 50)


Model: KNN
Accuracy: 0.8911865531350216
Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.97      0.94      7952
           1       0.59      0.33      0.43      1091

    accuracy                           0.89      9043
   macro avg       0.75      0.65      0.68      9043
weighted avg       0.87      0.89      0.88      9043

--------------------------------------------------
Model: Logistic Regression
Accuracy: 0.8878690699988941
Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.98      0.94      7952
           1       0.60      0.22      0.32      1091

    accuracy                           0.89      9043
   macro avg       0.75      0.60      0.63      9043
weighted avg       0.86      0.89      0.86      9043

--------------------------------------------------
Model: Decision Tree
Accuracy: 0.8724980648015039
Classification Report:
              precision    re