In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Location of the engineered feature table (absolute path under /content).
DATA_PATH = '/content/Dataset_engineered.csv'
features_df = pd.read_csv(DATA_PATH)

In [None]:
# Column the models learn to predict; every other column is used as a feature.
TARGET_COL = 'customer_service_inquiries_encoded'

# NOTE(review): the recorded logistic-regression run in this notebook reports
# 1.0000 accuracy -- verify that no remaining feature column is derived from
# the target before trusting these scores.
y = features_df[TARGET_COL]
X = features_df.drop(columns=[TARGET_COL])

In [None]:
# Hold out 20% of the rows for final evaluation; the fixed seed makes the
# split repeatable across kernel restarts.
# NOTE(review): the split is not stratified -- consider stratify=y if exact
# class proportions in both splits matter.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so the test rows never influence the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
def tune_and_evaluate(model, param_grid, X_train, y_train, X_test, y_test, model_name,
                      cv=5, scoring='accuracy', n_jobs=-1):
    """Grid-search ``model`` on the training split and report test-set metrics.

    Runs ``GridSearchCV`` over ``param_grid``, takes the refitted best
    estimator, predicts on ``X_test`` and prints the best parameters, the
    accuracy, the classification report and (when the estimator exposes
    ``predict_proba``) the ROC-AUC score.

    Parameters
    ----------
    model : estimator
        Unfitted scikit-learn classifier to tune.
    param_grid : dict
        Hyper-parameter grid forwarded to ``GridSearchCV``.
    X_train, y_train
        Training features and labels used for the cross-validated search.
    X_test, y_test
        Held-out features and labels used only for the printed metrics.
    model_name : str
        Label prefixed to every printed line.
    cv : int, default 5
        Cross-validation setting forwarded to ``GridSearchCV``.
    scoring : str, default 'accuracy'
        Single metric used by the search to rank parameter combinations.
    n_jobs : int, default -1
        Parallelism forwarded to ``GridSearchCV``.

    Returns
    -------
    float
        Accuracy of the best estimator on ``X_test`` / ``y_test``.
    """
    grid_search = GridSearchCV(model, param_grid, cv=cv, scoring=scoring, n_jobs=n_jobs)
    grid_search.fit(X_train, y_train)

    # best_estimator_ is the model refitted on the full training data with
    # the winning parameter combination.
    best_model = grid_search.best_estimator_
    y_pred = best_model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"{model_name} Best Parameters: {grid_search.best_params_}")
    print(f"{model_name} Accuracy: {accuracy:.4f}")
    print(f"{model_name} Classification Report:\n{classification_report(y_test, y_pred)}")

    # ROC-AUC needs class probabilities, which not every estimator provides.
    if hasattr(best_model, 'predict_proba'):
        y_pred_proba = best_model.predict_proba(X_test)
        if y_pred_proba.shape[1] > 2:  # Multi-class case
            # Pass the classifier's own class order so each probability column
            # is matched to its label explicitly instead of being inferred
            # from whatever labels happen to appear in y_test.
            roc_auc = roc_auc_score(
                y_test, y_pred_proba, multi_class='ovr', labels=best_model.classes_
            )
        else:  # Binary case
            # Score with the second probability column; a single-column
            # output is flattened and used as-is.
            positive_scores = (
                y_pred_proba[:, 1] if y_pred_proba.shape[1] > 1 else y_pred_proba.ravel()
            )
            roc_auc = roc_auc_score(y_test, positive_scores)
        print(f"{model_name} ROC-AUC Score: {roc_auc:.4f}")

    print("-" * 50)
    return accuracy


In [None]:
# Grid over the inverse regularisation strength C and the optimiser.
# NOTE(review): the recorded report shows three target classes; recent
# scikit-learn releases deprecate 'liblinear' for multiclass problems --
# confirm against the installed version before re-running.
logistic_params = {'C': [0.01, 0.1, 1, 10], 'solver': ['liblinear', 'lbfgs']}
# Logistic regression is trained on the standardised features. A fixed
# random_state keeps reruns reproducible for solvers that shuffle the data.
logistic_acc = tune_and_evaluate(
    LogisticRegression(random_state=42),
    logistic_params,
    X_train_scaled,
    y_train,
    X_test_scaled,
    y_test,
    "Logistic Regression",
)

Logistic Regression Best Parameters: {'C': 1, 'solver': 'liblinear'}
Logistic Regression Accuracy: 1.0000
Logistic Regression Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      8309
           1       1.00      1.00      1.00      8352
           2       1.00      1.00      1.00      8339

    accuracy                           1.00     25000
   macro avg       1.00      1.00      1.00     25000
weighted avg       1.00      1.00      1.00     25000

Logistic Regression ROC-AUC Score: 1.0000
--------------------------------------------------


In [None]:
# Grid over tree depth and the minimum number of samples required to split.
dt_params = {'max_depth': [5, 10, 20], 'min_samples_split': [2, 5, 10]}
# The tree is trained on the unscaled features. random_state is fixed because
# the tree permutes candidate features at each split, so an unseeded
# estimator can give different trees (and scores) on every run.
dt_acc = tune_and_evaluate(
    DecisionTreeClassifier(random_state=42),
    dt_params,
    X_train,
    y_train,
    X_test,
    y_test,
    "Decision Tree",
)


Decision Tree Best Parameters: {'max_depth': 20, 'min_samples_split': 2}
Decision Tree Accuracy: 0.9912
Decision Tree Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      8309
           1       0.99      0.99      0.99      8352
           2       0.99      0.99      0.99      8339

    accuracy                           0.99     25000
   macro avg       0.99      0.99      0.99     25000
weighted avg       0.99      0.99      0.99     25000

Decision Tree ROC-AUC Score: 0.9963
--------------------------------------------------


In [None]:
# Grid over forest size and per-tree depth (None lets trees grow fully).
rf_params = {'n_estimators': [50, 100, 200], 'max_depth': [10, 20, None]}
# The forest is trained on the unscaled features. random_state is fixed
# because bootstrap sampling and per-split feature sampling are random, so an
# unseeded forest is not reproducible across runs.
rf_acc = tune_and_evaluate(
    RandomForestClassifier(random_state=42),
    rf_params,
    X_train,
    y_train,
    X_test,
    y_test,
    "Random Forest",
)


Random Forest Best Parameters: {'max_depth': None, 'n_estimators': 200}
Random Forest Accuracy: 0.9802
Random Forest Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.99      0.98      8309
           1       0.98      0.97      0.98      8352
           2       0.98      0.99      0.98      8339

    accuracy                           0.98     25000
   macro avg       0.98      0.98      0.98     25000
weighted avg       0.98      0.98      0.98     25000

Random Forest ROC-AUC Score: 0.9993
--------------------------------------------------


In [None]:
# Collect the test accuracies returned by each tuning run, in the order the
# models were trained, and print them side by side.
model_accuracies = {
    "Logistic Regression": logistic_acc,
    "Decision Tree": dt_acc,
    "Random Forest": rf_acc,
}

print("\nModel Accuracy Comparison:")
for label, acc in model_accuracies.items():
    print(f"{label}: {acc:.4f}")


Model Accuracy Comparison:
Logistic Regression: 1.0000
Decision Tree: 0.9912
Random Forest: 0.9802
