<a href="https://colab.research.google.com/github/yugan243/Telco-Customer-Churn/blob/main/Data%20Preprocessing%20and%20Model%20Training/3_hyperparams_tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!git clone https://github.com/yugan243/Telco-Customer-Churn.git

fatal: destination path 'Telco-Customer-Churn' already exists and is not an empty directory.


## Hyperparameter Tuning with Cross-Validation

### Import Dependencies

In [None]:
# Standard library
import os
import sys
import warnings

# Third-party: numerics, plotting, persistence
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

# Third-party: models, model selection and metrics
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.metrics import (
                            classification_report,
                            confusion_matrix,
                            accuracy_score,
                            f1_score,
                            precision_score,
                            recall_score,
                            )

# Project-local helper. A filesystem path cannot appear in a `from ... import`
# statement, so the repository root is put on sys.path instead and the module is
# imported by its dotted name. Two candidate roots are tried: the Colab clone
# location and the parent of the current working directory (local runs started
# from the notebook's own folder).
# NOTE(review): assumes the module lives at <repo root>/Scripts/business_focused_evaluator.py,
# as the original import line implies -- confirm against the repository layout.
for _repo_root in ('/content/Telco-Customer-Churn', os.path.abspath('..')):
    if os.path.isdir(os.path.join(_repo_root, 'Scripts')) and _repo_root not in sys.path:
        sys.path.insert(0, _repo_root)
from Scripts.business_focused_evaluator import BusinessModelEvaluator

# Silence all library warnings for the rest of the notebook
warnings.filterwarnings('ignore')

### Data Loading

In [None]:
# Directory holding the saved train/test splits, relative to the working directory
ARTIFACTS_DIR = '../Artifacts'


def load_npz_array(name, key='arr_0'):
    """Load a single array from ``<ARTIFACTS_DIR>/<name>.npz``.

    Parameters
    ----------
    name : str
        File stem of the archive (without the ``.npz`` extension).
    key : str, default 'arr_0'
        Name of the array inside the archive.

    Returns
    -------
    The object stored under ``key``.

    Notes
    -----
    The archive is opened in a ``with`` block so its file handle is closed as
    soon as the array has been read, instead of staying open for the life of
    the kernel.
    """
    # allow_pickle=True can execute arbitrary code while unpickling; only use it
    # on artifacts produced by this project, never on untrusted files.
    with np.load(f'{ARTIFACTS_DIR}/{name}.npz', allow_pickle=True) as archive:
        return archive[key]


X_train = load_npz_array('X_train')
Y_train = load_npz_array('Y_train')
X_test = load_npz_array('X_test')
Y_test = load_npz_array('Y_test')

### 1. Random Forest + Grid Search CV + Stratified K Fold

In [None]:
# Candidate values explored for each Random Forest hyperparameter
params_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[10, 15, 20],
    min_samples_split=[5, 10],
    min_samples_leaf=[2, 4],
    max_features=['sqrt', 'log2'],
)

# Metrics recorded for every candidate; each is reported under its own scorer name
scorings = {metric: metric for metric in ('f1', 'recall', 'precision')}

# Cross-validation splitter and base estimator, both seeded for repeatable runs
cv_stratified = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
rf_clf_tuned = RandomForestClassifier(random_state=42, n_jobs=-1)

# Exhaustive search over the grid; the final model is refit using the F1 ranking
grid_search = GridSearchCV(
    rf_clf_tuned,
    params_grid,
    scoring=scorings,
    refit='f1',
    cv=cv_stratified,
    n_jobs=-1,
    verbose=2,
)

# Run the hyperparameter search on the training split
print("Starting hyperparameter tuning for Random Forest...")
grid_search.fit(X_train, Y_train)
print("Tuning complete.")

# Position of the winning candidate inside cv_results_
best_index = grid_search.best_index_

# Headline result: best cross-validated F1 and the parameters that produced it
print("\n--- Hyperparameter Tuning Results ---")
print(f"Best F1-Score (from CV): {grid_search.best_score_:.4f}")
print("Best hyperparameters:", grid_search.best_params_, sep="\n")

# Mean cross-validation value of every tracked metric for that same candidate
print("\nCross-Validation Metrics for the Best Model:")
for _label, _column in (
    ("  - Mean F1-Score: ", 'mean_test_f1'),
    ("  - Mean Recall:   ", 'mean_test_recall'),
    ("  - Mean Precision:", 'mean_test_precision'),
):
    print(f"{_label}{grid_search.cv_results_[_column][best_index]:.4f}")

# Score the refit best estimator on the held-out test split
best_rf_model = grid_search.best_estimator_
y_pred_best_rf = best_rf_model.predict(X_test)

# Per-class precision / recall / F1 on the test split
print("\n--- Final Classification Report on Test Set ---")
print(classification_report(y_true=Y_test, y_pred=y_pred_best_rf))