In [10]:
import pandas as pd
import os
from dotenv import load_dotenv
import google.generativeai as genai

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, classification_report

In [11]:
load_dotenv()
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
gemini = genai.GenerativeModel("gemini-3-flash-preview")

In [12]:
df = pd.read_csv("../data/credit_risk_dataset.csv")

X = df.drop(columns=["default_risk", "customer_financial_statement"])
y = df["default_risk"]

# One-Hot Encode
X = pd.get_dummies(X, drop_first=True)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [14]:
model_nb = GaussianNB()
model_nb.fit(X_train_scaled, y_train)

pred_nb = model_nb.predict(X_test_scaled)
prob_nb = model_nb.predict_proba(X_test_scaled)[:,1]

acc_nb = accuracy_score(y_test, pred_nb)
f1_nb = f1_score(y_test, pred_nb)
auc_nb = roc_auc_score(y_test, prob_nb)

cv_nb = cross_val_score(model_nb, X_train_scaled, y_train, cv=5, scoring="f1").mean()

feature_means = pd.DataFrame(model_nb.theta_, columns=X.columns, index=["No Default","Default"])
importance_nb = feature_means.loc["Default"].sort_values(ascending=False).head(10)

In [17]:
print("\n--- Naive Bayes Classification Report ---")
print(classification_report(y_test, pred_nb))
print("Confusion Matrix:\n", confusion_matrix(y_test, pred_nb))


--- Naive Bayes Classification Report ---
              precision    recall  f1-score   support

           0       1.00      0.84      0.91       193
           1       0.60      1.00      0.75        47

    accuracy                           0.87       240
   macro avg       0.80      0.92      0.83       240
weighted avg       0.92      0.87      0.88       240

Confusion Matrix:
 [[162  31]
 [  0  47]]


In [15]:
print("Naive Bayes:", acc_nb, f1_nb, auc_nb, cv_nb)
print("\nTop Naive Bayes Features:")
print(importance_nb)

Naive Bayes: 0.8708333333333333 0.752 0.9196891191709844 0.7482858846144488

Top Naive Bayes Features:
num_late_payments                1.327877
credit_utilization_ratio         0.184890
education_level_Master           0.085362
loan_amount                      0.079617
account_tenure_years             0.074108
monthly_income                   0.052006
region_Urban                     0.040660
employment_type_Self-Employed    0.036752
region_Suburban                  0.025952
existing_loans_count             0.014408
Name: Default, dtype: float64


In [16]:
prompt = f"""
You are a financial data analyst.

We trained model for loan default prediction.

Naive Bayes:
Accuracy = {acc_nb:.3f}
F1 Score = {f1_nb:.3f}
ROC-AUC = {auc_nb:.3f}
Cross-Validation F1 = {cv_nb:.3f}
Top Features:
{importance_nb.to_string()}

Please provide:
1. Model performance summary
2. Financial insights about default risk patterns
3. Feature interpretation
4. Business recommendations
"""

response = gemini.generate_content(prompt).text

print("\nGemini AI Interpretation:\n")
print(response)


Gemini AI Interpretation:

As a financial data analyst, I have reviewed the performance metrics and feature importance for the Naive Bayes loan default prediction model. Below is an analytical breakdown of the results, their financial implications, and strategic recommendations for the lending portfolio.

---

### 1. Model Performance Summary
The model demonstrates **strong discriminative power** but shows room for improvement in precision-recall balancing.

*   **Discrimination Capability (ROC-AUC = 0.920):** This is an excellent score. It indicates that in 92% of cases, the model will rank a randomly chosen defaulting loan higher than a non-defaulting one. The model is highly effective at separating "good" borrowers from "bad" ones.
*   **Reliability (F1 Score = 0.752 vs. CV F1 = 0.748):** The near-identical match between the test F1 score and the Cross-Validation score suggests high model stability. The model is not overfitting and is likely to perform consistently on unseen data.
