In [1]:
import pandas as pd
import numpy as np
import os
from dotenv import load_dotenv
import google.generativeai as genai

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB

  from .autonotebook import tqdm as notebook_tqdm

All support for the `google.generativeai` package has ended. It will no longer be receiving 
updates or bug fixes. Please switch to the `google.genai` package as soon as possible.
See README for more details:

https://github.com/google-gemini/deprecated-generative-ai-python/blob/main/README.md

  import google.generativeai as genai


In [2]:
load_dotenv()
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
gemini = genai.GenerativeModel("gemini-3-flash-preview")

In [4]:
df = pd.read_csv("../data/credit_risk_dataset.csv")

X = df.drop(columns=["default_risk", "customer_financial_statement"])
y = df["default_risk"]

# Encode categorical 
X = pd.get_dummies(X, drop_first=True)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [5]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

model = GaussianNB()
model.fit(X_train_scaled, y_train)

0,1,2
,priors,
,var_smoothing,1e-09


In [6]:
pred = model.predict(X_test_scaled)
prob = model.predict_proba(X_test_scaled)[:,1]

acc = accuracy_score(y_test, pred)
f1 = f1_score(y_test, pred)
auc = roc_auc_score(y_test, prob)

print("Accuracy:", acc)
print("F1 Score:", f1)
print("ROC-AUC:", auc)

Accuracy: 0.8708333333333333
F1 Score: 0.752
ROC-AUC: 0.9196891191709844


In [7]:
print("\nConfusion Matrix:\n", confusion_matrix(y_test, pred))
print("\nClassification Report:\n", classification_report(y_test, pred))


Confusion Matrix:
 [[162  31]
 [  0  47]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.84      0.91       193
           1       0.60      1.00      0.75        47

    accuracy                           0.87       240
   macro avg       0.80      0.92      0.83       240
weighted avg       0.92      0.87      0.88       240



In [8]:
feature_means = pd.DataFrame(
    model.theta_, 
    columns=X.columns, 
    index=["No Default", "Default"]
)

top_features = feature_means.loc["Default"].sort_values(ascending=False).head(10)
print("\nTop Features Learned by Naive Bayes:\n")
print(top_features)


Top Features Learned by Naive Bayes:

num_late_payments                1.327877
credit_utilization_ratio         0.184890
education_level_Master           0.085362
loan_amount                      0.079617
account_tenure_years             0.074108
monthly_income                   0.052006
region_Urban                     0.040660
employment_type_Self-Employed    0.036752
region_Suburban                  0.025952
existing_loans_count             0.014408
Name: Default, dtype: float64


In [9]:
prompt = f"""
You are a financial data scientist.

A Naive Bayes model was trained to predict customer loan default risk.

Model Evaluation Results:
Accuracy: {acc:.3f}
F1 Score: {f1:.3f}
ROC-AUC: {auc:.3f}

Top Features that influence default risk:
{top_features.to_string()}

Please provide:

1. A professional summary of model performance
2. Financial insights about which customers are most likely to default
3. Interpretation of the most important features
4. Practical business recommendations for banks or lenders
"""

response = gemini.generate_content(prompt).text

print("\nGemini LLM Interpretation:\n")
print(response)


Gemini LLM Interpretation:

As a financial data scientist, I have analyzed the Naive Bayes model results. Below is a comprehensive breakdown of the model performance, risk drivers, and strategic recommendations for the lending institution.

---

### 1. Professional Summary of Model Performance
The model demonstrates **excellent discriminative power**, as evidenced by the **ROC-AUC of 0.920**. This indicates that the model has a 92% probability of ranking a random defaulting borrower higher in risk than a random non-defaulting borrower. 

While the **Accuracy (0.871)** is high, the **F1 Score (0.752)** is the more critical metric here. In credit risk, there is typically a class imbalance (fewer defaulters than non-defaulters). An F1 score of 0.752 suggests a solid balance between *Precision* (avoiding false accusations of default) and *Recall* (capturing as many actual defaults as possible). The model is robust, though there is slight room to improve the F1 score by tuning the classifi