# Desenvolvimento do Modelo de Classificação de Score de Crédito

In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np
import re

# Load the raw training data straight from the original CSV file.
df = pd.read_csv(filepath_or_buffer="train.csv")

def to_float(val):
    """Convert a possibly noisy value to ``float``.

    The value is stringified, every ``_`` and ``,`` character is removed,
    and the remaining text is parsed with ``float``.

    Args:
        val: Any value; it is passed through ``str`` before cleaning.

    Returns:
        The parsed float, or ``np.nan`` when the cleaned text is not a
        valid float literal.
    """
    cleaned = str(val).replace("_", "").replace(",", "")
    try:
        return float(cleaned)
    except ValueError:
        # Unparseable text is treated as missing. Catching only ValueError
        # (instead of a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate normally.
        return np.nan

def extract_months(age_str):
    """Convert an "<N> Years and <M> Months" string to a total month count.

    The month part is optional: a string that only states the years
    (e.g. ``"5 Years"``) is counted as that many years and zero months.

    Args:
        age_str: Text to parse. Non-string values (such as NaN or None)
            are treated as missing.

    Returns:
        ``years * 12 + months`` as an int, or ``np.nan`` when ``age_str``
        is not a string or contains no "<N> Year(s)" part.
    """
    if not isinstance(age_str, str):
        return np.nan

    # The trailing group is optional so a missing "<M> Months" part falls
    # back to zero months instead of rejecting the whole string.
    match = re.search(r"(\d+)\s*Years?(?:.*?(\d+)\s*Months?)?", age_str)
    if match is None:
        return np.nan

    years = int(match.group(1))
    months = int(match.group(2)) if match.group(2) else 0
    return years * 12 + months

def to_numeric(val):
    """Parse a single value with ``pd.to_numeric``, tolerating ``_`` noise.

    String inputs have every ``_`` character removed before parsing, which
    mirrors the cleaning done by ``to_float``; a value such as ``"28_"``
    therefore parses as ``28`` instead of being discarded.

    Args:
        val: The value to convert.

    Returns:
        The numeric value produced by ``pd.to_numeric``, or ``np.nan`` when
        the value cannot be parsed.
    """
    if isinstance(val, str):
        val = val.replace("_", "")
    try:
        return pd.to_numeric(val)
    except (ValueError, TypeError):
        # Only parsing failures are mapped to NaN; a bare ``except`` would
        # also hide KeyboardInterrupt and SystemExit.
        return np.nan

# Conversions: parse the numeric columns that are stored as text.
cols_to_float = ['Annual_Income', 'Outstanding_Debt', 'Amount_invested_monthly', 'Monthly_Balance']
for col in cols_to_float:
    df[col] = df[col].apply(to_float)

df['Age'] = df['Age'].apply(to_numeric)
df['Num_of_Loan'] = df['Num_of_Loan'].apply(to_numeric)
df['Num_of_Delayed_Payment'] = df['Num_of_Delayed_Payment'].apply(to_numeric)
df['Credit_History_Age'] = df['Credit_History_Age'].apply(extract_months)

# Fill missing values: column mean for numeric columns, the literal
# "Unknown" for text columns (the target column is left untouched).
num_cols = df.select_dtypes(include=[np.number]).columns
cat_cols = df.select_dtypes(include=['object']).columns.drop(['Credit_Score'])

# Assign the filled column back instead of calling fillna(inplace=True) on
# `df[col]`: the chained in-place form acts on a temporary object and does
# not update `df` when pandas Copy-on-Write is active.
for col in num_cols:
    df[col] = df[col].fillna(df[col].mean())

for col in cat_cols:
    df[col] = df[col].fillna("Unknown")

# Encode the target variable as integer class labels.
label_encoder = LabelEncoder()
df['Credit_Score_Label'] = label_encoder.fit_transform(df['Credit_Score'])

# Drop the columns that are not used as features, plus the raw target.
df.drop(columns=['ID', 'Customer_ID', 'Name', 'SSN', 'Month', 'Credit_Score'], inplace=True)

# Encode every remaining text column as integer codes: the scikit-learn
# estimators fitted later reject string-valued features.
# NOTE(review): integer codes impose an arbitrary order on categories;
# one-hot encoding may suit linear models better - confirm per column.
for col in df.select_dtypes(include=['object']).columns:
    df[col] = LabelEncoder().fit_transform(df[col].astype(str))

# Split into features (X) and target (y).
X = df.drop(columns=['Credit_Score_Label'])
y = df['Credit_Score_Label']

# Hold out 20% of the rows for evaluation; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


## Modelos de Classificação

In [None]:

from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# Decision Tree: depth capped at 5, fixed seed; fit() returns the fitted estimator.
dt_model = DecisionTreeClassifier(random_state=42, max_depth=5).fit(X_train, y_train)
y_pred_dt = dt_model.predict(X_test)

# Per-class metrics followed by the confusion matrix for the test split.
dt_report = classification_report(y_test, y_pred_dt)
dt_confusion = confusion_matrix(y_test, y_pred_dt)
print("Decision Tree Classification Report:", dt_report, sep="\n")
print("Matriz de Confusão:", dt_confusion, sep="\n")


In [None]:

# Logistic Regression with the iteration limit raised to 500.
# NOTE(review): the features are passed in unscaled; if the solver reports
# convergence warnings, standardizing the inputs first is worth trying.
lr_model = LogisticRegression(max_iter=500).fit(X_train, y_train)
y_pred_lr = lr_model.predict(X_test)

# Per-class metrics followed by the confusion matrix for the test split.
lr_report = classification_report(y_test, y_pred_lr)
lr_confusion = confusion_matrix(y_test, y_pred_lr)
print("Logistic Regression Classification Report:", lr_report, sep="\n")
print("Matriz de Confusão:", lr_confusion, sep="\n")


### Random Forest Classifier

In [None]:

from sklearn.ensemble import RandomForestClassifier

# Random Forest: 100 trees, fixed seed; fit() returns the fitted estimator.
rf_model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

# Per-class metrics followed by the confusion matrix for the test split.
rf_report = classification_report(y_test, y_pred_rf)
rf_confusion = confusion_matrix(y_test, y_pred_rf)
print("Random Forest Classification Report:", rf_report, sep="\n")
print("Matriz de Confusão:", rf_confusion, sep="\n")


### Gradient Boosting Classifier

In [None]:

from sklearn.ensemble import GradientBoostingClassifier

# Gradient Boosting: 100 boosting stages, learning rate 0.1, fixed seed.
gb_params = {"n_estimators": 100, "learning_rate": 0.1, "random_state": 42}
gb_model = GradientBoostingClassifier(**gb_params).fit(X_train, y_train)
y_pred_gb = gb_model.predict(X_test)

# Per-class metrics followed by the confusion matrix for the test split.
gb_report = classification_report(y_test, y_pred_gb)
gb_confusion = confusion_matrix(y_test, y_pred_gb)
print("Gradient Boosting Classification Report:", gb_report, sep="\n")
print("Matriz de Confusão:", gb_confusion, sep="\n")


### XGBoost Classifier

In [None]:

from xgboost import XGBClassifier

# XGBoost: 100 boosting rounds, multiclass log-loss as the evaluation
# metric, fixed seed. The keyword arguments are kept exactly as given.
xgb_params = {
    "n_estimators": 100,
    "use_label_encoder": False,
    "eval_metric": 'mlogloss',
    "random_state": 42,
}
xgb_model = XGBClassifier(**xgb_params)
xgb_model.fit(X_train, y_train)
y_pred_xgb = xgb_model.predict(X_test)

# Per-class metrics followed by the confusion matrix for the test split.
xgb_report = classification_report(y_test, y_pred_xgb)
xgb_confusion = confusion_matrix(y_test, y_pred_xgb)
print("XGBoost Classification Report:", xgb_report, sep="\n")
print("Matriz de Confusão:", xgb_confusion, sep="\n")
