In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import pandas as pd
import numpy as np

# Early Stopping 클래스 정의
class EarlyStopping:
    def __init__(self, patience=10, delta=0):
        self.patience = patience
        self.delta = delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_loss is None or val_loss < self.best_loss - self.delta:
            self.best_loss = val_loss
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True


# Custom Dataset 클래스 정의
class CreditDataset(Dataset):
    def __init__(self, features, targets=None):
        self.features = features
        self.targets = targets

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        x = torch.tensor(self.features.iloc[idx].values.astype(np.float32), dtype=torch.float32)
        if self.targets is not None:
            y = torch.tensor(self.targets.iloc[idx], dtype=torch.float32)
            return x, y
        return x

# 데이터 불러오기
train_features = pd.read_csv("./data/train_features.csv")  # train_df_features를 csv로 저장했다고 가정
train_target = pd.read_csv("./data/train_target.csv")  # train_df_target을 csv로 저장했다고 가정
test_features = pd.read_csv("./data/test_features.csv")  # test_df를 csv로 저장했다고 가정

# Train-validation split (간단히 train 데이터를 나눔)
from sklearn.model_selection import train_test_split
train_x, val_x, train_y, val_y = train_test_split(train_features, train_target, test_size=0.2, random_state=42)

train_dataset = CreditDataset(train_x, train_y)
val_dataset = CreditDataset(val_x, val_y)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


# Dataset과 DataLoader 생성
train_dataset = CreditDataset(train_features, train_target)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = CreditDataset(test_features)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# 모델 정의
input_dim = train_features.shape[1]
class CreditModel(nn.Module):
    def __init__(self, input_dim):
        super(CreditModel, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.network(x)

model = CreditModel(input_dim)

# 손실 함수와 옵티마이저 정의
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [3]:
import torch
from torch.utils.tensorboard import SummaryWriter

In [6]:
# TensorBoard SummaryWriter 생성
writer = SummaryWriter(log_dir='./runs/credit_model2')

# 학습 루프
epochs = 500
early_stopping = EarlyStopping(patience=10, delta=0.001)

for epoch in range(epochs):
    model.train()
    train_loss = 0.0

    for x, y in train_loader:
        optimizer.zero_grad()
        y_pred = model(x).squeeze()
        y = y.squeeze()
        loss = criterion(y_pred, y)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    train_loss /= len(train_loader)

    # 검증 단계
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for x, y in val_loader:
            y_pred = model(x).squeeze()
            y = y.squeeze()
            loss = criterion(y_pred, y)
            val_loss += loss.item()

    val_loss /= len(val_loader)

    # TensorBoard에 손실 기록
    writer.add_scalar('Loss/Train', train_loss, epoch)
    writer.add_scalar('Loss/Validation', val_loss, epoch)

    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")

    # Early Stopping 체크
    early_stopping(val_loss)
    if early_stopping.early_stop:
        print("Early stopping triggered!")
        break

# 모델 매개변수 및 그래프 저장
dummy_input = torch.rand(1, input_dim)
writer.add_graph(model, dummy_input)
writer.close()

  y = torch.tensor(self.targets.iloc[idx], dtype=torch.float32)


Epoch 1/500, Train Loss: 0.5675, Validation Loss: 0.5757
Epoch 2/500, Train Loss: 0.5610, Validation Loss: 0.5744
Epoch 3/500, Train Loss: 0.5567, Validation Loss: 0.5734
Epoch 4/500, Train Loss: 0.5553, Validation Loss: 0.5684
Epoch 5/500, Train Loss: 0.5523, Validation Loss: 0.5685
Epoch 6/500, Train Loss: 0.5510, Validation Loss: 0.5707
Epoch 7/500, Train Loss: 0.5483, Validation Loss: 0.5655
Epoch 8/500, Train Loss: 0.5452, Validation Loss: 0.5628
Epoch 9/500, Train Loss: 0.5433, Validation Loss: 0.5657
Epoch 10/500, Train Loss: 0.5422, Validation Loss: 0.5624
Epoch 11/500, Train Loss: 0.5403, Validation Loss: 0.5626
Epoch 12/500, Train Loss: 0.5384, Validation Loss: 0.5614
Epoch 13/500, Train Loss: 0.5368, Validation Loss: 0.5611
Epoch 14/500, Train Loss: 0.5344, Validation Loss: 0.5539
Epoch 15/500, Train Loss: 0.5336, Validation Loss: 0.5523
Epoch 16/500, Train Loss: 0.5314, Validation Loss: 0.5529
Epoch 17/500, Train Loss: 0.5284, Validation Loss: 0.5502
Epoch 18/500, Train Los

In [7]:
# 모델을 평가 모드로 설정
model.eval()

# 테스트 데이터 예측
predictions = []
with torch.no_grad():
    for x in test_loader:
        preds = model(x).squeeze()
        predictions.extend(preds.numpy())  # 예측값을 리스트에 추가

# 예측값을 0과 1로 이진화 (임계값 0.5 사용)
binary_predictions = [1 if p >= 0.5 else 0 for p in predictions]

# 예측값 출력
print("Predictions (raw):", predictions)
print("Predictions (binary):", binary_predictions)


Predictions (raw): [np.float32(0.13004428), np.float32(0.799809), np.float32(0.1253235), np.float32(0.074815184), np.float32(0.18535051), np.float32(0.014644205), np.float32(0.65399015), np.float32(0.3896549), np.float32(2.9137436e-06), np.float32(0.46868414), np.float32(0.4833834), np.float32(0.29920343), np.float32(0.011278153), np.float32(0.00017466187), np.float32(0.37467623), np.float32(0.6635267), np.float32(0.66882116), np.float32(0.10817566), np.float32(0.0051504723), np.float32(0.4472827), np.float32(0.2780941), np.float32(0.0054470347), np.float32(0.29366085), np.float32(0.18642955), np.float32(0.12969199), np.float32(0.88950425), np.float32(0.23296657), np.float32(0.21620743), np.float32(0.009923902), np.float32(0.98773205), np.float32(0.2426464), np.float32(0.04498593), np.float32(0.1440728), np.float32(0.04348861), np.float32(0.1679101), np.float32(0.31757653), np.float32(0.924432), np.float32(0.0017894352), np.float32(0.7602576), np.float32(0.3082988), np.float32(0.491586

In [None]:
from service.utils import make_submit


In [8]:
import os
from datetime import datetime

today = datetime.today().strftime('%Y-%m-%d')
base_filename = f'./data/submission_{today}.csv'
filename = base_filename
counter = 1

while os.path.exists(filename):
    filename = f'./data/submission_{today}_{counter}.csv'
    counter += 1



submit_df = pd.read_csv('./data/sample_submission.csv')
submit_df['채무 불이행 확률'] = pd.Series(binary_predictions)
submit_df.to_csv(filename, index=False)
