# 데이터셋 설정

- train 데이터 >> val 증강 + train 증강
- val 데이터 >> train의 원본데이터 + val의 원본데이터 (3832장 에서)
- test 데이터 >> val 데이터에서 50%

(val / test)데이터는 절대 건드리면 안됨 !

## 성능 개선
- batch size , random_state 조절
- lr, weight_decay, dropout, neuron 조절

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from torch.optim.lr_scheduler import CosineAnnealingLR
import copy
import pandas as pd
import numpy as np
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from torchvision import transforms
from torchvision.models import DenseNet201_Weights, VGG19_Weights

class SkinDataset(Dataset):
    """Image/label dataset for right- and left-eye wrinkle classification.

    The dataset can be built in two ways:

    * From an annotation CSV plus folder lists: every ``.jpg`` file in the
      given folders is matched to a CSV row through the integer that precedes
      the first ``_`` in its file name. Right-eye folders read the
      ``r_perocular_wrinkle`` column, left-eye folders read
      ``l_perocular_wrinkle``. Files whose ID is not in the CSV are skipped.
    * From ready-made ``image_paths`` and ``labels`` sequences.

    Args:
        csv_file: Path of the annotation CSV (must contain an ``ID`` column).
        r_peroucular_folder: Iterable of folders holding right-eye images.
        l_peroucular_folder: Iterable of folders holding left-eye images.
        image_paths: Pre-computed image paths (used when no CSV is given).
        labels: Labels aligned with ``image_paths``.
        transform: Optional callable applied to each loaded RGB image.

    Raises:
        ValueError: If neither a complete (csv_file, folders) triple nor an
            (image_paths, labels) pair is supplied, or if a ``.jpg`` file name
            does not start with an integer ID.
    """

    def __init__(self, csv_file=None, r_peroucular_folder=None, l_peroucular_folder=None, image_paths=None, labels=None, transform=None):
        self.transform = transform

        if csv_file is not None and r_peroucular_folder is not None and l_peroucular_folder is not None:
            df = pd.read_csv(csv_file)

            collected_paths = []
            collected_labels = []
            # Right-eye folders first, then left-eye folders.
            for folders, label_column in (
                (r_peroucular_folder, 'r_perocular_wrinkle'),
                (l_peroucular_folder, 'l_perocular_wrinkle'),
            ):
                paths, targets = self._collect(folders, df, label_column)
                collected_paths.extend(paths)
                collected_labels.extend(targets)

            self.image_paths = collected_paths
            self.labels = collected_labels

        elif image_paths is not None and labels is not None:
            self.image_paths = image_paths
            self.labels = labels
        else:
            raise ValueError("Either (csv_file, r_peroucular_folder, l_peroucular_folder) or (image_paths, labels) must be provided")

        # Store as NumPy arrays so callers can index and split them directly.
        self.image_paths = np.array(self.image_paths)
        self.labels = np.array(self.labels)

    @staticmethod
    def _collect(folders, df, label_column):
        """Return (paths, labels) for every labelled ``.jpg`` file in ``folders``."""
        # Build the ID -> label table once instead of filtering the DataFrame
        # for every image. Keeping the first row per ID gives the same label
        # that a `df[df['ID'] == id][column].values[0]` lookup would return.
        unique_rows = df.drop_duplicates(subset='ID')
        label_by_id = dict(zip(unique_rows['ID'].tolist(), unique_rows[label_column].to_numpy()))

        paths = []
        labels = []
        for folder in folders:
            # os.listdir returns names in arbitrary order; sorting makes the
            # sample order (and any seeded split of it) reproducible.
            for image_file in sorted(os.listdir(folder)):
                if not image_file.endswith('.jpg'):
                    continue
                # The text before the first underscore is the annotation ID.
                image_id = int(image_file.split('_')[0])
                if image_id in label_by_id:
                    paths.append(os.path.join(folder, image_file))
                    labels.append(label_by_id[image_id])
        return paths, labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``."""
        # convert() decodes the pixels into a new image, so the source file
        # can be closed as soon as the with-block ends.
        with Image.open(self.image_paths[idx]) as source:
            image = source.convert("RGB")
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)
        return image, label

# Preprocessing applied to every image: resize to 224x224, convert to a tensor,
# then normalise each channel with the mean/std constants below.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Single root for the project data on the mounted Drive. Every path below is
# derived from it so the location only has to be edited in one place.
DATA_ROOT = "/content/drive/MyDrive/Final_project_2조/02_2. 전처리 및 EDA_이미지/data"
IMAGE_ROOT = f"{DATA_ROOT}/image/Orientation"

# Annotation table shared by the train and validation datasets.
csv_file = f"{DATA_ROOT}/annotation/annotation_class2.csv"

# Sub-folders used for training.
# NOTE(review): only folders under train/ are listed here, while the notebook
# notes describe the training set as "val augmentation + train augmentation" —
# confirm whether the val/ augmentation folders were meant to be included.
TRAIN_VARIANTS = [
    "color_minus_10",
    "color_plus_10",
    "cut",
    "horizon",
    "rotation_minus_10",
    "rotation_plus_10",
]


def _eye_folders(split, side, variants):
    """Return the folder paths for one split ("train"/"val") and eye side ("r"/"l")."""
    return [f"{IMAGE_ROOT}/{split}/{side}_peroucular/{variant}" for variant in variants]


# Training folders: right eye, then left eye.
train_r_peroucular_folder = _eye_folders("train", "r", TRAIN_VARIANTS)
train_l_peroucular_folder = _eye_folders("train", "l", TRAIN_VARIANTS)

# Validation pool: the "<side>_peroucular_origin" and "smart_pad" folders of
# both the train and val splits (train folders first, then val folders).
val_r_peroucular_folder = [
    folder
    for split in ("train", "val")
    for folder in _eye_folders(split, "r", ["r_peroucular_origin", "smart_pad"])
]
val_l_peroucular_folder = [
    folder
    for split in ("train", "val")
    for folder in _eye_folders(split, "l", ["l_peroucular_origin", "smart_pad"])
]

# Build the training dataset and the validation pool from the CSV and folders.
train_dataset = SkinDataset(csv_file=csv_file, r_peroucular_folder=train_r_peroucular_folder, l_peroucular_folder=train_l_peroucular_folder, transform=transform)
val_dataset = SkinDataset(csv_file=csv_file, r_peroucular_folder=val_r_peroucular_folder, l_peroucular_folder=val_l_peroucular_folder, transform=transform)

# Split the validation pool 50/50 into validation and test sets, stratified by
# label and seeded so the split is repeatable for a given sample order.
val_image_paths, test_image_paths, val_labels, test_labels = train_test_split(
    val_dataset.image_paths, val_dataset.labels, test_size=0.5, random_state=42, stratify=val_dataset.labels
)

# Wrap the two halves as datasets of their own.
validation_dataset = SkinDataset(image_paths=val_image_paths, labels=val_labels, transform=transform)
test_dataset = SkinDataset(image_paths=test_image_paths, labels=test_labels, transform=transform)

# Data loaders: only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=10, shuffle=True, num_workers=3)
val_loader = DataLoader(validation_dataset, batch_size=10, shuffle=False, num_workers=3)
test_loader = DataLoader(test_dataset, batch_size=10, shuffle=False, num_workers=3)

In [None]:
# Print dataset sizes for debugging.
# The four datasets already exist from the data-setup cell. Rebuilding them
# here only re-listed every image folder on Drive and left the loaders pointing
# at the earlier objects, so the existing datasets are reported directly.
print(f"Train dataset size: {len(train_dataset)}")
print(f"Val dataset size: {len(val_dataset)}")
print(f"Validation dataset size: {len(validation_dataset)}")
print(f"Test dataset size: {len(test_dataset)}")

# 모델 정의 (DenseNet + VGG19)

In [None]:
# @title
# Ensemble model definition
class DenseNet201_VGG19_Ensemble(nn.Module):
    """Classify images from the concatenated outputs of DenseNet201 and VGG19.

    Both torchvision backbones are created with their DEFAULT weights and have
    their ``classifier`` replaced by ``nn.Identity``. The two backbone outputs
    are concatenated along dim 1 and passed through a three-layer MLP head.

    Args:
        num_classes: Number of output logits.
        dropout: Dropout probability used after each hidden layer of the head.
            Defaults to 0.5, the value this model was originally written with.
    """

    def __init__(self, num_classes, dropout=0.5):
        super().__init__()

        # DenseNet201: record the width its classifier expected, then drop the
        # classifier so the backbone returns features instead of logits.
        self.densenet = models.densenet201(weights=DenseNet201_Weights.DEFAULT)
        densenet_features = self.densenet.classifier.in_features
        self.densenet.classifier = nn.Identity()

        # VGG19: same idea; the width is read from the first classifier layer.
        self.vgg = models.vgg19(weights=VGG19_Weights.DEFAULT)
        vgg_features = self.vgg.classifier[0].in_features
        self.vgg.classifier = nn.Identity()

        # Head over the concatenated features. The layer order is unchanged, so
        # state_dict keys (classifier.0 / .3 / .6) stay compatible with
        # previously saved checkpoints.
        self.classifier = nn.Sequential(
            nn.Linear(densenet_features + vgg_features, 1024),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Return class logits for the image batch ``x``."""
        # Run the same input through both backbones.
        densenet_features = self.densenet(x)
        vgg_features = self.vgg(x)

        # Join the two feature sets along the feature dimension.
        combined_features = torch.cat((densenet_features, vgg_features), dim=1)

        # Final classification.
        output = self.classifier(combined_features)
        return output

# Compute device: CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Number of classes for wrinkle classification (labels 0 and 1).
num_classes = 2

# Per-class weights handed to the loss function.
class_weights = torch.tensor([2.6, 1.6], dtype=torch.float32).to(device)

model = DenseNet201_VGG19_Ensemble(num_classes=num_classes).to(device)

# Freeze the DenseNet201 and VGG19 backbones so that only the head's
# parameters keep requires_grad=True.
# NOTE(review): freezing parameters does not by itself stop normalisation
# layers from updating their running statistics while the model is in
# train() mode — confirm whether the backbones should also be kept in eval().
model.densenet.requires_grad_(False)
model.vgg.requires_grad_(False)

# Cross-entropy loss with the class weights applied.
criterion = nn.CrossEntropyLoss(weight=class_weights)

# SGD optimizer with momentum and weight decay.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.000291685854174142,
    momentum=0.9,
    weight_decay=0.006093049048814643,
)

# NOTE(review): T_max=20 while training is launched with num_epochs=50; the
# cosine schedule presumably turns upward again past T_max — confirm intended.
scheduler = CosineAnnealingLR(optimizer, T_max=20, eta_min=1e-6)

# early Stopping

In [None]:
# @title
# Early-stopping helper
class EarlyStopping:
    """Flag when the validation loss has stopped improving.

    Call the instance once per epoch with that epoch's validation loss. The
    negated loss is kept as a score. A call whose score is below
    ``best_score + delta`` increments ``counter``; any other call stores the
    new score as the best and resets ``counter``. Once ``counter`` reaches
    ``patience``, ``early_stop`` is set to True.

    Args:
        patience: Number of consecutive non-improving calls tolerated.
        verbose: Print the counter on every non-improving call when True.
        delta: Margin added to the best score before comparing.
    """

    def __init__(self, patience=5, verbose=False, delta=0):
        # Configuration.
        self.patience = patience
        self.verbose = verbose
        self.delta = delta
        # Running state.
        self.best_score = None
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_loss):
        # Lower loss is better, so negate it to get a "higher is better" score.
        current = -val_loss

        # First observation: just record it.
        if self.best_score is None:
            self.best_score = current
            return

        # Not below the threshold: accept as the new best and reset the count.
        if not current < self.best_score + self.delta:
            self.best_score = current
            self.counter = 0
            return

        # Otherwise count one more epoch without improvement.
        self.counter += 1
        if self.verbose:
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
        if self.counter >= self.patience:
            self.early_stop = True

# 모델 학습 및 검증

- train과 test

In [None]:
# @title
# Model training and validation
def _run_epoch(model, loader, criterion, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    With an optimizer the model is put in train mode and updated after every
    batch; without one it is put in eval mode and gradients are disabled.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    correct = 0
    seen = 0

    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            # `device` is the module-level torch.device the model was moved to.
            inputs, labels = inputs.to(device), labels.to(device).long()

            if training:
                optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            batch_size = labels.size(0)
            # criterion returns a per-batch mean, so weight it by batch size.
            loss_sum += loss.item() * batch_size
            # Accumulate plain ints so the result is a float even for one batch.
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            seen += batch_size

    if seen == 0:
        raise ValueError("loader yielded no samples")
    return loss_sum / seen, correct / seen


def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=50, patience=5, save_path=None):
    """Train ``model`` with early stopping and return it with its best weights.

    Each epoch trains on ``train_loader`` and then evaluates on ``val_loader``.
    Whenever validation accuracy improves, the weights are kept in memory and
    written to ``save_path``. Training stops early once validation loss has
    failed to improve for ``patience`` consecutive epochs.

    Args:
        model: Network to train (already on the module-level ``device``).
        train_loader: Loader yielding ``(inputs, labels)`` training batches.
        val_loader: Loader yielding ``(inputs, labels)`` validation batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped after every training batch.
        scheduler: Learning-rate scheduler stepped once per epoch.
        num_epochs: Maximum number of epochs.
        patience: Early-stopping patience in epochs.
        save_path: Where to write the best weights. When None, the module-level
            ``model_save_path`` is used.

    Returns:
        ``model`` with the best-validation-accuracy weights loaded.

    Raises:
        ValueError: If either loader yields no samples.
    """
    if save_path is None:
        # Resolved at call time, so the name only has to exist before training.
        save_path = model_save_path

    early_stopping = EarlyStopping(patience=patience, verbose=True)
    best_model_wts = copy.deepcopy(model.state_dict())
    # Start below any reachable accuracy so the first epoch always writes a
    # checkpoint, even if its accuracy is 0.
    best_acc = float("-inf")

    for epoch in range(num_epochs):
        epoch_loss, epoch_acc = _run_epoch(model, train_loader, criterion, optimizer)

        # Validate on the loader passed in as `val_loader`. This loop previously
        # iterated the global `test_loader`, so checkpoint selection and early
        # stopping used the test data and ignored the argument.
        val_loss, val_acc = _run_epoch(model, val_loader, criterion)

        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {epoch_loss:.4f}, Train Accuracy: {epoch_acc:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.4f}')

        scheduler.step()

        # Checkpoint on validation accuracy.
        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(best_model_wts, save_path)

        # Early stopping watches validation loss.
        early_stopping(val_loss)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    model.load_state_dict(best_model_wts)
    return model

# Checkpoint destination on the mounted Drive. train_model reads this
# module-level name when it saves the best weights.
model_save_path = '/content/drive/MyDrive/Final_project_2조/02_2. 전처리 및 EDA_이미지/예진님/Ensemble/models/persocular_ensemble_model_v1.pth'

# Train for at most 50 epochs with an early-stopping patience of 5 and keep the
# returned model.
model = train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    num_epochs=50,
    patience=5,
)

# 평가


In [None]:
# @title
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Model evaluation
# Rebuild the architecture and load the weights saved during training.
model = DenseNet201_VGG19_Ensemble(num_classes=num_classes).to(device)
# map_location lets a checkpoint written on a GPU runtime load on a CPU-only one.
model.load_state_dict(torch.load(model_save_path, map_location=device))
model.eval()


def _collect_predictions(model, loader):
    """Return ``(true_labels, predicted_labels)`` lists for every sample in ``loader``."""
    true_labels = []
    predicted_labels = []
    with torch.no_grad():
        for images, labels in loader:
            outputs = model(images.to(device))
            _, preds = torch.max(outputs, 1)
            true_labels.extend(labels.cpu().numpy())
            predicted_labels.extend(preds.cpu().numpy())
    return true_labels, predicted_labels


def _report_split(split_name, true_labels, predicted_labels):
    """Print accuracy and the classification report, plot the confusion matrix.

    Returns ``(accuracy, confusion_matrix)`` for the split.
    """
    print(f'[{split_name}]')
    accuracy = accuracy_score(true_labels, predicted_labels)
    print(f'Accuracy: {accuracy:.4f}')

    print("Classification Report:")
    print(classification_report(true_labels, predicted_labels))

    # Confusion matrix: rows are true labels, columns are predicted labels.
    cm = confusion_matrix(true_labels, predicted_labels)
    plt.figure(figsize=(10,7))
    sns.heatmap(cm, annot=True, fmt="d")
    plt.title(f'{split_name} confusion matrix')
    plt.xlabel('Predicted label')
    plt.ylabel('True label')
    plt.show()
    return accuracy, cm


# Both held-back splits are reported. Whichever one the training loop used for
# checkpoint selection and early stopping is optimistically biased; the other
# is the held-out estimate.

# Validation split (result names kept as before).
true_labels, predicted_labels = _collect_predictions(model, val_loader)
accuracy, cm = _report_split('Validation', true_labels, predicted_labels)

# Test split.
test_true_labels, test_predicted_labels = _collect_predictions(model, test_loader)
test_accuracy, test_cm = _report_split('Test', test_true_labels, test_predicted_labels)