In [None]:
import os
import zipfile
from google.colab import files, drive

# # Kaggle API
# uploaded = files.upload()

# # Move kaggle.json to .kaggle & Authorization
# !mkdir -p ~/.kaggle
# !mv kaggle.json ~/.kaggle/

# !chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download Outside of Google Drive
# !kaggle datasets download -d dougandrade/dog-emotions-5-classes

In [None]:
# Mount Google Drive at /content/drive; the dataset paths used later live under MyDrive.
# `drive` is provided by `from google.colab import drive` in the first cell, so that
# cell must be run before this one (otherwise this line raises NameError).
drive.mount('/content/drive')

NameError: name 'drive' is not defined

In [None]:
# # Unzip
# with zipfile.ZipFile('/content/drive/MyDrive/train_images_5_class.zip', 'r') as zip_ref:
#     zip_ref.extractall(path='/content/drive/MyDrive/')

### Module Import

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.models import EfficientNet_V2_S_Weights, efficientnet_v2_s
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data.dataset import random_split
from PIL import Image
from torchvision import datasets, models
from copy import deepcopy
import cv2
import glob
import argparse
import time
import json
import tensorflow as tf

### GPU Setting

In [None]:
# Probe for a TPU through TensorFlow and build the matching distribution strategy.
# NOTE(review): `strategy` is not referenced by the PyTorch training code visible in
# this notebook (which picks its device via torch.cuda) — confirm this cell is needed.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # locate and resolve the TPU
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
    print("Running on TPU")
except ValueError:
    print("No TPU found, using default strategy")
    strategy = tf.distribute.get_strategy()  # no TPU: fall back to the default strategy (e.g. CPU/GPU)

Running on TPU


In [None]:
# Compute device: the first CUDA GPU when one is visible, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Import Datasets

In [None]:
import os
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

root_dir = '/content/drive/MyDrive/ToyProject'

# Dataset location
data_dir = f'{root_dir}/train_images_5_class'

# Preprocessing pipeline for the pretrained EfficientNet v2 backbone
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),  # resize every image to the network input size used here
    transforms.ToTensor(),  # PIL image -> float tensor
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # ImageNet mean/std expected by the pretrained weights
])

# Load the dataset from the folder structure (one sub-folder per class)
dataset = datasets.ImageFolder(root=data_dir, transform=preprocess)

# Class labels
class_names = dataset.classes
print(f'Class names: {class_names}')

# train/validation split (70% train, 30% validation)
train_size = int(0.7 * len(dataset))
val_size = len(dataset) - train_size

# The split runs before any global seed is set in this notebook, so without an
# explicit generator the train/validation membership would change on every run and
# accuracies from different runs would not be comparable.
split_seed = 123
trainset, valset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(split_seed),
)

# Sanity check: dataset sizes
print(f'Total images: {len(dataset)}')
print(f'Training images: {train_size}')
print(f'Validation images: {val_size}')


Class names: ['alert', 'angry', 'frown', 'happy', 'relax']
Total images: 9325
Training images: 6527
Validation images: 2798


# Training & Validation Define

In [None]:
def train(net, trainloader, optimizer, criterion, args):
    """Run one training epoch and report its mean loss and accuracy.

    Args:
        net: model to optimise; it is put into training mode.
        trainloader: iterable yielding ``(inputs, labels)`` batches.
        optimizer: optimizer bound to ``net``'s parameters.
        criterion: callable ``criterion(outputs, labels)`` returning a scalar loss.
        args: experiment namespace; not read here, kept for a uniform signature.

    Returns:
        ``(net, train_loss, train_acc)`` where ``train_loss`` is the mean of the
        per-batch losses and ``train_acc`` is the accuracy in percent.

    Raises:
        ValueError: if ``trainloader`` yields no samples.
    """
    net.train()

    # Batches have to live on the same device as the model, so take the device from
    # the model itself instead of relying on a notebook-level global.
    first_param = next(net.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    correct = 0
    total = 0
    running_loss = 0.0
    num_batches = 0

    for inputs, labels in trainloader:
        if model_device is not None:
            inputs, labels = inputs.to(model_device), labels.to(model_device)

        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)  # index of the highest logit per sample
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        num_batches += 1

    if num_batches == 0 or total == 0:
        raise ValueError('trainloader yielded no samples')

    train_loss = running_loss / num_batches
    train_acc = 100 * correct / total
    return net, train_loss, train_acc

In [None]:
def validate(net, valloader, criterion, args):
    """Evaluate ``net`` on a validation loader without updating any weights.

    Args:
        net: model to evaluate; it is put into eval mode.
        valloader: iterable yielding ``(images, labels)`` batches.
        criterion: callable ``criterion(outputs, labels)`` returning a scalar loss.
        args: experiment namespace; not read here, kept for a uniform signature.

    Returns:
        ``(val_loss, val_acc)`` where ``val_loss`` is the mean of the per-batch
        losses and ``val_acc`` is the accuracy in percent.

    Raises:
        ValueError: if ``valloader`` yields no samples.
    """
    net.eval()

    # Batches have to live on the same device as the model, so take the device from
    # the model itself instead of relying on a notebook-level global.
    first_param = next(net.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    correct = 0
    total = 0
    running_loss = 0.0
    num_batches = 0

    with torch.no_grad():
        for images, labels in valloader:
            if model_device is not None:
                images, labels = images.to(model_device), labels.to(model_device)

            outputs = net(images)
            running_loss += criterion(outputs, labels).item()

            predicted = outputs.argmax(dim=1)  # index of the highest logit per sample
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1

    if num_batches == 0 or total == 0:
        raise ValueError('valloader yielded no samples')

    val_loss = running_loss / num_batches
    val_acc = 100 * correct / total
    return val_loss, val_acc

In [None]:
def test(net, args):

    testloader = torch.utils.data.DataLoader(testset,
                                              batch_size=args.test_batch_size,
                                              shuffle=False, num_workers=2)
    net.eval()

    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data

            images, labels = images.to(device), labels.to(device)

            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        test_acc = 100 * correct / total
    return test_acc

# Experiment

In [None]:
def experiment(args):
    """Fine-tune a pretrained EfficientNetV2-S for ``args.epoch`` epochs.

    Reads the notebook-level ``trainset``, ``valset`` and ``device``. After every
    epoch that improves validation accuracy, the model weights are written to
    ``your_best_model.pth`` in the working directory.

    Args:
        args: namespace with ``train_batch_size``, ``test_batch_size``, ``out_dim``,
            ``optim`` ('SGD' | 'RMSprop' | 'Adam'), ``lr``, ``l2``, ``scheduler``
            and ``epoch``. Optional: ``dropout_rate`` (dropout probability of the
            classifier head) and ``max_lr`` (peak LR for OneCycleLR, default 0.1).

    Returns:
        ``(vars(args), result, net)`` where ``result`` holds the per-epoch loss and
        accuracy histories, the last-epoch accuracies and the best validation
        accuracy, and ``net`` is the model after the final epoch.

    Raises:
        ValueError: if ``args.optim`` is not one of the supported optimizers.
    """
    # Dataloaders
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.train_batch_size,
                                              shuffle=True, num_workers=2)

    valloader = torch.utils.data.DataLoader(valset,
                                            batch_size=args.test_batch_size,
                                            shuffle=False, num_workers=2)

    # Model definition (with pretrained weights)
    net = models.efficientnet_v2_s(weights=EfficientNet_V2_S_Weights.DEFAULT)

    # torchvision's EfficientNet head is
    #     nn.Sequential(nn.Dropout(p=dropout, inplace=True),
    #                   nn.Linear(lastconv_output_channels, num_classes))
    # Swap the final Linear for one sized to this task.
    net.classifier[1] = nn.Linear(net.classifier[1].in_features, args.out_dim)
    if hasattr(args, 'dropout_rate'):
        # Configure the dropout that sits *before* the Linear layer. Appending a
        # Dropout after it (as was done previously) randomly zeroes the output
        # logits during training. Dropout has no parameters, so the state_dict
        # layout is unchanged.
        net.classifier[0] = nn.Dropout(p=args.dropout_rate, inplace=True)

    net = net.to(device)

    criterion = nn.CrossEntropyLoss()
    if args.optim == 'SGD':
        optimizer = optim.SGD(net.parameters(), lr=args.lr, weight_decay=args.l2)
    elif args.optim == 'RMSprop':
        optimizer = optim.RMSprop(net.parameters(), lr=args.lr, weight_decay=args.l2)
    elif args.optim == 'Adam':
        optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.l2)
    else:
        raise ValueError('In-valid optimizer choice')

    # Learning-rate scheduler
    if args.scheduler == 'ReduceLROnPlateau':
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
    elif args.scheduler == 'StepLR':
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
    elif args.scheduler == 'OneCycleLR':
        # The scheduler is stepped once per epoch below, so the cycle has to span
        # `args.epoch` steps. Sizing it with len(trainloader) steps per epoch would
        # leave the schedule stuck at the very start of its warm-up.
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer,
                                                  max_lr=getattr(args, 'max_lr', 0.1),
                                                  steps_per_epoch=1,
                                                  epochs=args.epoch)
    elif args.scheduler == 'CosineAnnealingLR':
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epoch)
    else:
        scheduler = None

    train_losses = []
    val_losses = []
    train_accs = []
    val_accs = []

    best_acc = 0.0

    for epoch in range(args.epoch):  # loop over the dataset multiple times
        ts = time.time()
        net, train_loss, train_acc = train(net, trainloader, optimizer, criterion, args)
        val_loss, val_acc = validate(net, valloader, criterion, args)
        te = time.time()

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accs.append(train_acc)
        val_accs.append(val_acc)

        print('Epoch {}, Acc(train/val): {:2.2f}/{:2.2f}, Loss(train/val) {:2.2f}/{:2.2f}. Took {:2.2f} sec'.format(epoch + 1, train_acc, val_acc, train_loss, val_loss, te-ts))

        # Advance the scheduler; ReduceLROnPlateau needs the monitored metric.
        if scheduler is not None:
            if args.scheduler == 'ReduceLROnPlateau':
                scheduler.step(val_loss)
            else:
                scheduler.step()

        # Checkpoint whenever validation accuracy improves.
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(net.state_dict(), 'your_best_model.pth')
            print(f'Best model saved with accuracy: {best_acc:2.2f}')

    result = {}
    result['train_losses'] = train_losses
    result['val_losses'] = val_losses
    result['train_accs'] = train_accs
    result['val_accs'] = val_accs
    # Last-epoch metrics; None when no epoch ran (args.epoch == 0) instead of a NameError.
    result['train_acc'] = train_accs[-1] if train_accs else None
    result['val_acc'] = val_accs[-1] if val_accs else None
    result['best_acc'] = best_acc
    return vars(args), result, net

### Saving & Loading Experiment Results

In [None]:
import hashlib
import json
from os import listdir
from os.path import isfile, join
import pandas as pd

def save_exp_result(setting, result, results_dir=None):
    """Write one experiment's settings and results to a JSON file.

    The file is named ``{exp_name}-{hash}.json`` where ``hash`` is the first six hex
    digits of the SHA-1 of the settings (``test_batch_size`` excluded), so runs with
    different settings get different files.

    Args:
        setting: dict of experiment settings; must contain ``'exp_name'``. It is
            not modified.
        result: dict of experiment results. It is not modified.
        results_dir: target directory; defaults to ``{root_dir}/results`` using the
            notebook-level ``root_dir``. Created if missing.

    Raises:
        KeyError: if ``setting`` has no ``'exp_name'`` entry.
    """
    exp_name = setting['exp_name']

    # Work on a filtered copy: `setting` is typically vars(args), so deleting keys
    # from it in place would strip attributes from the caller's Namespace.
    hashed_setting = {key: value for key, value in setting.items()
                      if key != 'test_batch_size'}

    if results_dir is None:
        results_dir = f'{root_dir}/results'
    os.makedirs(results_dir, exist_ok=True)

    hash_key = hashlib.sha1(str(hashed_setting).encode()).hexdigest()[:6]
    filename = os.path.join(results_dir, f'{exp_name}-{hash_key}.json')

    # Settings override results on key clashes, as before.
    payload = {**result, **hashed_setting}
    with open(filename, 'w') as f:
        json.dump(payload, f)


def load_exp_result(exp_name, dir_path=None):
    """Collect saved experiment results whose file name contains ``exp_name``.

    Args:
        exp_name: substring to look for in the result file names.
        dir_path: directory holding the ``*.json`` result files; defaults to
            ``{root_dir}/results`` (notebook-level ``root_dir``), which is where
            ``save_exp_result`` writes by default.

    Returns:
        A ``pandas.DataFrame`` with one row per matching file, in file-name order;
        empty when nothing matches.
    """
    if dir_path is None:
        # Previously './results', which is not where save_exp_result stores files.
        dir_path = f'{root_dir}/results'

    # Sort so the row order does not depend on the directory listing order.
    filenames = sorted(
        f for f in listdir(dir_path)
        if isfile(join(dir_path, f)) and f.endswith('.json')
    )

    list_result = []
    for filename in filenames:
        if exp_name in filename:
            with open(join(dir_path, filename), 'r') as infile:
                list_result.append(json.load(infile))

    df = pd.DataFrame(list_result)
    return df

### Experiment Parameters

In [None]:
# ====== Random Seed Initialization ====== #
# Seed the global NumPy and PyTorch random number generators.
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)

# Start from an empty Namespace and attach the hyperparameters as attributes.
# Keep the assignment order stable: save_exp_result hashes str() of these settings
# to build the result file name, and that string follows insertion order.
parser = argparse.ArgumentParser()
args = parser.parse_args("")
args.exp_name = "exp3_epoch"

# ====== Model ====== #
args.out_dim = 5  # size of the replacement classifier layer built in experiment()
args.act = 'relu'  # NOTE(review): not read by any code visible in this notebook

# ====== Regularization ======= #
args.l2 = 0.00001  # passed as weight_decay to the optimizer

# ====== Optimizer & Training ====== #
args.optim = 'RMSprop'  # experiment() accepts exactly 'SGD', 'RMSprop' or 'Adam'
args.lr = 0.0001
args.lr_decay = 0.95  # NOTE(review): not read by any code visible in this notebook
args.scheduler = 'CosineAnnealingLR'
# args.epoch is not set here; the sweep cell assigns it (name_var2 = 'epoch').

args.dropout_rate = 0.3  # optional: experiment() only adds dropout when this attribute exists

args.train_batch_size = 64
args.test_batch_size = 128

In [None]:
import multiprocessing as mp
# Force the 'spawn' start method for worker processes (e.g. DataLoader workers).
mp.set_start_method('spawn', force=True)

# ====== Experiment Variable ====== #
# Grid of hyperparameters to sweep: every (var1, var2) pair is one experiment.
name_var1 = 'lr'
name_var2 = 'epoch'
list_var1 = [0.0001]
list_var2 = [20, 30]

# NOTE: the former `setattr(trainset, 'transform', None)` / `setattr(valset, ...)`
# lines were removed. `trainset` and `valset` are Subset objects returned by
# random_split, so the attribute was never read; had it reached the underlying
# ImageFolder, samples would stay PIL images and batching would fail.

for var1 in list_var1:
    for var2 in list_var2:
        setattr(args, name_var1, var1)
        setattr(args, name_var2, var2)
        print(args)

        # Each run gets its own copy of the settings; `net` keeps the model of the
        # most recent run for the evaluation cell.
        setting, result, net = experiment(deepcopy(args))
        save_exp_result(setting, result)

Namespace(exp_name='exp3_epoch', out_dim=5, act='relu', l2=1e-05, optim='RMSprop', lr=0.0001, lr_decay=0.95, scheduler='CosineAnnealingLR', dropout_rate=0.3, train_batch_size=64, test_batch_size=128, epoch=20)
Epoch 1, Acc(train/val): 52.57/70.16, Loss(train/val) 1.09/0.78. Took 247.46 sec
Best model saved with accuracy: 70.16
Epoch 2, Acc(train/val): 67.04/73.12, Loss(train/val) 0.76/0.70. Took 248.09 sec
Best model saved with accuracy: 73.12
Epoch 3, Acc(train/val): 75.47/72.37, Loss(train/val) 0.56/0.76. Took 246.92 sec
Epoch 4, Acc(train/val): 81.48/72.02, Loss(train/val) 0.41/0.84. Took 248.72 sec
Epoch 5, Acc(train/val): 82.96/72.12, Loss(train/val) 0.34/1.02. Took 248.61 sec
Epoch 6, Acc(train/val): 84.08/73.45, Loss(train/val) 0.30/1.10. Took 253.66 sec
Best model saved with accuracy: 73.45
Epoch 7, Acc(train/val): 84.65/71.91, Loss(train/val) 0.28/1.16. Took 253.21 sec
Epoch 8, Acc(train/val): 84.88/72.09, Loss(train/val) 0.27/1.19. Took 248.78 sec
Epoch 9, Acc(train/val): 85.

In [None]:
# After training, reload the best checkpoint (your_best_model.pth).
# map_location lets a checkpoint saved on a GPU load on a CPU-only runtime too.
net.load_state_dict(torch.load('your_best_model.pth', map_location=device))

# This notebook only creates a train/validation split, so `testset` may not exist.
# Fall back to the validation split in that case and say so: the checkpoint was
# selected on that same split, so the number below is an optimistic estimate.
if 'testset' not in globals():
    print('No held-out test set defined; evaluating on the validation split instead.')
    testset = valset

# Run the evaluation
test_acc = test(net, args)
print(f'Test accuracy: {test_acc:.2f}%')