# dance - ballade 분류(RGG Ver)
- 프로젝트에 사용된 파일

In [4]:
# 모듈 로딩
import pandas as pd
import numpy as np

from PIL import Image
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, WeightedRandomSampler


import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.nn.init as init
import matplotlib.pyplot as plt

# 데이터 불러오기

In [5]:
from torchvision.datasets import ImageFolder 
from torchvision.transforms import transforms
from PIL import Image

# Root folders of the training and test images.
train_root, test_root = "./data/train", "./data/test"

# Preprocessing applied to every image as it is loaded:
#   1. resize to 100x100,
#   2. convert to grayscale,
#   3. convert to a tensor.
# Disabled augmentation, kept for reference:
#   transforms.RandomRotation(degrees=(90), interpolation=transforms.InterpolationMode.BILINEAR, fill=0)
_preprocessing_steps = [
    transforms.Resize((100, 100)),
    transforms.Grayscale(),
    transforms.ToTensor(),
]
preprocessing = transforms.Compose(_preprocessing_steps)

# Load both datasets with the same preprocessing pipeline.
trainDS, testDS = (
    ImageFolder(root=folder, transform=preprocessing)
    for folder in (train_root, test_root)
)

In [6]:
trainDS.samples

[('./data/train\\ballade\\balad1.png', 0),
 ('./data/train\\ballade\\balad10.png', 0),
 ('./data/train\\ballade\\balad100.png', 0),
 ('./data/train\\ballade\\balad11.png', 0),
 ('./data/train\\ballade\\balad12.png', 0),
 ('./data/train\\ballade\\balad13.png', 0),
 ('./data/train\\ballade\\balad14.png', 0),
 ('./data/train\\ballade\\balad15.png', 0),
 ('./data/train\\ballade\\balad16.png', 0),
 ('./data/train\\ballade\\balad17.png', 0),
 ('./data/train\\ballade\\balad18.png', 0),
 ('./data/train\\ballade\\balad2.png', 0),
 ('./data/train\\ballade\\balad20.png', 0),
 ('./data/train\\ballade\\balad21.png', 0),
 ('./data/train\\ballade\\balad22.png', 0),
 ('./data/train\\ballade\\balad23.png', 0),
 ('./data/train\\ballade\\balad25.png', 0),
 ('./data/train\\ballade\\balad26.png', 0),
 ('./data/train\\ballade\\balad28.png', 0),
 ('./data/train\\ballade\\balad29.png', 0),
 ('./data/train\\ballade\\balad3.png', 0),
 ('./data/train\\ballade\\balad30.png', 0),
 ('./data/train\\ballade\\balad32.

## 데이터 셋이 잘 읽혔는지 확인

In [7]:
trainDS.classes, trainDS.class_to_idx

(['ballade', 'dance'], {'ballade': 0, 'dance': 1})

In [8]:
# valDS.classes, valDS.class_to_idx

In [9]:
testDS.classes, testDS.class_to_idx

(['ballade', 'dance'], {'ballade': 0, 'dance': 1})

In [10]:
# Check the train / test split sizes
len(trainDS), len(testDS)

(160, 20)

In [11]:
trainDS[0][0]

tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]])

In [12]:
# A sampler is intended to even out the class distribution of each batch.
# NOTE(review): the weights below are uniform and `sampler` is not passed to
# either DataLoader, so training batches come from plain shuffling.
# weights = make_weights(imgDS.targets, len(imgDS.classes))
n_train_samples = len(trainDS.targets)
weights = torch.ones(n_train_samples, dtype=torch.float)
sampler = WeightedRandomSampler(weights=weights, num_samples=n_train_samples)

# The test loader uses the same batch size as the training loader.
train_DL = DataLoader(dataset=trainDS, batch_size=20, shuffle=True)
test_DL = DataLoader(dataset=testDS, batch_size=20)


In [13]:
# max = 10
# for cnt, (_, label) in enumerate(val_DL):
#     print(f'batch별 target 분포\n{label.bincount()}\n')
#     if cnt > max:
#         break

In [14]:
len(train_DL), len(test_DL) # number of batches in each loader

(8, 1)

# 학습 준비

In [15]:
# Select the device used for the model: CUDA when available, otherwise the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print('Using PyTorch version:', torch.__version__, ' Device:', DEVICE)

Using PyTorch version: 2.2.2  Device: cpu


In [16]:
# Hyperparameters describing the input shape, the model and the training run.
sequence_length = 100 # only referenced in a commented-out debug line of the training loop
input_size = 2 # number of features per time step of the input x
hidden_size = 64  # hidden state size passed to the RNN model
num_layers = 16  # number of stacked LSTM layers passed to the RNN model
num_classes = 2  # size of the model's output layer
num_epochs = 20  # number of passes over the training loader
learning_rate = 0.001  # NOTE(review): not passed to the optimizer in the visible training cell — confirm

# RNN CLASS

In [17]:
class RNN(nn.Module):
    """Bidirectional multi-layer LSTM followed by a linear classification head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden state size of each LSTM direction.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Arguments: feature count, hidden size, layer count.
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            bidirectional=True,
        )
        # The LSTM is bidirectional, so its output carries hidden_size * 2 features.
        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        """Return class logits for a batch of sequences.

        Args:
            x: Tensor of shape (BATCH, LENGTH, INPUT_SIZE).

        Returns:
            Tensor of shape (BATCH, num_classes) holding unnormalised logits.
        """
        # Two directions per layer, hence num_layers * 2 initial states.
        state_shape = (self.num_layers * 2, x.size(0), self.hidden_size)
        # Allocate the initial states with the input's own device and dtype so
        # the module does not depend on a global device variable.
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)  # same shape as the hidden state

        # result: (BATCH, LENGTH, HIDDEN_SIZE * 2)
        result, _ = self.lstm(x, (h0, c0))

        # Classify from the output at the last time step.
        # NOTE(review): at this index the backward direction has only processed
        # one step; consider using the final hidden states instead.
        return self.fc(result[:, -1, :])


# 학습 및 평가

In [18]:
# Build the model and train it.
model = RNN(input_size, hidden_size, num_layers, num_classes).to(DEVICE)

# Loss function (classification), optimizer and learning-rate scheduler.
criterion = nn.CrossEntropyLoss().to(DEVICE)
# Pass the configured learning rate explicitly instead of relying on the default.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer)

# Training
total_step = len(train_DL)  # number of batches per epoch
for epoch in range(num_epochs):
    # Make sure the model is in training mode at the start of every epoch.
    model.train()
    epoch_loss = 0.0

    for i, (images, labels) in enumerate(train_DL):
        # (BATCH, 1, H, W) -> (BATCH, H, W); H and W are taken from the batch
        # itself rather than hard-coded.
        images = images.reshape(-1, *images.shape[-2:]).to(DEVICE)

        # Per-column variance and mean over the rows: each is (BATCH, 1, W).
        var, mean = torch.var_mean(images, dim=1, keepdim=True)
        # Treat every column as one time step: (BATCH, W, 1).
        timed_var = var.permute(0, 2, 1)
        timed_mean = mean.permute(0, 2, 1)

        # Two features per time step: (BATCH, W, 2).
        timed_feature = torch.cat((timed_var, timed_mean), dim=-1)

        labels = labels.to(DEVICE)
        print(labels)

        # Forward pass
        outputs = model(timed_feature)
        predicted = torch.argmax(outputs, dim=1)
        print(predicted, len(predicted))

        loss = criterion(outputs, labels)

        # Backward pass and optimisation step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

        # Progress is reported after every step.
        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
            epoch+1, num_epochs, i+1, total_step, loss.item()))

    # ReduceLROnPlateau only acts when it is stepped with the monitored metric;
    # feed it the mean training loss of the epoch.
    if total_step:
        scheduler.step(epoch_loss / total_step)

tensor([0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) 20
Epoch [1/20], Step [1/8], Loss: 0.6790
tensor([0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) 20
Epoch [1/20], Step [2/8], Loss: 0.6766
tensor([1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) 20
Epoch [1/20], Step [3/8], Loss: 0.7458
tensor([1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) 20
Epoch [1/20], Step [4/8], Loss: 0.7410
tensor([1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) 20
Epoch [1/20], Step [5/8], Loss: 0.7462
tensor([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1])
tensor([0, 0, 0, 0, 0, 0, 0, 0,

# 모델 평가

In [19]:
# Evaluate the trained model on the test loader.
model.eval()  # switch layers such as Dropout / BatchNorm to inference behaviour
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_DL:
        # (BATCH, 1, H, W) -> (BATCH, H, W)
        images = images.reshape(-1, *images.shape[-2:]).to(DEVICE)

        # Per-column variance and mean over the rows: each is (BATCH, 1, W).
        var, mean = torch.var_mean(images, dim=1, keepdim=True)
        # Treat every column as one time step: (BATCH, W, 1).
        timed_var = var.permute(0, 2, 1)
        # Use the mean here, matching the features built in the training loop
        # (the previous code reused the variance for both features).
        timed_mean = mean.permute(0, 2, 1)

        # Two features per time step: (BATCH, W, 2).
        timed_feature = torch.cat((timed_var, timed_mean), dim=-1)
        labels = labels.to(DEVICE)
        print(labels)

        outputs = model(timed_feature)

        # Index of the largest logit is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy: {} %'.format(100 * correct / total))


tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
Accuracy: 50.0 %


In [20]:
# Small worked example of the (variance, mean) feature layout on a 3x4 tensor.
a = torch.tensor([
    [ 1.5027, -0.3270,  0.5905,  0.],
    [-1.5745,  1.3330, -0.5596, -0.],
    [ 0.1264, -0.5080,  1.6420,  0.],
])

# Column-wise statistics, each turned from shape (1, 4) into (4, 1).
var, mean = (
    stat.permute(1, 0)
    for stat in torch.var_mean(a, dim=0, keepdim=True)
)

# One row per column of `a`: [variance, mean].
torch.cat((var, mean), dim=1)

tensor([[2.3761, 0.0182],
        [1.0296, 0.1660],
        [1.2126, 0.5576],
        [0.0000, 0.0000]])

In [None]:
# torch.save(model.state_dict(), 'RNN.pth')