Sampler 사용해보기

In [4]:
import os
import sys
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.functional import F
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

In [5]:
pork_folder = '../DATA/Pork_images/'
beef_folder = '../DATA/Beef_images/'
pork_images, beef_images = os.listdir(pork_folder), os.listdir(beef_folder)

In [6]:
# 오류 없는 이미지 파일명 리스트 생성
valid_pork_images, valid_beef_images = [], []
for image in pork_images:
    # 로드 오류 시 파일명 출력
    try:
        img = Image.open(pork_folder + image)
        valid_pork_images.append(image)
    except:
        print(image)
        continue
for image in beef_images:
    try:
        img = Image.open(beef_folder + image)
        valid_beef_images.append(image)
    except:
        print(image)
        continue

1000273355264_i1_750.jpg
1000522472927_i1_750.jpg
1000531208174_i1_750.jpg
1000546285252_i1_750.jpg
4-thuc-pham-dai-ky-thit-lon-loai-thu-4.jpg
fresh-pork-meat-cooking-pork-loin-tray_75924-26230.jpg
fresh-pork-neck-raw-or-collar-pork-on-board-with-ingredients-for-marinated_1339-154331.jpg
Gia-heo-hoi-mien-Nam-xuong-doc-thi-truong-xuat.jpg
meat_144627-27534.jpg
pork-meat-large-piece-of-raw-pork-on-a-dark-rustic-table_256259-522.jpg
raw-pork-meat-isolated_1203-6736.jpg_size=626&ext=jpg&ga=GA1.1.1546980028.jpg
raw-pork-meat_1472-13371.jpg
%EC%83%9D-%EC%87%A0%EA%B3%A0%EA%B8%B0-%EC%8A%A4%ED%85%8C%EC%9D%B4%ED%81%AC-%EC%BB%A4%ED%8B%80%EB%A6%BF%EA%B3%BC-%ED%96%A5%EC%8B%A0%EB%A3%8C%EB%A5%BC-%EA%B3%81%EB%93%A4%EC%9D%B8-%EC%83%A4%EC%8A%90%EB%A6%AD.jpg
1000045412772_i1_750.jpg
1000520047500_i1_750.jpg
1000555929992_i1_750.jpg
4H3PG5YFPJDIHN6FYSKPYDWOSA.jpg
860x860_%EC%97%90%EC%9D%B4%EC%A7%95%EA%B7%B8%EB%9D%BC%EC%9A%B4%EB%93%9C_%EA%B0%88%EB%A6%AD%EC%97%90%EC%9D%B4%EC%A7%95%EC%86%8C%EA%B3%A0%EA%B8%B0

In [7]:
# .Compose로 해보기, .convert('RGB') 추가
transform = transforms.Compose([
    transforms.Resize((600, 600)),
    transforms.ToTensor()
])
pork_data, beef_data = [], []
for image in valid_pork_images:
    img = Image.open(pork_folder + image)
    if img.mode == 'RGBA':
        img = img.convert('RGB')
    img = transform(img)
    if img.shape[0] != 3:
        print(image)
    else:
        pork_data.append(img)
print('Pork :', len(pork_data), pork_data[0].shape, pork_data[0].dtype)

for image in valid_beef_images:
    img = Image.open(beef_folder + image)
    if img.mode == 'RGBA':
        img = img.convert('RGB')
    img = transform(img)
    beef_data.append(img)
print('Beef :', len(beef_data), beef_data[0].shape, beef_data[0].dtype)

png-transparent-black-iberian-pig-ham-boston-butt-pork-sirloin-steak-ham-beef-animal-source-foods-pork.jpg
Pork : 227 torch.Size([3, 600, 600]) torch.float32
Beef : 207 torch.Size([3, 600, 600]) torch.float32


In [8]:
# class MeatDataset : __init__ (super), __len__, __getitem__ 구현
from torch.utils.data import Dataset, DataLoader

class MeatDataset(Dataset):
    def __init__(self, data, label):
        super(MeatDataset, self).__init__()
        self.data = data
        self.label = torch.tensor(label, dtype=torch.long)
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx], self.label[idx]

In [9]:
# label 생성
pork_label = [0] * len(pork_data)
beef_label = [1] * len(beef_data)
labels = pork_label + beef_label
len(labels)

434

In [16]:
# 클래스별 비율 계산
pork_ratio = len(pork_data) / len(labels)
beef_ratio = len(beef_data) / len(labels)
print('Pork ratio :', pork_ratio, 'Beef ratio :', beef_ratio)

Pork ratio : 0.5230414746543779 Beef ratio : 0.4769585253456221


In [17]:
# dataset 생성
pork_dataset = MeatDataset(pork_data, pork_label)
beef_dataset = MeatDataset(beef_data, beef_label)

# 결합
dataset = torch.utils.data.ConcatDataset([pork_dataset, beef_dataset])
print('Dataset :', len(dataset))

# split
from sklearn.model_selection import train_test_split

train_data, test_data = train_test_split(dataset, test_size=0.2, random_state=11, shuffle=True, stratify=labels)
print('Train :', len(train_data), 'Test :', len(test_data))

# Sampler 생성 : WeightedRandomSampler
from torch.utils.data import WeightedRandomSampler

# 클래스별 비율 계산
pork_ratio = len(pork_data) / len(labels)
beef_ratio = len(beef_data) / len(labels)
# print('Pork ratio :', pork_ratio, 'Beef ratio :', beef_ratio)

sampler = WeightedRandomSampler([pork_ratio, beef_ratio], len(train_data), replacement=True)
# WeightedRandomSampler(weights, num_samples, replacement=True)
# - weights : 각 샘플이 선택될 확률 : 
# - num_samples : 샘플 개수
# - replacement : True이면 복원추출, False이면 비복원추출

# dataloader 생성
Batch = 8   # 모든 배치 수는 8
train_loader = DataLoader(train_data, batch_size=Batch, sampler=sampler, shuffle=False)     # sampler 추가시 shuffle=False
test_loader = DataLoader(test_data, batch_size=Batch, shuffle=False)
print('Train :', len(train_loader), 'Test :', len(test_loader))

Dataset : 434
Train : 347 Test : 87
Train : 44 Test : 11


In [18]:
class MeatModel(nn.Module):
    def __init__(self):
        super(MeatModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)  # (3, 600, 600) -> (32, 600, 600), 가로세로는 일정
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)  # (32, 600, 600) -> (64, 600, 600)
        self.pool = nn.MaxPool2d(kernel_size=2)  # (64, 600, 600) -> (64, 300, 300) -> (64, 150, 150)
        self.fc1 = nn.Linear(64 * 150 * 150, 128)
        self.fc2 = nn.Linear(128, 2)    # 0, 1이면 2개의 클래스, 1인가 2인가?
        
    def forward(self, x):
        y = F.relu(self.conv1(x))   # (3, 600, 600) -> (32, 600, 600)
        y = self.pool(y)            # (32, 600, 600) -> (32, 300, 300)
        y = F.relu(self.conv2(y))   # (32, 300, 300) -> (64, 300, 300)
        y = self.pool(y)            # (64, 300, 300) -> (64, 150, 150)
        
        y = y.view(-1, 64 * 150 * 150)  # 1차원으로 펼치기 : ?? 근데 왜 300이지? -> 150 맞다! fc1을 300으로 해서 오류
        y = F.relu(self.fc1(y))    # (64 * 150 * 150) -> (128)
        y = self.fc2(y)            # (128) -> (2)
        return y

In [20]:
# model evaluation test
model = MeatModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# train
Epoch = 3
for epoch in range(Epoch):
    model.train()
    for i, (data, label) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        label = label.view(-1)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        
        if i % 3 == 0:  # 이거 왜 0만 출력? -> break 위치 잘못 ㅠ
            print(data.shape, label)
            print(f'Epoch : {epoch} Iter : {i} Loss : {loss.item()}')
    break


# test
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for i, (data, label) in enumerate(test_loader):
        output = model(data)
        label = label.view(-1)
        _, predicted = torch.max(output, 1)
        total += label.size(0)
        correct += (predicted == label).sum().item()
    print('Accuracy :', correct / total)

torch.Size([8, 3, 600, 600]) tensor([1, 1, 1, 1, 1, 1, 1, 1])
Epoch : 0 Iter : 0 Loss : 0.7327685356140137
torch.Size([8, 3, 600, 600]) tensor([0, 1, 1, 1, 1, 0, 0, 0])
Epoch : 0 Iter : 3 Loss : 0.0007033762522041798
torch.Size([8, 3, 600, 600]) tensor([1, 0, 0, 1, 0, 0, 1, 1])
Epoch : 0 Iter : 6 Loss : 3.5762778338721546e-07
torch.Size([8, 3, 600, 600]) tensor([1, 1, 1, 0, 0, 1, 0, 1])
Epoch : 0 Iter : 9 Loss : 2.7178732125321403e-05
torch.Size([8, 3, 600, 600]) tensor([1, 1, 0, 0, 0, 0, 1, 1])
Epoch : 0 Iter : 12 Loss : 0.0
torch.Size([8, 3, 600, 600]) tensor([1, 0, 1, 1, 0, 1, 0, 1])
Epoch : 0 Iter : 15 Loss : 2.9802316703353426e-07
torch.Size([8, 3, 600, 600]) tensor([1, 1, 1, 0, 1, 1, 0, 0])
Epoch : 0 Iter : 18 Loss : 5.215404144109925e-07
torch.Size([8, 3, 600, 600]) tensor([0, 1, 0, 0, 0, 1, 1, 0])
Epoch : 0 Iter : 21 Loss : 0.0
torch.Size([8, 3, 600, 600]) tensor([0, 1, 0, 0, 1, 0, 0, 0])
Epoch : 0 Iter : 24 Loss : 0.0
torch.Size([8, 3, 600, 600]) tensor([1, 0, 1, 1, 1, 0, 1, 0

Accuracy 0.5172

In [22]:
# new_data
new_pork = Image.open('../DATA/pork3.jpg')
new_beef = Image.open('../DATA/beef.jpg')

def preprocess_img(img):
    transform = transforms.Compose([
        transforms.Resize((600, 600)),
        transforms.ToTensor()
    ])
    img = img.convert('RGB')
    img = transform(img)
    img = img.unsqueeze(0)
    return img

preprocess_img(new_pork).shape, preprocess_img(new_beef).shape

(torch.Size([1, 3, 600, 600]), torch.Size([1, 3, 600, 600]))

In [23]:
def predict_img(img):
    model.eval()
    with torch.no_grad():
        output = model(img)
        _, predicted = torch.max(output, 1)
        return predicted

predict_img(preprocess_img(new_pork)), predict_img(preprocess_img(new_beef))

(tensor([0]), tensor([0]))

In [None]:
# ㅠㅠ 둘 다 돼지고기로 예측함