In [1]:
import time
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

In [2]:
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google.colab'

In [None]:
train_data = pd.read_csv('/content/drive/My Drive/data/fashionmnist/fashion-mnist_train.csv')
test_data = pd.read_csv('/content/drive/My Drive/data/fashionmnist/fashion-mnist_test.csv')
train_data.head()

In [None]:
# 이미지 데이터와 라벨 데이터 분리
train_x = np.array(train_data.iloc[:,1:])
train_y = np.array(train_data['label'])
test_x = np.array(test_data.iloc[:,1:])
test_y = np.array(test_data['label'])

print(train_x.shape)
print(train_y.shape)
print(test_x.shape)
print(test_y.shape)


# 시각화 
# 임의로 3번사진을 가져옵니다.
image_array = np.array(train_x[3]).reshape(28,28)
pic = Image.fromarray(image_array.astype('uint8'))
plt.imshow(pic)

In [6]:
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch
from torch import nn

if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
    
print('Using PyTorch version:', torch.__version__, ' Device:', device)

Using PyTorch version: 1.1.0  Device: cuda


In [0]:
# 파라미터 선택
random_seed = 0
learning_rate = 0.0001
num_epochs = 40
batch_size = 32

input_size = 784
h1_size = 512
h2_size = 512 
output_size = 10

In [8]:
# 데이터셋을 클래스로 만들고 파이토치의 dataloader로 만들어주면 손쉽게 훈련에서 사용할 수 있습니다.
# 함수들은 dataloader로 데이터를 끌고올 때 사용됩니다.
class FashionDataset(torch.utils.data.Dataset):
    def __init__(self, X, Y):
        # import and initialize dataset
        self.X = X
        self.Y = Y

    def __getitem__(self, idx):
        return self.X[idx], self.Y[idx]

    def __len__(self):
        # returns length of data
        return len(self.X)
    
dataset = FashionDataset(train_x, train_y)
testset = FashionDataset(test_x, test_y)
print(type(dataset))
print(len(dataset))
dataloader = torch.utils.data.DataLoader(dataset, batch_size = batch_size, shuffle = True)
test_loader = torch.utils.data.DataLoader(testset, batch_size = batch_size, shuffle = True)

dataloader

<class '__main__.FashionDataset'>
60000


<torch.utils.data.dataloader.DataLoader at 0x7f06e78ef1d0>

In [0]:
# 다층 신경망 클래스 정의
class MLP(nn.Module):
    def __init__(self, input_size, output_size, h1_size, h2_size):
        super(MLP, self).__init__()
        # 파라미터 정의
        self.input_size = input_size
        self.output_size = output_size
        self.h1_size = h1_size
        self.h2_size = h2_size  
        
        # 신경망 정의
        # Sequential로 한꺼번에 묶어서 처리 할 수 있습니다.
        self.layers = nn.Sequential(
            nn.Linear(self.input_size, self.h1_size, bias=True),
            nn.ReLU(),
            nn.Linear(self.h1_size, self.h2_size, bias=True),
            nn.ReLU(),
            nn.Linear(self.h2_size, self.output_size, bias=True)
        )

    def forward(self, x):
        # 학습을 위해 텐서 shape을 바꿔줌
        x = x.view(-1, 784)
        x = self.layers(x.float())
        return x



In [10]:
my_mlp = MLP(input_size, output_size, h1_size, h2_size).to(device)
print(my_mlp)

MLP(
  (layers): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [0]:
optimizer = torch.optim.Adam(my_mlp.parameters(), lr=0.0001)
criterion = nn.CrossEntropyLoss()

In [68]:
# 성능 측정 데이터를 쌓을 dictionary
performance = {'test_acc': [],'test_loss': []} 

for epoch in range(num_epochs):
    # train
    my_mlp.train()
    # iter, image, label 반복문
    for i, (x, y) in enumerate(dataloader):
        # 모델에 데이터를 흘려 넣어줍니다.
        x, y = x.to(device), y.to(device) 
        outputs = my_mlp(x)
        loss = criterion(outputs, y)
        # optimizer의 변화도 버퍼(gradient buffer)를 0으로 설정하고, 무작위 값으로 역전파를 합니다.
        optimizer.zero_grad()
        loss.backward()
        # 가중치 업데이트 
        optimizer.step()

    # eval
    y_pred, y_true = [], []
    test_acc = 0
    test_loss = 0
    my_mlp.eval()
    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.to(device), y.to(device)
            # Loss
            outputs = my_mlp(x)   # 예측 label 
            loss = criterion(outputs, y)
            test_loss += loss.item()
            # Accuracy
            y_true += list(y.cpu())   # 정답 label
            y_pred += list(np.argmax(F.softmax(outputs).cpu(), axis=1)) # 소프트맥스 확률값에서 argmax로 제일 높은 인덱스를 뽑자
            # sklearn의 정확도 측정 모듈 accuracy_score
            acc = accuracy_score(y_true, y_pred)
            test_acc += acc

        # 누적된 통계치들에 batch를 반복한 횟수로 나누자
        # 반복 횟수 = batch size 32로 잡았으면 testset size가 1만이니까 10k / 32 해서 313
        test_acc = test_acc / len(test_loader.batch_sampler)
        test_loss = test_loss / len(test_loader.batch_sampler)

        performance["test_acc"].append(test_acc)
        performance["test_loss"].append(test_loss)

        # 성능 출력
        if epoch % 5 == 0:
            print(f"Epoch: {epoch}, Test Loss: {test_loss:.5f}, Test Acc: {test_acc:.5f}")



Epoch: 0, Test Loss: 0.38060, Test Acc: 0.89603
Epoch: 5, Test Loss: 0.39937, Test Acc: 0.89681
Epoch: 10, Test Loss: 0.42827, Test Acc: 0.90248
Epoch: 15, Test Loss: 0.52154, Test Acc: 0.89501
Epoch: 20, Test Loss: 0.56811, Test Acc: 0.89816
Epoch: 25, Test Loss: 0.54166, Test Acc: 0.89776
Epoch: 30, Test Loss: 0.63486, Test Acc: 0.88847
Epoch: 35, Test Loss: 0.69097, Test Acc: 0.90023


### 참고자료
* https://medium.com/biaslyai/pytorch-introduction-to-neural-network-feedforward-neural-network-model-e7231cff47cb
* https://www.kaggle.com/pinocookie/pytorch-simple-mlp
* https://tutorials.pytorch.kr/beginner/pytorch_with_examples.html