# Backend.AI 체험하기
### pytorch version (Custom)
DATASET : FashionMNIST
MODEL : LeNET

### Improve performance
1. Change Hyper Parameters
2. Change Model DNN -> CNN (use GPU in Backend.AI cloud)

In [42]:
import numpy as np
import torch
from torchvision import datasets, transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm

config = {
    'batch_size' : 64,
    'lr':0.001,
    'n_classes' : 10,
    'epochs':20,
    'mean':0.5, 
    'std':0.5,
    'device': 'cuda:0' if torch.cuda.is_available() else 'cpu'
}

In [43]:

'''
# 간단한 transform 정의
'''
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((config['mean']), (config['std']))])


'''
# dataset & dataLoader
'''
trainset = datasets.FashionMNIST('.', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=config['batch_size'], shuffle=True)


testset = datasets.FashionMNIST('.', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=config['batch_size'], shuffle=False)

In [44]:
'''
# class balance 조사

balance good!!

'''
class_dict = {}
for _class in trainset.targets:
    _class = _class.item()
    if _class not in class_dict:
        class_dict[_class]=0
    class_dict[_class]+=1
print(class_dict)
config['n_classes'] = len(class_dict.keys())
config['n_classes']

{9: 6000, 0: 6000, 3: 6000, 2: 6000, 7: 6000, 5: 6000, 1: 6000, 6: 6000, 4: 6000, 8: 6000}


10

In [45]:
'''
MODEL 정의
tensorflow 예제에서는 단순 DNN이였다면 
퍼포먼스 향상을 위해 CNN(LeNet)으로 change
'''
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    def __init__(self, n_classes = 1):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1,6,kernel_size = 1) # 28 x 28 이므로 5->1로 변경
        self.conv2 = nn.Conv2d(6,16,kernel_size = 5)
        self.conv3 = nn.Conv2d(16,120,kernel_size = 5)
        self.fc1 = nn.Linear(120, 84)
        self.fc2 = nn.Linear(84, n_classes)
        self.pool = nn.MaxPool2d(kernel_size = 2, stride =2)
        
    def forward(self, x): # tanh -> relu
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = F.relu(self.conv3(x))
        x = x.view(-1,120)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

class DNN(nn.Module):
    def __init__(self, n_classes = 1):
        super(DNN, self).__init__()
        self.fc1 = nn.Linear(28*28,128)
        self.fc2 = nn.Linear(128, 10)
    
    def forward(self, x):
        x = x.view(-1,28*28)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [46]:
'''
train, valdation function
'''
def train(model, dataloader, criterion, optimizer, device):
    running_loss = 0
    for images, labels in tqdm(dataloader, position=0, leave=True):
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    running_loss /= len(dataloader)
    return running_loss

def validation(model, dataloader, criterion, device):
    running_loss = 0
    preds = []
    targets = []
    for images, labels in tqdm(dataloader, position=0, leave=True):
        with torch.no_grad():
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
        
        preds += torch.argmax(outputs,1).tolist()
        targets += labels.tolist()
        running_loss += loss.item()
    preds = np.array(preds)
    targets = np.array(targets)
    score = (preds == targets).sum() / len(preds)
    running_loss /= len(dataloader)
    return running_loss, score

In [47]:
'''
CNN 실험해보기
'''
device = config['device']
model = CNN(config['n_classes']).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = config['lr'])
epochs = config['epochs']


for epoch in range(1, epochs):
    t_loss = train(model, trainloader, criterion, optimizer, device)
    v_loss, score = validation(model, testloader, criterion, device)
    print('train_loss : {:.4f} \t test_loss : {:.4f} \t score : {:.3f}'.format(t_loss, v_loss, score))
print("Accuracy Score : {}".format(score))

100%|██████████| 938/938 [00:09<00:00, 96.38it/s] 
100%|██████████| 157/157 [00:01<00:00, 122.95it/s]
  1%|          | 11/938 [00:00<00:08, 103.89it/s]

train_loss : 0.6861 	 test_loss : 0.5520 	 score : 0.792


100%|██████████| 938/938 [00:09<00:00, 96.64it/s] 
100%|██████████| 157/157 [00:01<00:00, 116.70it/s]
  1%|          | 11/938 [00:00<00:09, 100.32it/s]

train_loss : 0.4511 	 test_loss : 0.4154 	 score : 0.849


100%|██████████| 938/938 [00:09<00:00, 94.84it/s]
100%|██████████| 157/157 [00:01<00:00, 120.51it/s]
  1%|          | 9/938 [00:00<00:11, 81.83it/s]

train_loss : 0.3771 	 test_loss : 0.3701 	 score : 0.868


100%|██████████| 938/938 [00:09<00:00, 96.34it/s] 
100%|██████████| 157/157 [00:01<00:00, 116.38it/s]
  1%|          | 10/938 [00:00<00:09, 96.58it/s]

train_loss : 0.3433 	 test_loss : 0.3583 	 score : 0.868


100%|██████████| 938/938 [00:09<00:00, 95.30it/s] 
100%|██████████| 157/157 [00:01<00:00, 117.96it/s]
  1%|          | 10/938 [00:00<00:09, 98.94it/s]

train_loss : 0.3180 	 test_loss : 0.3491 	 score : 0.874


100%|██████████| 938/938 [00:09<00:00, 95.76it/s] 
100%|██████████| 157/157 [00:01<00:00, 120.37it/s]
  1%|          | 9/938 [00:00<00:11, 83.48it/s]

train_loss : 0.3012 	 test_loss : 0.3199 	 score : 0.882


100%|██████████| 938/938 [00:09<00:00, 97.05it/s] 
100%|██████████| 157/157 [00:01<00:00, 120.85it/s]
  1%|          | 9/938 [00:00<00:10, 87.75it/s]

train_loss : 0.2889 	 test_loss : 0.3618 	 score : 0.867


100%|██████████| 938/938 [00:09<00:00, 97.47it/s] 
100%|██████████| 157/157 [00:01<00:00, 117.32it/s]
  1%|          | 10/938 [00:00<00:09, 96.70it/s]

train_loss : 0.2768 	 test_loss : 0.3078 	 score : 0.889


100%|██████████| 938/938 [00:09<00:00, 96.44it/s] 
100%|██████████| 157/157 [00:01<00:00, 118.63it/s]
  1%|          | 10/938 [00:00<00:09, 99.35it/s]

train_loss : 0.2661 	 test_loss : 0.3019 	 score : 0.891


100%|██████████| 938/938 [00:09<00:00, 95.13it/s] 
100%|██████████| 157/157 [00:01<00:00, 122.24it/s]
  1%|          | 10/938 [00:00<00:09, 95.51it/s]

train_loss : 0.2563 	 test_loss : 0.3080 	 score : 0.893


100%|██████████| 938/938 [00:09<00:00, 97.98it/s] 
100%|██████████| 157/157 [00:01<00:00, 121.11it/s]
  1%|          | 9/938 [00:00<00:10, 87.40it/s]

train_loss : 0.2483 	 test_loss : 0.3072 	 score : 0.890


100%|██████████| 938/938 [00:09<00:00, 98.55it/s] 
100%|██████████| 157/157 [00:01<00:00, 122.16it/s]
  1%|          | 10/938 [00:00<00:10, 92.54it/s]

train_loss : 0.2410 	 test_loss : 0.3138 	 score : 0.887


100%|██████████| 938/938 [00:09<00:00, 97.25it/s] 
100%|██████████| 157/157 [00:01<00:00, 118.96it/s]
  1%|          | 10/938 [00:00<00:09, 99.63it/s]

train_loss : 0.2326 	 test_loss : 0.3022 	 score : 0.894


100%|██████████| 938/938 [00:09<00:00, 97.46it/s] 
100%|██████████| 157/157 [00:01<00:00, 118.49it/s]
  1%|          | 11/938 [00:00<00:08, 106.36it/s]

train_loss : 0.2255 	 test_loss : 0.2991 	 score : 0.893


100%|██████████| 938/938 [00:09<00:00, 96.31it/s] 
100%|██████████| 157/157 [00:01<00:00, 117.71it/s]
  1%|          | 11/938 [00:00<00:08, 104.96it/s]

train_loss : 0.2188 	 test_loss : 0.3075 	 score : 0.893


100%|██████████| 938/938 [00:09<00:00, 99.05it/s] 
100%|██████████| 157/157 [00:01<00:00, 122.51it/s]
  1%|          | 10/938 [00:00<00:10, 91.14it/s]

train_loss : 0.2113 	 test_loss : 0.3035 	 score : 0.897


100%|██████████| 938/938 [00:09<00:00, 99.03it/s] 
100%|██████████| 157/157 [00:01<00:00, 119.51it/s]
  1%|          | 10/938 [00:00<00:09, 96.66it/s]

train_loss : 0.2063 	 test_loss : 0.3174 	 score : 0.894


100%|██████████| 938/938 [00:09<00:00, 99.63it/s] 
100%|██████████| 157/157 [00:01<00:00, 122.60it/s]
  1%|          | 11/938 [00:00<00:08, 107.87it/s]

train_loss : 0.2018 	 test_loss : 0.3077 	 score : 0.895


100%|██████████| 938/938 [00:09<00:00, 96.40it/s] 
100%|██████████| 157/157 [00:01<00:00, 118.78it/s]

train_loss : 0.1955 	 test_loss : 0.3122 	 score : 0.895
Accuracy Score : 0.895





In [48]:
'''
DNN 실험해보기
'''
device = config['device']
model = DNN(config['n_classes']).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = config['lr'])
epochs = config['epochs']


for epoch in range(1, epochs):
    t_loss = train(model, trainloader, criterion, optimizer, device)
    v_loss, score = validation(model, testloader, criterion, device)
    print('train_loss : {:.4f} \t test_loss : {:.4f} \t score : {:.3f}'.format(t_loss, v_loss, score))
print("Accuracy Score : {}".format(score))

100%|██████████| 938/938 [00:08<00:00, 115.02it/s]
100%|██████████| 157/157 [00:01<00:00, 129.15it/s]
  1%|          | 11/938 [00:00<00:08, 109.26it/s]

train_loss : 0.4994 	 test_loss : 0.4392 	 score : 0.842


100%|██████████| 938/938 [00:08<00:00, 113.58it/s]
100%|██████████| 157/157 [00:01<00:00, 122.65it/s]
  1%|▏         | 13/938 [00:00<00:07, 126.79it/s]

train_loss : 0.3768 	 test_loss : 0.3998 	 score : 0.855


100%|██████████| 938/938 [00:08<00:00, 116.27it/s]
100%|██████████| 157/157 [00:01<00:00, 129.25it/s]
  1%|▏         | 12/938 [00:00<00:08, 115.71it/s]

train_loss : 0.3415 	 test_loss : 0.3871 	 score : 0.863


100%|██████████| 938/938 [00:08<00:00, 115.08it/s]
100%|██████████| 157/157 [00:01<00:00, 127.37it/s]
  1%|▏         | 13/938 [00:00<00:07, 119.85it/s]

train_loss : 0.3164 	 test_loss : 0.3714 	 score : 0.864


100%|██████████| 938/938 [00:08<00:00, 116.54it/s]
100%|██████████| 157/157 [00:01<00:00, 127.96it/s]
  1%|▏         | 12/938 [00:00<00:07, 118.26it/s]

train_loss : 0.2981 	 test_loss : 0.3685 	 score : 0.869


100%|██████████| 938/938 [00:08<00:00, 116.64it/s]
100%|██████████| 157/157 [00:01<00:00, 121.05it/s]
  1%|▏         | 12/938 [00:00<00:07, 115.82it/s]

train_loss : 0.2846 	 test_loss : 0.3469 	 score : 0.877


100%|██████████| 938/938 [00:08<00:00, 112.26it/s]
100%|██████████| 157/157 [00:01<00:00, 131.84it/s]
  1%|          | 9/938 [00:00<00:10, 88.34it/s]

train_loss : 0.2702 	 test_loss : 0.3501 	 score : 0.874


100%|██████████| 938/938 [00:08<00:00, 115.74it/s]
100%|██████████| 157/157 [00:01<00:00, 122.65it/s]
  1%|▏         | 13/938 [00:00<00:07, 122.85it/s]

train_loss : 0.2631 	 test_loss : 0.3678 	 score : 0.870


100%|██████████| 938/938 [00:08<00:00, 114.38it/s]
100%|██████████| 157/157 [00:01<00:00, 130.13it/s]
  1%|▏         | 13/938 [00:00<00:07, 121.51it/s]

train_loss : 0.2494 	 test_loss : 0.3504 	 score : 0.876


100%|██████████| 938/938 [00:08<00:00, 114.83it/s]
100%|██████████| 157/157 [00:01<00:00, 123.41it/s]
  1%|▏         | 13/938 [00:00<00:07, 123.33it/s]

train_loss : 0.2406 	 test_loss : 0.3625 	 score : 0.873


100%|██████████| 938/938 [00:07<00:00, 117.81it/s]
100%|██████████| 157/157 [00:01<00:00, 127.16it/s]
  1%|▏         | 12/938 [00:00<00:08, 114.71it/s]

train_loss : 0.2325 	 test_loss : 0.3439 	 score : 0.882


100%|██████████| 938/938 [00:08<00:00, 116.44it/s]
100%|██████████| 157/157 [00:01<00:00, 127.05it/s]
  1%|▏         | 12/938 [00:00<00:07, 117.48it/s]

train_loss : 0.2253 	 test_loss : 0.3659 	 score : 0.879


100%|██████████| 938/938 [00:08<00:00, 116.34it/s]
100%|██████████| 157/157 [00:01<00:00, 124.95it/s]
  1%|          | 11/938 [00:00<00:08, 109.64it/s]

train_loss : 0.2173 	 test_loss : 0.3429 	 score : 0.886


100%|██████████| 938/938 [00:08<00:00, 114.40it/s]
100%|██████████| 157/157 [00:01<00:00, 126.85it/s]
  1%|▏         | 12/938 [00:00<00:07, 118.82it/s]

train_loss : 0.2111 	 test_loss : 0.3566 	 score : 0.884


100%|██████████| 938/938 [00:08<00:00, 114.64it/s]
100%|██████████| 157/157 [00:01<00:00, 123.50it/s]
  1%|▏         | 12/938 [00:00<00:08, 112.18it/s]

train_loss : 0.2029 	 test_loss : 0.3595 	 score : 0.886


100%|██████████| 938/938 [00:08<00:00, 113.65it/s]
100%|██████████| 157/157 [00:01<00:00, 124.17it/s]
  1%|          | 10/938 [00:00<00:09, 98.38it/s]

train_loss : 0.1982 	 test_loss : 0.3728 	 score : 0.878


100%|██████████| 938/938 [00:08<00:00, 114.35it/s]
100%|██████████| 157/157 [00:01<00:00, 121.82it/s]
  1%|▏         | 12/938 [00:00<00:08, 112.08it/s]

train_loss : 0.1917 	 test_loss : 0.3516 	 score : 0.888


100%|██████████| 938/938 [00:08<00:00, 113.19it/s]
100%|██████████| 157/157 [00:01<00:00, 130.27it/s]
  1%|▏         | 12/938 [00:00<00:07, 116.82it/s]

train_loss : 0.1862 	 test_loss : 0.3546 	 score : 0.884


100%|██████████| 938/938 [00:08<00:00, 115.22it/s]
100%|██████████| 157/157 [00:01<00:00, 121.97it/s]

train_loss : 0.1813 	 test_loss : 0.3676 	 score : 0.886
Accuracy Score : 0.886





# Result

에폭 늘려서 추가 학습을 진행시킬 경우 CNN이 DNN보다 약 0.01정도 높음.   
backend.ai pytorch-gpu 환경도 적응완료!