In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

import torchvision
from torchvision.transforms import transforms

from itertools import product
from collections import OrderedDict,namedtuple

In [2]:
train_set = torchvision.datasets.FashionMNIST(root='./data',
                                              train=True,
                                              transform=transforms.Compose([transforms.ToTensor()]),
                                              download=True)

In [3]:
test_set = torchvision.datasets.FashionMNIST(root='./data',
                                             train=False, 
                                             transform=transforms.Compose([transforms.ToTensor()]),
                                             download=True)

In [4]:
params = OrderedDict(
    lr = [0.01],
    batch_size=[64],
    shuffle = [True],
    momentum = [0.9],
)

In [5]:
class RunBuilder():
    @staticmethod
    def get_runs(params):
        Run = namedtuple('Run',params.keys())
        
        runs = []
        for v in product(*params.values()):
            runs.append(Run(*v))
        return runs

In [6]:
class Network(torch.nn.Module):
    def __init__(self):
        super(Network,self).__init__()
        self.conv1 = torch.nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, padding=2)
        self.norm1 = nn.BatchNorm2d(32)
        nn.init.xavier_uniform_(self.conv1.weight)
        
        self.conv2 = torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=2)
        self.norm2 = nn.BatchNorm2d(64)
        nn.init.xavier_uniform_(self.conv2.weight)
        
        self.fc1 = torch.nn.Linear(in_features=4096, out_features=4096)
#         self.fc2 = torch.nn.Linear(in_features=300, out_features=60)
        self.out = torch.nn.Linear(in_features=4096, out_features=10)
        
    def forward(self,t):
        # 1. input layer
        t = t

        t = self.conv1(t)
        t = F.relu(t)
        t = self.norm1(t)
        t = F.max_pool2d(t,kernel_size=2, stride=2)
        
        t = self.conv2(t)
        t = F.relu(t)
        t = self.norm2(t)
        t = F.max_pool2d(t,kernel_size=2, stride=2)
        
        # 4. linear layer1
        t = t.reshape(-1,4096)
        t = self.fc1(t)
        t = F.relu(t)
        t = F.dropout(t, p=0.2)
        
        # 5. linear layer 2
#         t = self.fc2(t)
#         t = F.relu(t)
        
        # 6. output layer
        t = self.out(t)
#         t = F.softmax()
        
        return t

In [7]:
for run in RunBuilder.get_runs(params):
    network = Network().to('cuda')
    train_loader = DataLoader(train_set, batch_size=run.batch_size, shuffle=run.shuffle, num_workers=1)
    test_loader = DataLoader(test_set,  batch_size=len(test_set), shuffle=run.shuffle, num_workers=1)
    optimizer = torch.optim.SGD(network.parameters(),lr=run.lr, momentum=run.momentum)
    tb = SummaryWriter(comment=f'-{run}')

    for epoch in range(30):

        total_train_loss = 0
        total_train_correct = 0
        
        for train_batch in train_loader:
            train_images = train_batch[0].to('cuda')       
            train_labels = train_batch[1].to('cuda')       
            train_preds = network(train_images)    
            train_loss = F.cross_entropy(train_preds, train_labels)
            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()

            total_train_loss += train_loss.item()
            total_train_correct += train_preds.argmax(dim=1).eq(train_labels).sum().item()
        
        total_train_loss /= len(train_loader)
        train_accuracy = total_train_correct / len(train_set)
        
        with torch.no_grad():
            test_batch = next(iter(test_loader))
            test_images = test_batch[0].to('cuda')
            test_labels = test_batch[1].to('cuda')
            test_preds = network(test_images)
            total_test_loss = F.cross_entropy(test_preds, test_labels).item()
            test_accuracy = test_preds.argmax(dim=1).eq(test_labels).sum().item() / len(test_set)
        
        tb.add_scalars('Loss',{'train_loss':total_train_loss,
                               'test_loss':total_test_loss}, epoch)
        tb.add_scalars('Accuracy',{'train_accuracy':train_accuracy,
                                   'test_accuracy':test_accuracy}, epoch)
        
        print('epochs:',epoch,'train_loss:',total_train_loss,'train_accuracy',train_accuracy,'test_loss:',total_test_loss,'test_accuracy',test_accuracy)
        
    tb.close()

epochs: 0 train_loss: 0.521083555750247 train_accuracy 0.8327166666666667 test_loss: 0.5015164613723755 test_accuracy 0.8408
epochs: 1 train_loss: 0.49087376323844323 train_accuracy 0.8443833333333334 test_loss: 0.49691879749298096 test_accuracy 0.8409
epochs: 2 train_loss: 0.4961709981279841 train_accuracy 0.8412333333333334 test_loss: 0.48884570598602295 test_accuracy 0.8408
epochs: 3 train_loss: 0.49178209036652215 train_accuracy 0.8442833333333334 test_loss: 0.5114157199859619 test_accuracy 0.8377
epochs: 4 train_loss: 0.4925164584793262 train_accuracy 0.84455 test_loss: 0.4970649480819702 test_accuracy 0.8382
epochs: 5 train_loss: 0.493261903715032 train_accuracy 0.84285 test_loss: 0.5326918959617615 test_accuracy 0.8138
epochs: 6 train_loss: 0.4951879217235773 train_accuracy 0.8419333333333333 test_loss: 0.5050074458122253 test_accuracy 0.8343
epochs: 7 train_loss: 0.49511187865154577 train_accuracy 0.8416833333333333 test_loss: 0.49550899863243103 test_accuracy 0.842
epochs: 8 t