In [None]:
from google.colab import drive
# Mount Google Drive inside the Colab VM at /content/drive (prompts for an
# OAuth authorization code on first use).
# NOTE(review): none of the visible cells read from /content/drive (datasets go
# to ./data) -- confirm whether this mount is still required.
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function

import numpy as np

# UfLinear Layer

In [None]:
class UfLinearFunc(Function):
    """Linear map whose input gradient is routed through a feedback matrix.

    Forward computes ``input.mm(weight.t())`` (plus ``bias`` when given).
    Backward differs from a plain linear layer in one place: the gradient
    with respect to ``input`` is computed with ``weight_fb`` instead of
    ``weight``. Both ``weight`` and ``weight_fb`` receive the same gradient,
    ``grad_output.t().mm(input)``.
    """

    # Note that both forward and backward are @staticmethods
    @staticmethod
    def forward(ctx, input, weight, weight_fb, bias=None):
        """Compute the affine output and stash the tensors needed by backward.

        Args:
            ctx: autograd context object.
            input: 2-D tensor; ``mm`` requires it to be (rows, in_features).
            weight: forward weight, multiplied as ``weight.t()``.
            weight_fb: feedback weight; unused here, only saved for backward.
            bias: optional 1-D tensor broadcast over the rows of the output.

        Returns:
            The tensor ``input.mm(weight.t())`` with ``bias`` added if given.
        """
        ctx.save_for_backward(input, weight, weight_fb, bias)
        output = input.mm(weight.t())
        if bias is not None:
            output += bias.unsqueeze(0).expand_as(output)
        return output

    # This function has only a single output, so it gets only one gradient
    @staticmethod
    def backward(ctx, grad_output):
        """Return gradients for (input, weight, weight_fb, bias).

        Entries stay ``None`` for inputs that do not require a gradient.
        """
        input, weight, weight_fb, bias = ctx.saved_tensors
        grad_input = grad_weight = grad_weight_fb = grad_bias = None

        # needs_input_grad is indexed lazily (not unpacked) because it only has
        # as many entries as arguments were passed to apply(); bias may be
        # omitted by the caller.
        if ctx.needs_input_grad[0]:
            # Feedback path: weight_fb replaces weight here.
            grad_input = grad_output.mm(weight_fb)

        need_weight = ctx.needs_input_grad[1]
        need_weight_fb = ctx.needs_input_grad[2]
        if need_weight or need_weight_fb:
            # Computed once whenever EITHER matrix needs it. Previously
            # weight_fb's gradient was copied from grad_weight, which stayed
            # None if only weight_fb required a gradient (e.g. weight frozen).
            shared_grad = grad_output.t().mm(input)
            if need_weight:
                grad_weight = shared_grad
            if need_weight_fb:
                grad_weight_fb = shared_grad

        if bias is not None and ctx.needs_input_grad[3]:
            grad_bias = grad_output.sum(0)

        return grad_input, grad_weight, grad_weight_fb, grad_bias

In [None]:
import math
from torch import Tensor

class UfLinear(nn.Module):
    """Fully connected layer that owns a forward and a feedback weight matrix.

    ``weight`` and ``weight_fb`` share the shape (out_features, in_features)
    and are initialised independently. Every call hands ``weight``,
    ``weight_fb`` and ``bias`` to ``UfLinearFunc``.
    """

    __constants__ = ['in_features', 'out_features']
    in_features: int
    out_features: int
    weight: Tensor

    def __init__(self, in_features: int, out_features: int, bias: bool = True) -> None:
        """Allocate the parameters and initialise them via reset_parameters()."""
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        matrix_shape = (out_features, in_features)
        self.weight = nn.Parameter(torch.Tensor(*matrix_shape))
        self.weight_fb = nn.Parameter(torch.Tensor(*matrix_shape))  # feedback weight
        if not bias:
            # Keep the attribute present (as None) so later checks work.
            self.register_parameter('bias', None)
        else:
            self.bias = nn.Parameter(torch.Tensor(out_features))
        self.reset_parameters()

    def reset_parameters(self) -> None:
        """Re-draw weight, weight_fb (Kaiming uniform) and bias (uniform)."""
        for matrix in (self.weight, self.weight_fb):
            nn.init.kaiming_uniform_(matrix, a=math.sqrt(5))
        if self.bias is None:
            return
        # Bias range is derived from the fan-in of the forward weight.
        fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight)
        bound = 1 / math.sqrt(fan_in)
        nn.init.uniform_(self.bias, -bound, bound)

    def forward(self, input: Tensor) -> Tensor:
        """Apply UfLinearFunc with this layer's parameters."""
        return UfLinearFunc.apply(input, self.weight, self.weight_fb, self.bias)

    def extra_repr(self) -> str:
        """Summarise the layer configuration for ``repr``."""
        has_bias = self.bias is not None
        return f'in_features={self.in_features}, out_features={self.out_features}, bias={has_bias}'

# UfConv2d Layer

In [None]:
class UfConv2dFunc(Function):
    """2-D convolution whose input gradient is routed through a feedback kernel.

    Forward is ``F.conv2d(input, weight, bias, ...)``. In backward the gradient
    with respect to ``input`` is computed with ``weight_fb`` in place of
    ``weight``; ``weight`` and ``weight_fb`` both receive the ordinary kernel
    gradient from ``torch.nn.grad.conv2d_weight``.
    """

    # Note that both forward and backward are @staticmethods
    @staticmethod
    def forward(ctx, input, weight, weight_fb, bias=None, stride=1, padding=0, dilation=1, groups=1):
        """Run the convolution and record what backward needs.

        Args:
            ctx: autograd context object.
            input: tensor passed to ``F.conv2d``.
            weight: forward kernel.
            weight_fb: feedback kernel; unused here, only saved for backward.
            bias: optional bias passed to ``F.conv2d``.
            stride, padding, dilation, groups: forwarded to ``F.conv2d`` and
                stored on ``ctx`` for the backward convolution helpers.

        Returns:
            The output of ``F.conv2d``.
        """
        ctx.save_for_backward(input, weight, weight_fb, bias)
        ctx.stride = stride
        ctx.padding = padding
        ctx.dilation = dilation
        ctx.groups = groups

        return F.conv2d(input, weight, bias, stride, padding, dilation, groups)

    # This function has only a single output, so it gets only one gradient
    @staticmethod
    def backward(ctx, grad_output):
        """Return gradients for (input, weight, weight_fb, bias) plus four Nones.

        The trailing ``None`` entries correspond to the non-tensor arguments
        stride, padding, dilation and groups.
        """
        input, weight, weight_fb, bias = ctx.saved_tensors
        stride = ctx.stride
        padding = ctx.padding
        dilation = ctx.dilation
        groups = ctx.groups

        grad_input = grad_weight = grad_weight_fb = grad_bias = None

        if ctx.needs_input_grad[0]:
            # Feedback path: weight_fb replaces weight here.
            grad_input = torch.nn.grad.conv2d_input(
                input.shape, weight_fb, grad_output, stride, padding, dilation, groups)

        need_weight = ctx.needs_input_grad[1]
        need_weight_fb = ctx.needs_input_grad[2]
        if need_weight or need_weight_fb:
            # Computed once whenever EITHER kernel needs it. Previously
            # weight_fb's gradient was copied from grad_weight, which stayed
            # None if only weight_fb required a gradient (e.g. weight frozen).
            shared_grad = torch.nn.grad.conv2d_weight(
                input, weight.shape, grad_output, stride, padding, dilation, groups)
            if need_weight:
                grad_weight = shared_grad
            if need_weight_fb:
                grad_weight_fb = shared_grad

        if bias is not None and ctx.needs_input_grad[3]:
            # Sum over every axis except the channel axis (axis 1).
            grad_bias = grad_output.sum((0, 2, 3))

        return grad_input, grad_weight, grad_weight_fb, grad_bias, None, None, None, None

In [None]:
from torch.nn.modules.conv import _ConvNd
from torch.nn.modules.utils import _pair

# For initialization
import math

class UfConv2d(_ConvNd):
    """2-D convolution layer carrying an additional feedback kernel.

    ``weight_fb`` has the shape (out_channels, in_channels // groups, *kernel_size)
    and is initialised independently of ``weight``. Every call hands both
    kernels to ``UfConv2dFunc``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'):
        """Normalise the geometry arguments to pairs and create ``weight_fb``."""
        kernel_size, stride, padding, dilation = (
            _pair(kernel_size), _pair(stride), _pair(padding), _pair(dilation))
        # Positional False / _pair(0) are the transposed flag and output padding.
        super().__init__(in_channels, out_channels, kernel_size, stride, padding, dilation,
                         False, _pair(0), groups, bias, padding_mode)
        fb_shape = (out_channels, in_channels // groups) + tuple(kernel_size)
        # Pointing weight_fb at self.weight instead would make the backward
        # pass use the forward kernel, i.e. ordinary backprop.
        self.weight_fb = nn.Parameter(torch.Tensor(*fb_shape))
        nn.init.kaiming_uniform_(self.weight_fb, a=math.sqrt(5))

    def forward(self, input):
        """Apply UfConv2dFunc, pre-padding explicitly for non-zero padding modes."""
        if self.padding_mode == 'zeros':
            return UfConv2dFunc.apply(input, self.weight, self.weight_fb, self.bias,
                                      self.stride, self.padding, self.dilation, self.groups)
        # Other padding modes are applied up front with F.pad, so the
        # convolution itself then runs with zero padding.
        padded = F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode)
        return UfConv2dFunc.apply(padded, self.weight, self.weight_fb, self.bias,
                                  self.stride, _pair(0), self.dilation, self.groups)

# Data

In [None]:
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T

In [None]:
# Compute the per-channel mean and std of the CIFAR-10 training images.
#
# The std is the pooled per-channel standard deviation over every pixel of
# every image. Averaging per-image standard deviations (the previous approach)
# ignores the variation between images and therefore under-estimates it.

transform = T.Compose([T.ToTensor()])
dataset = dset.CIFAR10(root='./data', train=True, download=True, transform=transform)
loader = DataLoader(dataset,batch_size=10,shuffle=False)

# Running per-channel sum and sum of squares; they become float64 tensors on
# the first addition so that tens of millions of pixel values accumulate
# without float32 round-off.
channel_sum = 0.
channel_sq_sum = 0.
num_pixels = 0  # pixels seen per channel
for images, _ in loader: # batch_size * channel * H * W
    batch_samples = images.size(0) # batch size (the last batch can have smaller size!)
    images = images.view(batch_samples, images.size(1), -1).double()
    channel_sum += images.sum(dim=(0, 2))
    channel_sq_sum += images.pow(2).sum(dim=(0, 2))
    num_pixels += batch_samples * images.size(2)

mean = channel_sum / num_pixels
# Var[x] = E[x^2] - E[x]^2; clamp guards against a tiny negative from rounding.
std = (channel_sq_sum / num_pixels - mean ** 2).clamp(min=0).sqrt()
mean, std = mean.float(), std.float()
print('mean:',mean)
print('std:',std)

Files already downloaded and verified
mean: tensor([0.4914, 0.4822, 0.4465])
std: tensor([0.2023, 0.1994, 0.2010])


In [None]:
def get_cifar10(num_train, batch_size):
    """Build train / validation / test DataLoaders for CIFAR-10.

    The official training split is divided by index: the first ``num_train``
    images feed the training loader and the remaining ones feed the validation
    loader. Both loaders draw their indices in random order. The official test
    split is served in order.

    Args:
        num_train: number of training-split images used for training; must lie
            between 0 and the size of the training split (inclusive).
        batch_size: batch size shared by all three loaders.

    Returns:
        Tuple ``(loader_train, loader_val, loader_test)``.

    Raises:
        ValueError: if ``num_train`` falls outside the training split.
    """
    # ToTensor followed by Normalize(0.5, 0.5) applies (x - 0.5) / 0.5 to every
    # channel. Swap in per-channel dataset statistics here if preferred.
    transform = T.Compose([
        T.ToTensor(),
        T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    # One dataset object backs both the training and validation loaders; the
    # samplers alone decide which indices each loader sees.
    trainset = dset.CIFAR10(root='./data', train=True,
                            download=True, transform=transform)
    num_total = len(trainset)
    if not 0 <= num_train <= num_total:
        raise ValueError(
            'num_train must be between 0 and %d, got %r' % (num_total, num_train))

    loader_train = DataLoader(trainset, batch_size=batch_size,
                              sampler=sampler.SubsetRandomSampler(range(num_train)))
    # The split boundary comes from the dataset length, not a literal 50000.
    loader_val = DataLoader(trainset, batch_size=batch_size,
                            sampler=sampler.SubsetRandomSampler(range(num_train, num_total)))

    testset = dset.CIFAR10(root='./data', train=False,
                           download=True, transform=transform)
    loader_test = DataLoader(testset, batch_size=batch_size,
                             shuffle=False, num_workers=2)

    return loader_train, loader_val, loader_test

# Train and test

In [None]:
def train_model(model, optimizer, criterion, epochs=1):
    """
    Train a model on CIFAR-10 using the PyTorch Module API.

    Batches are read from the notebook-level ``loader_train``. After every
    epoch the model is scored on ``loader_val`` through ``check_accuracy``.
    Tensors are moved to the notebook-level ``device`` and inputs are cast to
    the notebook-level ``dtype``.

    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object we will use to train the model
    - criterion: A callable taking (scores, labels) and returning the loss tensor
    - epochs: (Optional) A Python integer giving the number of epochs to train for

    Returns: Nothing, but prints the loss of the last batch and the validation
    accuracy after every epoch.

    Raises: ValueError if ``loader_train`` yields no batches.
    """
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    for e in range(epochs):
        # check_accuracy() leaves the model in eval mode at the end of each
        # epoch, so training mode is re-entered once per epoch rather than
        # once per batch.
        model.train()
        loss = None
        for x, y in loader_train:
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            loss = criterion(scores, y)

            # Clear gradients left over from the previous step, backpropagate
            # the new loss, then let the optimizer update the parameters.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if loss is None:
            # Without this guard the print below fails with an opaque
            # UnboundLocalError when the loader is empty.
            raise ValueError('loader_train produced no batches; nothing to train on')

        print('Epoch %d, loss = %.4f' % (e, loss.item()))
        check_accuracy(loader_val, model)
        print()

In [None]:
def check_accuracy(loader, model):
    """Print the classification accuracy of ``model`` over ``loader``.

    The header line is chosen from ``loader.dataset.train``: a truthy value is
    reported as the validation set, anything else as the test set. Tensors are
    moved to the notebook-level ``device`` and inputs are cast to the
    notebook-level ``dtype``. The model is left in evaluation mode.

    Inputs:
    - loader: A DataLoader yielding (inputs, labels) batches whose ``dataset``
      exposes a ``train`` attribute.
    - model: A PyTorch Module producing one score per class for each input.

    Returns: Nothing; the counts and percentage are printed.
    """
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            # .item() keeps the running count a plain Python int instead of a
            # tensor living on the compute device.
            num_correct += (preds == y).sum().item()
            num_samples += preds.size(0)
    # An empty loader reports 0.00 rather than raising ZeroDivisionError.
    acc = num_correct / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))

# Simpnet

In [None]:
class Cifar10_Simpnet(nn.Module):
    """Small CIFAR-10 classifier built from the Uf layers.

    Pipeline: UfConv2d -> tanh -> 2x2 max-pool -> dropout -> UfLinear -> tanh
    -> dropout -> UfLinear, producing 10 class scores per image.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = UfConv2d(3, 32, kernel_size=5, padding=2)
        # Channel-wise dropout on the 4-D feature map.
        self.drop1 = nn.Dropout2d(p=0.8)
        self.fc1 = UfLinear(32*16*16, 500)
        # The activations after fc1 are 2-D, so plain element-wise dropout is
        # used. nn.Dropout2d on a 2-D input is deprecated in PyTorch, and its
        # warning recommends nn.Dropout to retain the behaviour.
        self.drop2 = nn.Dropout(p=0.3)
        self.fc2 = UfLinear(500, 10)

    def forward(self, x): # 3*32*32
        """Return the class scores for a batch of 3x32x32 images."""
        x = torch.tanh(self.conv1(x)) # 32*32*32
        x = F.max_pool2d(x, 2, stride=2) # 32*16*16
        x = self.drop1(x)
        x = x.view(-1,32*16*16)
        x = torch.tanh(self.fc1(x))
        x = self.drop2(x)
        x = self.fc2(x)

        return x

In [None]:
class Cifar10_Simpnet_bp(nn.Module):
    """Backprop baseline with the Simpnet layout, built from stock PyTorch layers.

    Pipeline: Conv2d -> tanh -> 2x2 max-pool -> dropout -> Linear -> tanh
    -> dropout -> Linear, producing 10 class scores per image.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=5, padding=2)
        # Channel-wise dropout on the 4-D feature map.
        self.drop1 = nn.Dropout2d(p=0.8)
        self.fc1 = nn.Linear(32*16*16, 500)
        # The activations after fc1 are 2-D, so plain element-wise dropout is
        # used. nn.Dropout2d on a 2-D input is deprecated in PyTorch, and its
        # warning recommends nn.Dropout to retain the behaviour.
        self.drop2 = nn.Dropout(p=0.3)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x): # 3*32*32
        """Return the class scores for a batch of 3x32x32 images."""
        x = torch.tanh(self.conv1(x)) # 32*32*32
        x = F.max_pool2d(x, 2, stride=2) # 32*16*16
        x = self.drop1(x)
        x = x.view(-1,32*16*16)
        x = torch.tanh(self.fc1(x))
        x = self.drop2(x)
        x = self.fc2(x)

        return x

# Test on CIFAR-10

In [None]:
# Run on the first CUDA GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [None]:
# Build the three loaders read by train_model / check_accuracy: the first
# 45000 training images for training, the remaining training images for
# validation, and the official test split; all use batches of 256.
loader_train, loader_val, loader_test = get_cifar10(num_train=45000, batch_size=256)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Files already downloaded and verified


In [None]:
import torch.optim as optim

#print_every=10000
# Floating-point type that train_model and check_accuracy cast input batches to.
dtype = torch.float32 

In [None]:
# Experiment: the Uf-layer network, trained for 10 epochs with SGD + momentum
# and then scored once on the held-out test split.
criterion = nn.CrossEntropyLoss()
net = Cifar10_Simpnet()
optimizer = optim.SGD(params=net.parameters(), momentum=0.9, lr=0.001)
train_model(model=net, optimizer=optimizer, criterion=criterion, epochs=10)
check_accuracy(loader=loader_test, model=net)

Epoch 0, loss = 2.0461
Checking accuracy on validation set
Got 1371 / 5000 correct (27.42)

Epoch 1, loss = 1.9524
Checking accuracy on validation set
Got 1624 / 5000 correct (32.48)

Epoch 2, loss = 1.8990
Checking accuracy on validation set
Got 1815 / 5000 correct (36.30)

Epoch 3, loss = 1.8398
Checking accuracy on validation set
Got 1919 / 5000 correct (38.38)

Epoch 4, loss = 1.8166
Checking accuracy on validation set
Got 1967 / 5000 correct (39.34)

Epoch 5, loss = 1.7633
Checking accuracy on validation set
Got 2028 / 5000 correct (40.56)

Epoch 6, loss = 1.7793
Checking accuracy on validation set
Got 2082 / 5000 correct (41.64)

Epoch 7, loss = 1.7713
Checking accuracy on validation set
Got 2122 / 5000 correct (42.44)

Epoch 8, loss = 1.7002
Checking accuracy on validation set
Got 2158 / 5000 correct (43.16)

Epoch 9, loss = 1.6918
Checking accuracy on validation set
Got 2196 / 5000 correct (43.92)

Checking accuracy on test set
Got 4417 / 10000 correct (44.17)


In [None]:
# Baseline: the same architecture with stock PyTorch layers (ordinary
# backprop), trained and evaluated with the same settings for comparison.
criterion = nn.CrossEntropyLoss()
net = Cifar10_Simpnet_bp()
optimizer = optim.SGD(params=net.parameters(), momentum=0.9, lr=0.001)

train_model(model=net, optimizer=optimizer, criterion=criterion, epochs=10)
check_accuracy(loader=loader_test, model=net)



Epoch 0, loss = 1.9647
Checking accuracy on validation set
Got 1716 / 5000 correct (34.32)

Epoch 1, loss = 1.9136
Checking accuracy on validation set
Got 1907 / 5000 correct (38.14)

Epoch 2, loss = 1.8111
Checking accuracy on validation set
Got 1958 / 5000 correct (39.16)

Epoch 3, loss = 1.9075
Checking accuracy on validation set
Got 2045 / 5000 correct (40.90)

Epoch 4, loss = 1.7348
Checking accuracy on validation set
Got 2124 / 5000 correct (42.48)

Epoch 5, loss = 1.7541
Checking accuracy on validation set
Got 2175 / 5000 correct (43.50)

Epoch 6, loss = 1.7852
Checking accuracy on validation set
Got 2218 / 5000 correct (44.36)

Epoch 7, loss = 1.7507
Checking accuracy on validation set
Got 2244 / 5000 correct (44.88)

Epoch 8, loss = 1.7109
Checking accuracy on validation set
Got 2283 / 5000 correct (45.66)

Epoch 9, loss = 1.6705
Checking accuracy on validation set
Got 2302 / 5000 correct (46.04)

Checking accuracy on test set
Got 4553 / 10000 correct (45.53)


# UsLinear and UsConv2d Layers

In [None]:
class UsLinearFunc(Function):
    """Linear map whose input gradient uses only the sign of a feedback matrix.

    Forward computes ``input.mm(weight.t())`` (plus ``bias`` when given).
    Backward computes the gradient with respect to ``input`` with
    ``torch.sign(weight_fb)`` instead of ``weight``. Both ``weight`` and
    ``weight_fb`` receive the same gradient, ``grad_output.t().mm(input)``.
    """

    # Note that both forward and backward are @staticmethods
    @staticmethod
    def forward(ctx, input, weight, weight_fb, bias=None):
        """Compute the affine output and stash the tensors needed by backward.

        Args:
            ctx: autograd context object.
            input: 2-D tensor; ``mm`` requires it to be (rows, in_features).
            weight: forward weight, multiplied as ``weight.t()``.
            weight_fb: feedback weight; unused here, only saved for backward.
            bias: optional 1-D tensor broadcast over the rows of the output.

        Returns:
            The tensor ``input.mm(weight.t())`` with ``bias`` added if given.
        """
        ctx.save_for_backward(input, weight, weight_fb, bias)
        output = input.mm(weight.t())
        if bias is not None:
            output += bias.unsqueeze(0).expand_as(output)
        return output

    # This function has only a single output, so it gets only one gradient
    @staticmethod
    def backward(ctx, grad_output):
        """Return gradients for (input, weight, weight_fb, bias).

        Entries stay ``None`` for inputs that do not require a gradient.
        """
        input, weight, weight_fb, bias = ctx.saved_tensors
        grad_input = grad_weight = grad_weight_fb = grad_bias = None

        # needs_input_grad is indexed lazily (not unpacked) because it only has
        # as many entries as arguments were passed to apply(); bias may be
        # omitted by the caller.
        if ctx.needs_input_grad[0]:
            # Feedback path: only the element-wise sign of weight_fb is used.
            grad_input = grad_output.mm(torch.sign(weight_fb))

        need_weight = ctx.needs_input_grad[1]
        need_weight_fb = ctx.needs_input_grad[2]
        if need_weight or need_weight_fb:
            # Computed once whenever EITHER matrix needs it. Previously
            # weight_fb's gradient was copied from grad_weight, which stayed
            # None if only weight_fb required a gradient (e.g. weight frozen).
            shared_grad = grad_output.t().mm(input)
            if need_weight:
                grad_weight = shared_grad
            if need_weight_fb:
                grad_weight_fb = shared_grad

        if bias is not None and ctx.needs_input_grad[3]:
            grad_bias = grad_output.sum(0)

        return grad_input, grad_weight, grad_weight_fb, grad_bias

In [None]:
import math
from torch import Tensor

class UsLinear(nn.Module):
    """Fully connected layer that pairs its weight with a feedback matrix.

    ``weight`` and ``weight_fb`` share the shape (out_features, in_features)
    and are initialised independently. Every call hands ``weight``,
    ``weight_fb`` and ``bias`` to ``UsLinearFunc``.
    """

    __constants__ = ['in_features', 'out_features']
    in_features: int
    out_features: int
    weight: Tensor

    def __init__(self, in_features: int, out_features: int, bias: bool = True) -> None:
        """Allocate the parameters and initialise them via reset_parameters()."""
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        matrix_shape = (out_features, in_features)
        self.weight = nn.Parameter(torch.Tensor(*matrix_shape))
        self.weight_fb = nn.Parameter(torch.Tensor(*matrix_shape))  # feedback weight
        if not bias:
            # Keep the attribute present (as None) so later checks work.
            self.register_parameter('bias', None)
        else:
            self.bias = nn.Parameter(torch.Tensor(out_features))
        self.reset_parameters()

    def reset_parameters(self) -> None:
        """Re-draw weight, weight_fb (Kaiming uniform) and bias (uniform)."""
        for matrix in (self.weight, self.weight_fb):
            nn.init.kaiming_uniform_(matrix, a=math.sqrt(5))
        if self.bias is None:
            return
        # Bias range is derived from the fan-in of the forward weight.
        fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight)
        bound = 1 / math.sqrt(fan_in)
        nn.init.uniform_(self.bias, -bound, bound)

    def forward(self, input: Tensor) -> Tensor:
        """Apply UsLinearFunc with this layer's parameters."""
        return UsLinearFunc.apply(input, self.weight, self.weight_fb, self.bias)

    def extra_repr(self) -> str:
        """Summarise the layer configuration for ``repr``."""
        has_bias = self.bias is not None
        return f'in_features={self.in_features}, out_features={self.out_features}, bias={has_bias}'

In [None]:
class UsConv2dFunc(Function):
    """2-D convolution whose input gradient uses only the sign of a feedback kernel.

    Forward is ``F.conv2d(input, weight, bias, ...)``. In backward the gradient
    with respect to ``input`` is computed with ``torch.sign(weight_fb)`` in
    place of ``weight``; ``weight`` and ``weight_fb`` both receive the ordinary
    kernel gradient from ``torch.nn.grad.conv2d_weight``.
    """

    # Note that both forward and backward are @staticmethods
    @staticmethod
    def forward(ctx, input, weight, weight_fb, bias=None, stride=1, padding=0, dilation=1, groups=1):
        """Run the convolution and record what backward needs.

        Args:
            ctx: autograd context object.
            input: tensor passed to ``F.conv2d``.
            weight: forward kernel.
            weight_fb: feedback kernel; unused here, only saved for backward.
            bias: optional bias passed to ``F.conv2d``.
            stride, padding, dilation, groups: forwarded to ``F.conv2d`` and
                stored on ``ctx`` for the backward convolution helpers.

        Returns:
            The output of ``F.conv2d``.
        """
        ctx.save_for_backward(input, weight, weight_fb, bias)
        ctx.stride = stride
        ctx.padding = padding
        ctx.dilation = dilation
        ctx.groups = groups

        return F.conv2d(input, weight, bias, stride, padding, dilation, groups)

    # This function has only a single output, so it gets only one gradient
    @staticmethod
    def backward(ctx, grad_output):
        """Return gradients for (input, weight, weight_fb, bias) plus four Nones.

        The trailing ``None`` entries correspond to the non-tensor arguments
        stride, padding, dilation and groups.
        """
        input, weight, weight_fb, bias = ctx.saved_tensors
        stride = ctx.stride
        padding = ctx.padding
        dilation = ctx.dilation
        groups = ctx.groups

        grad_input = grad_weight = grad_weight_fb = grad_bias = None

        if ctx.needs_input_grad[0]:
            # Feedback path: only the element-wise sign of weight_fb is used.
            grad_input = torch.nn.grad.conv2d_input(
                input.shape, torch.sign(weight_fb), grad_output, stride, padding, dilation, groups)

        need_weight = ctx.needs_input_grad[1]
        need_weight_fb = ctx.needs_input_grad[2]
        if need_weight or need_weight_fb:
            # Computed once whenever EITHER kernel needs it. Previously
            # weight_fb's gradient was copied from grad_weight, which stayed
            # None if only weight_fb required a gradient (e.g. weight frozen).
            shared_grad = torch.nn.grad.conv2d_weight(
                input, weight.shape, grad_output, stride, padding, dilation, groups)
            if need_weight:
                grad_weight = shared_grad
            if need_weight_fb:
                grad_weight_fb = shared_grad

        if bias is not None and ctx.needs_input_grad[3]:
            # Sum over every axis except the channel axis (axis 1).
            grad_bias = grad_output.sum((0, 2, 3))

        return grad_input, grad_weight, grad_weight_fb, grad_bias, None, None, None, None

In [None]:
from torch.nn.modules.conv import _ConvNd
from torch.nn.modules.utils import _pair

# For initialization
import math

class UsConv2d(_ConvNd):
    """2-D convolution layer that pairs its kernel with a feedback kernel.

    ``weight_fb`` has the shape (out_channels, in_channels // groups, *kernel_size)
    and is initialised independently of ``weight``. Every call hands both
    kernels to ``UsConv2dFunc``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'):
        """Normalise the geometry arguments to pairs and create ``weight_fb``."""
        kernel_size, stride, padding, dilation = (
            _pair(kernel_size), _pair(stride), _pair(padding), _pair(dilation))
        # Positional False / _pair(0) are the transposed flag and output padding.
        super().__init__(in_channels, out_channels, kernel_size, stride, padding, dilation,
                         False, _pair(0), groups, bias, padding_mode)
        fb_shape = (out_channels, in_channels // groups) + tuple(kernel_size)
        self.weight_fb = nn.Parameter(torch.Tensor(*fb_shape))
        nn.init.kaiming_uniform_(self.weight_fb, a=math.sqrt(5))

    def forward(self, input):
        """Apply UsConv2dFunc, pre-padding explicitly for non-zero padding modes."""
        if self.padding_mode == 'zeros':
            return UsConv2dFunc.apply(input, self.weight, self.weight_fb, self.bias,
                                      self.stride, self.padding, self.dilation, self.groups)
        # Other padding modes are applied up front with F.pad, so the
        # convolution itself then runs with zero padding.
        padded = F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode)
        return UsConv2dFunc.apply(padded, self.weight, self.weight_fb, self.bias,
                                  self.stride, _pair(0), self.dilation, self.groups)

# Test the Us layers on CIFAR-10

In [None]:
class Cifar10_Simpnet_Us(nn.Module):
    """Small CIFAR-10 classifier built from the Us layers.

    Pipeline: UsConv2d -> tanh -> 2x2 max-pool -> dropout -> UsLinear -> tanh
    -> dropout -> UsLinear, producing 10 class scores per image.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = UsConv2d(3, 32, kernel_size=5, padding=2)
        # Channel-wise dropout on the 4-D feature map.
        self.drop1 = nn.Dropout2d(p=0.8)
        self.fc1 = UsLinear(32*16*16, 500)
        # The activations after fc1 are 2-D, so plain element-wise dropout is
        # used. nn.Dropout2d on a 2-D input is deprecated in PyTorch, and its
        # warning recommends nn.Dropout to retain the behaviour.
        self.drop2 = nn.Dropout(p=0.3)
        self.fc2 = UsLinear(500, 10)

    def forward(self, x): # 3*32*32
        """Return the class scores for a batch of 3x32x32 images."""
        x = torch.tanh(self.conv1(x)) # 32*32*32
        x = F.max_pool2d(x, 2, stride=2) # 32*16*16
        x = self.drop1(x)
        x = x.view(-1,32*16*16)
        x = torch.tanh(self.fc1(x))
        x = self.drop2(x)
        x = self.fc2(x)

        return x

In [None]:
# Re-create the device, the data loaders and the dtype so that the Us
# experiment below can be run starting from this cell.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)
loader_train, loader_val, loader_test = get_cifar10(batch_size=256, num_train=45000)

import torch.optim as optim
# Floating-point type that input batches are cast to during training/evaluation.
dtype = torch.float32

cuda:0
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Experiment: the Us-layer network, trained for 10 epochs with SGD + momentum
# and then scored once on the held-out test split.
criterion = nn.CrossEntropyLoss()
net = Cifar10_Simpnet_Us()
optimizer = optim.SGD(params=net.parameters(), momentum=0.9, lr=0.001)
train_model(model=net, optimizer=optimizer, criterion=criterion, epochs=10)
check_accuracy(loader=loader_test, model=net)

Epoch 0, loss = 1.9228
Checking accuracy on validation set
Got 1886 / 5000 correct (37.72)

Epoch 1, loss = 1.8127
Checking accuracy on validation set
Got 2062 / 5000 correct (41.24)

Epoch 2, loss = 1.7918
Checking accuracy on validation set
Got 2172 / 5000 correct (43.44)

Epoch 3, loss = 1.8184
Checking accuracy on validation set
Got 2256 / 5000 correct (45.12)

Epoch 4, loss = 1.7719
Checking accuracy on validation set
Got 2282 / 5000 correct (45.64)

Epoch 5, loss = 1.7273
Checking accuracy on validation set
Got 2354 / 5000 correct (47.08)

Epoch 6, loss = 1.6187
Checking accuracy on validation set
Got 2372 / 5000 correct (47.44)

Epoch 7, loss = 1.7044
Checking accuracy on validation set
Got 2409 / 5000 correct (48.18)

Epoch 8, loss = 1.6402
Checking accuracy on validation set
Got 2459 / 5000 correct (49.18)

Epoch 9, loss = 1.6932
Checking accuracy on validation set
Got 2437 / 5000 correct (48.74)

Checking accuracy on test set
Got 4918 / 10000 correct (49.18)
