In [0]:
# import standard PyTorch modules
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
#from torch.utils.tensorboard import SummaryWriter # TensorBoard support

# import torchvision module to handle image manipulation
import torchvision
import torchvision.transforms as transforms

# calculate train time, writing train data to files etc.
import time
import pandas as pd
import json
from IPython.display import clear_output

torch.set_printoptions(linewidth=120)
torch.set_grad_enabled(True)     # On by default, leave it here for clarity

<torch.autograd.grad_mode.set_grad_enabled at 0x122ca9048>

In [0]:
# check PyTorch versions
print(torch.__version__)
print(torchvision.__version__)

1.4.0
0.5.0


In [0]:
# Use standard FashionMNIST dataset
trainset = torchvision.datasets.FashionMNIST(
    root = './data/FashionMNIST',
    train = True,
    download = True,
    transform = transforms.ToTensor()
)


trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size,
                                          shuffle=False)
testset = torchvision.datasets.FashionMNIST(
    root = './data/FashionMNIST',
    train = False,
    download = True,
    transform = transforms.ToTensor()
)


testloader = torch.utils.data.DataLoader(testset, batch_size=args.batch_size,
                                          shuffle=False)

print(trainset)
print(testset)

Dataset FashionMNIST
    Number of datapoints: 60000
    Root location: ./data/FashionMNIST
    Split: Train
    StandardTransform
Transform: ToTensor()
Dataset FashionMNIST
    Number of datapoints: 10000
    Root location: ./data/FashionMNIST
    Split: Test
    StandardTransform
Transform: ToTensor()


In [0]:
# Build the neural network, expand on top of nn.Module
class Network(nn.Module):
  def __init__(self):
    super(Network, self).__init__()

    # define layers
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)
    #self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
    
    self.pool = nn.MaxPool2d(kernel_size=2)

    self.fc1 = nn.Linear(in_features=32*13*13, out_features=100)
    self.fc2 = nn.Linear(in_features=100, out_features=10)
    #self.out = nn.Linear(in_features=60, out_features=10)

  # define forward function
  def forward(self, t):
    # conv 
    t = self.conv1(t)
    t = F.relu(t)
    
    # maxpool 
    t = self.pool(t)

    # flatten
    t = t.reshape(-1, 32*13*13)
    
    # dense 100
    t = self.fc1(t)
    t = F.relu(t)

    # dense 10
    t = self.fc2(t)
    t = F.relu(t)

    # don't need softmax here since we'll use cross-entropy as activation.

    return t

In [0]:
def train(epoch):
    # these are very standard functions for going over data to train

    Network.train() # effects Dropout and BatchNorm layers
    for batch_idx, (data, target) in enumerate(trainloader):
        #if args.cuda:
         #   data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        output = Network(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(trainloader.dataset),
                100. * batch_idx / len(trainloader), loss.item()))
            
def test(data_loader,verbose='Test'):
    # these are very standard functions for evaluating data

    Network.eval() # effects Dropout and BatchNorm layers
    test_loss = 0
    correct = 0
    for data, target in data_loader:
        #if args.cuda:
          #  data, target = data.cuda(), target.cuda()
        output = Network(data)
        test_loss += F.cross_entropy(output, target, size_average=False).item() # sum up batch loss
        pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum().item()

    test_loss /= len(data_loader.dataset)
    print('\n'+verbose+' set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(data_loader.dataset),
        100. * correct / len(data_loader.dataset)))
    accuracy=100. * correct / len(data_loader.dataset)
    return(accuracy)

In [0]:
import argparse # handles arguments
import sys; sys.argv=['']; del sys # required to use parser in jupyter notebooks

# training settings
parser = argparse.ArgumentParser(description='PyTorch Convmodel Fashion-MNIST')
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                    help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                    help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=10, metavar='N',
                    help='number of epochs to train (default: 10)')
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                    help='learning rate (default: 0.01)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                    help='SGD momentum (default: 0.5)')
#parser.add_argument('--no-cuda', action='store_true', default=False,
#                    help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                    help='how many batches to wait before logging training status')
args = parser.parse_args()
args.epochs=5
#args.cuda = not args.no_cuda and torch.cuda.is_available()

torch.manual_seed(args.seed)
#if args.cuda:
#    torch.cuda.manual_seed(args.seed)

#cuda_kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}

<torch._C.Generator at 0x107628e50>

In [0]:
# cross_entropy for training: takes class labels NOT one-hot vectors!
criterion = F.cross_entropy
# define SGD optimizer
Network = Network()
optimizer = optim.SGD(Network.parameters(), lr=args.lr, momentum=args.momentum)
#optimizer = optim.Adam(DNN.parameters(), lr=0.001, betas=(0.9, 0.999))

test_array=[]

# train the CNN and test its performance at each epoch
for epoch in range(1, args.epochs + 1):
    train(epoch)
    if epoch==args.epochs:
            test_array.append(test(testloader,verbose='Test'))
            #critical_array.append(test(critical_loader,verbose='Critical'))
    else:
            test(testloader,verbose='Test')
            #test(critical_loader,verbose='Critical')
print(test_array)
#print(critical_array)                                                        






Test set: Average loss: 0.5725, Accuracy: 7890/10000 (79%)



KeyboardInterrupt: 

In [0]:
# Build RNN
class Network(nn.Module):
  def __init__(self):
    super(Network, self).__init__()

    # define layers
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)
    #self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
    
    self.pool = nn.MaxPool2d(kernel_size=2)

    self.fc1 = nn.Linear(in_features=32*13*13, out_features=100)
    self.fc2 = nn.Linear(in_features=100, out_features=10)
    #self.out = nn.Linear(in_features=60, out_features=10)

  # define forward function
  def forward(self, t):
    # conv 
    t = self.conv1(t)
    t = F.relu(t)
    
    # maxpool 
    t = self.pool(t)

    # flatten
    t = t.reshape(-1, 32*13*13)
    
    # dense 100
    t = self.fc1(t)
    t = F.relu(t)

    # dense 10
    t = self.fc2(t)
    t = F.relu(t)

    # don't need softmax here since we'll use cross-entropy as activation.

    return t