In [1]:
import numpy as np
import datetime

In [2]:
import torch
import torchvision            
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter

In [3]:
from torch.utils.data import DataLoader

In [4]:
from torchvision import datasets
from torchvision import transforms
import helper

In [5]:
%matplotlib inline
from matplotlib.pyplot import imshow, imsave

In [6]:
MODEL_NAME = 'DNN'
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("MODEL_NAME = {}, DEVICE = {}".format(MODEL_NAME, DEVICE))

MODEL_NAME = DNN, DEVICE = cpu


In [7]:
class HelloCNN(nn.Module):
    def __init__(self, num_classes=10):
        super(HelloCNN, self).__init__()
        
        self.conv = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        self.fc = nn.Sequential(
            nn.Linear(7*7*64, 512),
            nn.Dropout(p=0.5),
            nn.Linear(512, num_classes),
        )
        
    def forward(self, x):
        y_ = self.conv(x) # (N, 64, 7, 7)
        y_ = y_.view(y_.size(0), -1) # (N, 64*7*7)
        y_ = self.fc(y_)
        return y_

In [8]:
model = HelloCNN().to(DEVICE)

In [9]:
transform = transforms.Compose(
    [transforms.ToTensor(),                               # image to tensor
     transforms.Normalize(mean=(0.1307,), std=(0.3081,))  # normalize to "(x-mean)/std"
    ])

In [10]:
mnist_train = datasets.MNIST(root='./data/', train=True, transform=transform, download=True)
mnist_test = datasets.MNIST(root='./data/', train=False, transform=transform, download=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [11]:
fashion_train = datasets.FashionMNIST('./data/', train=True,  transform=transform, download=True)
fashion_test  = datasets.FashionMNIST('./data/', train=False, transform=transform, download=True)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw
Processing...
Done!


In [12]:
batch_size = 64

In [13]:
mnist_train_loader = DataLoader(dataset=mnist_train, batch_size=batch_size, shuffle=True, drop_last=True)
mnist_test_loader = DataLoader(dataset=mnist_test, batch_size=100, shuffle=False, drop_last=False)

In [14]:
fashion_train_loader = DataLoader(dataset=fashion_train, batch_size=batch_size, shuffle=True, drop_last=True)
fashion_test_loader  = DataLoader(dataset=fashion_test, batch_size=100, shuffle=False, drop_last=False)

In [15]:
!ls ./data/FashionMNIST/raw

t10k-images-idx3-ubyte	   train-images-idx3-ubyte
t10k-images-idx3-ubyte.gz  train-images-idx3-ubyte.gz
t10k-labels-idx1-ubyte	   train-labels-idx1-ubyte
t10k-labels-idx1-ubyte.gz  train-labels-idx1-ubyte.gz


In [16]:
!ls ./data/MNIST/raw

t10k-images-idx3-ubyte	   train-images-idx3-ubyte
t10k-images-idx3-ubyte.gz  train-images-idx3-ubyte.gz
t10k-labels-idx1-ubyte	   train-labels-idx1-ubyte
t10k-labels-idx1-ubyte.gz  train-labels-idx1-ubyte.gz


In [17]:
import pathlib, PIL, random, os, gzip
from pathlib import Path

In [18]:
def load_mnist(path, kind='train'):
    labels_path = os.path.join(path, '%s-labels-idx1-ubyte.gz' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte.gz' % kind)

    with gzip.open(labels_path, 'rb') as lbpath:
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8, offset=8)

    with gzip.open(images_path, 'rb') as imgpath:
        images = np.frombuffer(imgpath.read(), dtype=np.uint8, offset=16).reshape(len(labels), 784)

    return images, labels
  
def save_mnist(path, images, labels):
    p = Path(path)
    p.mkdir(parents=True, exist_ok=True)
    # prep 10 dirs
    for l in range(10): (p/str(l)).mkdir(parents=True, exist_ok=True)
    for i, (im,l) in enumerate(zip(images, labels)):
        dest = p/str(l)/f"{i}.jpg"
        im = im.reshape(28, 28)
        im = PIL.Image.fromarray(im, mode='L')
        with dest.open(mode='wb') as f: im.save(f)

def split_pct(images, labels, pct=0.8):
    items = len(images)
    print(len(images))
    idx = list(range(items))
    split = int(items*pct/2) 
    random.shuffle(idx)
    train_idx = idx[:split]
    valid_idx = idx[split:]
    return images[train_idx], labels[train_idx], images[valid_idx], labels[valid_idx]

def mnist_to_imagenet_format():
    images, labels = load_mnist('./data/MNIST/raw', 'train')
    images_trn, labels_trn, images_val, labels_val = split_pct(images, labels, 0.8)
    save_mnist('data/MNIST/train', images_trn, labels_trn)
    save_mnist('data/MNIST/valid', images_val, labels_val)

    images, labels = load_mnist('./data/MNIST/raw', 't10k')
    save_mnist('data/MNIST/test', images, labels)
    
mnist_to_imagenet_format()


60000


In [19]:
!ls data/MNIST/test/
!ls data/MNIST/train/
print(len(os.listdir('data/MNIST/test/0/')), len(os.listdir('data/MNIST/test/1/')), len(os.listdir('data/MNIST/test/2/')), len(os.listdir('data/MNIST/test/3/')), len(os.listdir('data/MNIST/test/4/')), len(os.listdir('data/MNIST/test/5/')), len(os.listdir('data/MNIST/test/6/')), len(os.listdir('data/MNIST/test/7/')), len(os.listdir('data/MNIST/test/8/')), len(os.listdir('data/MNIST/test/9/')))
print(len(os.listdir('data/MNIST/train/0/')), len(os.listdir('data/MNIST/train/1/')), len(os.listdir('data/MNIST/train/2/')), len(os.listdir('data/MNIST/train/3/')), len(os.listdir('data/MNIST/train/4')), len(os.listdir('data/MNIST/train/5/')), len(os.listdir('data/MNIST/train/6/')), len(os.listdir('data/MNIST/train/7/')), len(os.listdir('data/MNIST/train/8/')), len(os.listdir('data/MNIST/train/9/')))

0  1  2  3  4  5  6  7	8  9
0  1  2  3  4  5  6  7	8  9
980 1135 1032 1010 982 892 958 1028 974 1009
2428 2738 2374 2381 2270 2187 2383 2542 2325 2372


In [20]:
def load_mnist(path, kind='train'):
    labels_path = os.path.join(path, '%s-labels-idx1-ubyte.gz' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte.gz' % kind)

    with gzip.open(labels_path, 'rb') as lbpath:
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8, offset=8)

    with gzip.open(images_path, 'rb') as imgpath:
        images = np.frombuffer(imgpath.read(), dtype=np.uint8, offset=16).reshape(len(labels), 784)

    return images, labels
  
def save_mnist(path, images, labels):
    p = Path(path)
    p.mkdir(parents=True, exist_ok=True)
    for l in range(10): (p/str(l+10)).mkdir(parents=True, exist_ok=True)
    for i, (im,l) in enumerate(zip(images, labels)):
        dest = p/str(l+10)/f"{i}.jpg"
        im = im.reshape(28, 28)
        im = PIL.Image.fromarray(im, mode='L')
        with dest.open(mode='wb') as f: im.save(f)

def split_pct(images, labels, pct=0.8):
    items = len(images)
    print(len(images))
    idx = list(range(items))
    split = int(items*pct/2) 
    #print(idx, split)
    random.shuffle(idx)
    train_idx = idx[:split]
    valid_idx = idx[split:]
    return images[train_idx], labels[train_idx], images[valid_idx], labels[valid_idx]

def mnist_to_imagenet_format():
    images, labels = load_mnist('./data/FashionMNIST/raw', 'train')
    images_trn, labels_trn, images_val, labels_val = split_pct(images, labels, 0.8)
    save_mnist('data/FashionMNIST/train', images_trn, labels_trn)
    save_mnist('data/FashionMNIST/valid', images_val, labels_val)

    images, labels = load_mnist('./data/FashionMNIST/raw', 't10k')
    save_mnist('data/FashionMNIST/test', images, labels)
    
mnist_to_imagenet_format()

60000


In [21]:
!ls data/FashionMNIST/test/
!ls data/FashionMNIST/train/
print(len(os.listdir('data/FashionMNIST/test/10/')), len(os.listdir('data/FashionMNIST/test/11/')), len(os.listdir('data/FashionMNIST/test/12/')), len(os.listdir('data/FashionMNIST/test/13/')), len(os.listdir('data/FashionMNIST/test/14/')), len(os.listdir('data/FashionMNIST/test/15/')), len(os.listdir('data/FashionMNIST/test/16/')), len(os.listdir('data/FashionMNIST/test/17/')), len(os.listdir('data/FashionMNIST/test/18/')), len(os.listdir('data/FashionMNIST/test/19/')))
print(len(os.listdir('data/FashionMNIST/train/10/')), len(os.listdir('data/FashionMNIST/train/11/')), len(os.listdir('data/FashionMNIST/train/12/')), len(os.listdir('data/FashionMNIST/train/13/')), len(os.listdir('data/FashionMNIST/train/14')), len(os.listdir('data/FashionMNIST/train/15/')), len(os.listdir('data/FashionMNIST/train/16/')), len(os.listdir('data/FashionMNIST/train/17/')), len(os.listdir('data/FashionMNIST/train/18/')), len(os.listdir('data/FashionMNIST/train/19/')))

10  11	12  13	14  15	16  17	18  19
10  11	12  13	14  15	16  17	18  19
1000 1000 1000 1000 1000 1000 1000 1000 1000 1000
2374 2418 2344 2490 2390 2426 2391 2373 2441 2353


In [22]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transfroms
import pandas as pd
from collections import OrderedDict
from IPython.display import clear_output

learning_rate = 0.001
batch_size = 100
num_classes = 10
epochs = 3

In [23]:
train_set = torchvision.datasets.FashionMNIST(
    root = 'data/FashionMNIST/train',
    train = True,
    download = True,
    transform = transfroms.Compose([
        transfroms.ToTensor()
    ])
)
test_set = torchvision.datasets.FashionMNIST(
    root = 'data/FashionMNIST/test',
    train = False,
    download = True,
    transform = transfroms.Compose([
        transfroms.ToTensor()
    ])
)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/train/FashionMNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/FashionMNIST/train/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/train/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/train/FashionMNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/FashionMNIST/train/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/train/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/train/FashionMNIST/raw/t10k-images-idx3-ubyte.gz




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/FashionMNIST/train/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/train/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/train/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/FashionMNIST/train/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/train/FashionMNIST/raw
Processing...
Done!
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/test/FashionMNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/FashionMNIST/test/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/test/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/test/FashionMNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/FashionMNIST/test/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/test/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/test/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/FashionMNIST/test/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/test/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/test/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/FashionMNIST/test/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/test/FashionMNIST/raw
Processing...
Done!


In [24]:
class ConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.dropout = nn.Dropout()
        self.fc1 = nn.Linear(in_features=7*7*64, out_features=1000)
        self.fc2 = nn.Linear(in_features=1000, out_features=10)
        
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = x.reshape(x.size(0), -1)
        x = self.dropout(x) 
        x = self.fc1(x)
        x = self.fc2(x)
        return x

In [25]:
model = ConvNet()
critertion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

In [26]:
import pandas as pd
from collections import OrderedDict
from IPython.display import clear_output

In [27]:
total_step = len(train_loader)
pd_results = []

for epoch in range(epochs):
    for i, (images, labels) in enumerate(train_loader):
        out = model(images)
        loss = critertion(out, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total = labels.size(0)
        preds = torch.max(out.data, 1)[1]
        correct = (preds==labels).sum().item()
        
        if (i+1)%200==0:
            results = OrderedDict()
            results['epoch'] = epoch+1
            results['idx'] = (i+1)/2
            results['loss'] = loss.item()
            results['accuracy'] = 100.*correct/total
            pd_results.append(results)
            df = pd.DataFrame.from_dict(pd_results, orient='columns')
            clear_output(wait=True)
            display(df)

        i=i+1

Unnamed: 0,epoch,idx,loss,accuracy
0,1,100.0,0.448464,81.0
1,1,200.0,0.430752,84.0
2,1,300.0,0.3084,88.0
3,2,100.0,0.324139,89.0
4,2,200.0,0.343425,87.0
5,2,300.0,0.229394,93.0
6,3,100.0,0.285185,87.0
7,3,200.0,0.294143,89.0
8,3,300.0,0.253904,91.0


In [28]:
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        out = model(images)
        preds = torch.max(out.data, 1)[1]
        total += len(labels)
        correct += (preds==labels).sum().item()
    print('Test Accuracy: ', 100.*correct/total)

Test Accuracy:  89.49


In [29]:
train_set = torchvision.datasets.FashionMNIST(
    root = 'data/MNIST/train',
    train = True,
    download = True,
    transform = transfroms.Compose([
        transfroms.ToTensor()
    ])
)
test_set = torchvision.datasets.FashionMNIST(
    root = 'data/MNIST/test',
    train = False,
    download = True,
    transform = transfroms.Compose([
        transfroms.ToTensor()
    ])
)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/MNIST/train/FashionMNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/train/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/train/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/MNIST/train/FashionMNIST/raw/train-labels-idx1-ubyte.gz



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/train/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/train/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/MNIST/train/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/train/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/train/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/MNIST/train/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/train/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/train/FashionMNIST/raw
Processing...
Done!
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/MNIST/test/FashionMNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/test/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/test/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/MNIST/test/FashionMNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/test/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/test/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/MNIST/test/FashionMNIST/raw/t10k-images-idx3-ubyte.gz






HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/test/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/test/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/MNIST/test/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/test/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/test/FashionMNIST/raw
Processing...
Done!


In [30]:
model = ConvNet()
critertion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

In [31]:
total_step = len(train_loader)
pd_results = []

for epoch in range(epochs):
    for i, (images, labels) in enumerate(train_loader):
        out = model(images)
        loss = critertion(out, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total = labels.size(0)
        preds = torch.max(out.data, 1)[1]
        correct = (preds==labels).sum().item()
        
        if (i+1)%200==0:
            results = OrderedDict()
            results['epoch'] = epoch+1
            results['idx'] = (i+1)/2
            results['loss'] = loss.item()
            results['accuracy'] = 100.*correct/total
            pd_results.append(results)
            df = pd.DataFrame.from_dict(pd_results, orient='columns')

            clear_output(wait=True)
            display(df)
        i=i+1

Unnamed: 0,epoch,idx,loss,accuracy
0,1,100.0,0.444491,83.0
1,1,200.0,0.362505,87.0
2,1,300.0,0.281467,89.0
3,2,100.0,0.303001,88.0
4,2,200.0,0.307682,88.0
5,2,300.0,0.255827,92.0
6,3,100.0,0.341312,85.0
7,3,200.0,0.284639,89.0
8,3,300.0,0.231812,90.0


In [32]:
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        out = model(images)
        preds = torch.max(out.data, 1)[1]
        total += len(labels)
        correct += (preds==labels).sum().item()
    print('Test Accuracy: ', 100.*correct/total)

Test Accuracy:  89.31


This model is probably the most typical CNN model with dropout.
In order to solve the overfitting issue, some network will be omitted during the training process. Some neurons will be omitted (they are selected randomly).
This is effective because it prevents the co-adaptation of weights. This way, the image result can be a bit clearer.
I've reduced the training size from around 4000 to 2000, and I've set epoch size to 3 to reduce time.