In [1]:
import os
import numpy as np
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data.dataset import Dataset
import torchvision.transforms as transforms

from sklearn.metrics import accuracy_score

In [2]:
# Only for visualization of process
from tqdm import notebook
def tqdm(x, **kargs):
    return notebook.tqdm(x, leave=False, **kargs)

## Download dataset 
https://drive.google.com/drive/u/3/folders/1sHh6NvuKX6RB5OytLwf4kaqfQ9svJNDQ

## Load data

In [3]:
class_index = {'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3, 'deer': 4,
               'dog': 5, 'frog': 6,'horse': 7,'ship': 8, 'truck': 9}

In [90]:
class customDataset(Dataset):
    def __init__(self, datatype, transform, classes):
        ##############################################
        ### Initialize paths, transforms, and so on
        ##############################################
        self.transform = transform
        self.images = np.load("./source/x_{}.npy".format(datatype))
        self.labels = np.load("./source/y_{}.npy".format(datatype))
        self.classes = classes
        assert len(self.images) == len(self.labels), 'mismatched length!'
        print("image shape: {}, label shape: {}".format(self.images.shape, self.labels.shape))
        
        
    def __getitem__(self, index):
        ##############################################
        # 1. Read from file (using numpy.fromfile, PIL.Image.open)
        # 2. Preprocess the data (torchvision.Transform).
        # 3. Return the data (e.g. image and label)
        ##############################################
        
        image = self.images[index]
        label = self.labels[index]
        
        if self.transform is not None:
            image = self.transform(image)
        return image, label
        
    def __len__(self):
        ##############################################
        ### Indicate the total size of the dataset
        ##############################################
        return len(self.images)

In [172]:
data_transforms = {
    'train': transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    ]),
    'test': transforms.Compose([
        transforms.ToPILImage(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    ]),
}

In [173]:
batch_size = 100

In [174]:
trainset = customDataset(datatype='train',
                         transform=data_transforms['train'],
                         classes=class_index)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)

image shape: (50000, 32, 32, 3), label shape: (50000, 1)


In [175]:
testset = customDataset(datatype='test',
                        transform=data_transforms['test'],
                        classes=class_index)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)

image shape: (10000, 32, 32, 3), label shape: (10000, 1)


In [176]:
for imgs, lbls in trainloader:
    print('Size of image:', imgs.size())  
    print('Type of image:', imgs.dtype)   
    print('Size of label:', lbls.size())  
    print('Type of label:', lbls.dtype)
    break

Size of image: torch.Size([100, 3, 32, 32])
Type of image: torch.float32
Size of label: torch.Size([100, 1])
Type of label: torch.int64


In [177]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

## Build model & training (Pytorch)
> https://juejin.im/entry/5bf51d35e51d454049668d57  
> https://github.com/Aleadinglight/Pytorch-VGG-19/blob/master/VGG_19.ipynb?fbclid=IwAR15GpLCFuTC2xxz3VXb5KJd4wjpyCxEkT4KJ1MDkOlv73DwZSc7vBC7KRo  
> https://zhpmatrix.github.io/2019/03/11/conv-highlights-in-pytorch/  
> https://paperswithcode.com/sota/image-classification-on-cifar-10

In [158]:
class Net(nn.Module):

    def __init__(self):
        super(Net,self).__init__()
        self.conv1 = nn.Conv2d(3, 64, 3, padding=1)
        self.conv2 = nn.Conv2d(64, 64, 3, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU()

        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv4 = nn.Conv2d(128, 128, 3, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.relu2 = nn.ReLU()

        self.conv5 = nn.Conv2d(128, 128, 3, padding=1)
        self.conv6 = nn.Conv2d(128, 128, 3, padding=1)
        self.conv7 = nn.Conv2d(128, 128, 1, padding=1)
        self.pool3 = nn.MaxPool2d(2, 2, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.relu3 = nn.ReLU()

        self.conv8 = nn.Conv2d(128, 256, 3, padding=1)
        self.conv9 = nn.Conv2d(256, 256, 3, padding=1)
        self.conv10 = nn.Conv2d(256, 256, 1, padding=1)
        self.pool4 = nn.MaxPool2d(2, 2, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.relu4 = nn.ReLU()

        self.conv11 = nn.Conv2d(256, 512, 3, padding=1)
        self.conv12 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv13 = nn.Conv2d(512, 512, 1, padding=1)
        self.pool5 = nn.MaxPool2d(2, 2, padding=1)
        self.bn5 = nn.BatchNorm2d(512)
        self.relu5 = nn.ReLU()

        self.fc14 = nn.Linear(512 * 4 * 4, 1024)
        self.drop1 = nn.Dropout2d()
        self.fc15 = nn.Linear(1024, 1024)
        self.drop2 = nn.Dropout2d()
        self.fc16 = nn.Linear(1024, 10)


    def forward(self,x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.pool1(x)
        x = self.bn1(x)
        x = self.relu1(x)

        x = self.conv3(x)
        x = self.conv4(x)
        x = self.pool2(x)
        x = self.bn2(x)
        x = self.relu2(x)

        x = self.conv5(x)
        x = self.conv6(x)
        x = self.conv7(x)
        x = self.pool3(x)
        x = self.bn3(x)
        x = self.relu3(x)

        x = self.conv8(x)
        x = self.conv9(x)
        x = self.conv10(x)
        x = self.pool4(x)
        x = self.bn4(x)
        x = self.relu4(x)

        x = self.conv11(x)
        x = self.conv12(x)
        x = self.conv13(x)
        x = self.pool5(x)
        x = self.bn5(x)
        x = self.relu5(x)

        x = x.view(-1,512 * 4 * 4)
        x = F.relu(self.fc14(x))
        x = self.drop1(x)
        x = F.relu(self.fc15(x))
        x = self.drop2(x)
        x = self.fc16(x)

        return x

In [178]:
class Net(nn.Module):

    def __init__(self):
        super(Net,self).__init__()
        self.conv1 = nn.Conv2d(3, 64, 3, padding=1)
        self.conv2 = nn.Conv2d(64, 64, 3, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU()

        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv4 = nn.Conv2d(128, 128, 3, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.relu2 = nn.ReLU()

        self.conv5 = nn.Conv2d(128, 128, 3, padding=1)
        self.conv6 = nn.Conv2d(128, 128, 3, padding=1)
        self.conv7 = nn.Conv2d(128, 128, 1, padding=1)
        self.pool3 = nn.MaxPool2d(2, 2, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.relu3 = nn.ReLU()

        self.conv8 = nn.Conv2d(128, 256, 3, padding=1)
        self.conv9 = nn.Conv2d(256, 256, 3, padding=1)
        self.conv10 = nn.Conv2d(256, 256, 1, padding=1)
        self.pool4 = nn.MaxPool2d(2, 2, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.relu4 = nn.ReLU()

        self.conv11 = nn.Conv2d(256, 512, 3, padding=1)
        self.conv12 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv13 = nn.Conv2d(512, 512, 1, padding=1)
        self.pool5 = nn.MaxPool2d(2, 2, padding=1)
        self.bn5 = nn.BatchNorm2d(512)
        self.relu5 = nn.ReLU()

        self.fc14 = nn.Linear(512 * 4 * 4, 1024)
        self.drop1 = nn.Dropout2d()
        self.fc15 = nn.Linear(1024, 1024)
        self.drop2 = nn.Dropout2d()
        self.fc16 = nn.Linear(1024, 10)


    def forward(self,x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.relu1(x)
        x = self.pool1(x)
        x = self.bn1(x)
#         x = self.relu1(x)

        x = self.conv3(x)
        x = self.relu2(x)
        x = self.conv4(x)
        x = self.relu2(x)
        x = self.pool2(x)
        x = self.bn2(x)
#         x = self.relu2(x)

        x = self.conv5(x)
        x = self.relu3(x)
        x = self.conv6(x)
        x = self.relu3(x)
        x = self.conv7(x)
        x = self.relu3(x)
        x = self.pool3(x)
        x = self.bn3(x)
#         x = self.relu3(x)

        x = self.conv8(x)
        x = self.relu4(x)
        x = self.conv9(x)
        x = self.relu4(x)
        x = self.conv10(x)
        x = self.relu4(x)
        x = self.pool4(x)
        x = self.bn4(x)
#         x = self.relu4(x)

        x = self.conv11(x)
        x = self.relu5(x)
        x = self.conv12(x)
        x = self.relu5(x)
        x = self.conv13(x)
        x = self.relu5(x)
        x = self.pool5(x)
        x = self.bn5(x)
#         x = self.relu5(x)

        x = x.view(-1,512 * 4 * 4)
        x = F.relu(self.fc14(x))
        x = self.drop1(x)
        x = F.relu(self.fc15(x))
        x = self.drop2(x)
        x = self.fc16(x)

        return x

In [179]:
net = Net()
net.to(device)

Net(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=1, dilation=1, ceil_mode=False)
  (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu2): ReLU()
  (conv5): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv6): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv7): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), padding=(1, 1))
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=1, dila

In [180]:
criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(net.parameters(), lr=1e-5, momentum=0.9)
optimizer = optim.Adam(net.parameters(), lr=1e-4)

In [181]:
epochs = 100
size = 500

In [182]:
for epoch in tqdm(range(epochs), desc='Epochs: '):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].to(device), data[1].to(device)
        labels = labels.view(-1)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % size == (size - 1):    # print every size mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / size))
            running_loss = 0.0
#     print("Epoch {} with loss {}".format(epoch, running_loss))

HBox(children=(FloatProgress(value=0.0, description='Epochs: ', style=ProgressStyle(description_width='initial…

[1,   500] loss: 1.682
[2,   500] loss: 1.229
[3,   500] loss: 1.012
[4,   500] loss: 0.878
[5,   500] loss: 0.782
[6,   500] loss: 0.707
[7,   500] loss: 0.650
[8,   500] loss: 0.604
[9,   500] loss: 0.561
[10,   500] loss: 0.530
[11,   500] loss: 0.498
[12,   500] loss: 0.469
[13,   500] loss: 0.447
[14,   500] loss: 0.430
[15,   500] loss: 0.410
[16,   500] loss: 0.390
[17,   500] loss: 0.373
[18,   500] loss: 0.352
[19,   500] loss: 0.339
[20,   500] loss: 0.326
[21,   500] loss: 0.315
[22,   500] loss: 0.301
[23,   500] loss: 0.289
[24,   500] loss: 0.281
[25,   500] loss: 0.268
[26,   500] loss: 0.256
[27,   500] loss: 0.251
[28,   500] loss: 0.238
[29,   500] loss: 0.234
[30,   500] loss: 0.228
[31,   500] loss: 0.215
[32,   500] loss: 0.212
[33,   500] loss: 0.204
[34,   500] loss: 0.199
[35,   500] loss: 0.195
[36,   500] loss: 0.188
[37,   500] loss: 0.184
[38,   500] loss: 0.174
[39,   500] loss: 0.175
[40,   500] loss: 0.163
[41,   500] loss: 0.162
[42,   500] loss: 0.157
[

In [16]:
PATH = './cifar_net.pth'
torch.save(net.state_dict(), PATH)

In [183]:
y_pred_torch = []
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        labels = labels.view(-1)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        y_pred_torch.append(predicted.cpu().detach().numpy())

In [184]:
test = []
for i in range(len(y_pred_torch)):
    for j in range(batch_size):
        test.append(y_pred_torch[i][j])

In [185]:
y_pred = test.copy()
y_pred = np.array(y_pred)

In [186]:
y_pred.shape

(10000,)

## DO NOT MODIFY CODE BELOW!
**Please screen shot your results and post it on your report**

In [187]:
assert y_pred.shape == (10000,)

In [188]:
y_test = np.load("./source/y_test.npy")
print("Accuracy of my model on test set: ", accuracy_score(y_test, y_pred))

Accuracy of my model on test set:  0.8873
