## Image classification with PyTorch & CNN

## Simple CNN Model Using PyTorch ##

Simple notebook to do
   * simple data augmentation
   * Build a simple CNN model and train it
   * Predict on the test data and submit

with pytorch

## Import Packages ##

In [None]:
from PIL import Image

import os
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader
from torch.utils.data import ConcatDataset
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import random_split

import matplotlib.pyplot as plt
# Seed PyTorch's global random number generator so runs are repeatable.
SEED = 17
torch.manual_seed(SEED)

## Simple EDA ##

We know our dataset is Fashion-MNIST, so a few remarks apply:
1. There is no class imbalance.
2. All images have the same size.
These properties make the dataset easy to work with.

## Build DataLoader ##

`transforms` is the module for transforming and augmenting images.
Several steps can be chained with `Compose`.

I decided the following steps are needed:
* Normalize
* ToTensor

PyTorch officially uses **(0.485, 0.456, 0.406), (0.229, 0.224, 0.225)**
instead of
(0.5, 0.5, 0.5), (0.5, 0.5, 0.5).
Those values are the per-channel mean and standard deviation computed on ImageNet.

> https://stackoverflow.com/questions/58151507/why-pytorch-officially-use-mean-0-485-0-456-0-406-and-std-0-229-0-224-0-2

In [None]:
# Normalization statistics shared by every pipeline below
# (the ImageNet per-channel mean and standard deviation).
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

# Base pipeline: no augmentation, only tensor conversion + normalization.
# It must stay deterministic because it is also the preprocessing applied
# to the un-augmented copy of the training data and to inference inputs;
# a random rotation here would duplicate `transform_rotate` and corrupt
# test-time predictions.
transform_normal = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

# Augmentation: rotate by a random angle drawn from [-60, 60] degrees.
transform_rotate = transforms.Compose([
    transforms.RandomRotation([-60, 60]),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

# Augmentation: horizontal flip applied with probability 1 (always).
transform_horizontalflip = transforms.Compose([
    transforms.RandomHorizontalFlip(1),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

# Augmentation: vertical flip applied with probability 1 (always).
transform_verticalflip = transforms.Compose([
    transforms.RandomVerticalFlip(1),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

The dataset already includes the base images, so the flip transforms are applied with probability 1.

## Data Augmentation ##
Since the training set is not large enough, the model needs more data.
To solve this problem, PyTorch provides data augmentation methods such as transforms.RandomRotation.

However, those methods live in `transforms`, which is applied when a dataset is built.
So I chose to build several datasets and concatenate them with **torch.utils.data.ConcatDataset()**

And I used 3 transforms
* rotation
* horizontal_flip
* vertical_flip

In [None]:
# Every dataset reads the same training folder; only the transform differs.
TRAIN_DIR = "/kaggle/input/devkor-image-classification/train"

dataset_normal = ImageFolder(root=TRAIN_DIR, transform=transform_normal)
dataset_rotate = ImageFolder(root=TRAIN_DIR, transform=transform_rotate)
dataset_horizontalflip = ImageFolder(root=TRAIN_DIR, transform=transform_horizontalflip)
dataset_verticalflip = ImageFolder(root=TRAIN_DIR, transform=transform_verticalflip)

# Stack the four views of the training folder into a single dataset.
# (Assign `dataset_normal` instead to train without the extra views.)
augmented_views = [
    dataset_normal,
    dataset_rotate,
    dataset_horizontalflip,
    dataset_verticalflip,
]
final_dataset = ConcatDataset(augmented_views)

# Loader over the full concatenated dataset.
dataloader = DataLoader(final_dataset, batch_size=128, shuffle=True, num_workers=2)

In [None]:
# Report how many samples the concatenated dataset holds.
num_samples = len(final_dataset)
print(num_samples)

Create the training and validation sets.

In [None]:
# Hold out 20% of the concatenated dataset for validation.
# NOTE(review): the split happens after the differently-transformed copies
# of the same folder were concatenated, so views of one source image can end
# up in both subsets -- validation accuracy may be optimistic.
train_size = int(len(final_dataset) * 0.8)
val_size = len(final_dataset) - train_size
train_dataset, val_dataset = random_split(final_dataset, [train_size, val_size])

# Both loaders share batch size and worker count; only training is shuffled.
loader_options = {"batch_size": 128, "num_workers": 2}
train_dataloader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_dataloader = DataLoader(val_dataset, shuffle=False, **loader_options)

## Build CNN ##

A very simple CNN model.

In [None]:
class CNN(nn.Module):
    """Small convolutional classifier: three conv stages and a dense head.

    Every conv stage is ``Conv2d(3x3) -> BatchNorm2d -> ReLU -> MaxPool2d(2)``.
    The head's first ``Linear`` takes ``2 * 2 * 256`` flattened features and
    its last ``Linear`` produces 10 outputs (one logit per class).
    """

    def __init__(self):
        super().__init__()

        # Only the first stage pads its convolution; the others use padding 0.
        self.layer1 = self._conv_stage(3, 64, padding=1)
        self.layer2 = self._conv_stage(64, 128)
        self.layer3 = self._conv_stage(128, 256)

        # NOTE(review): there is no activation between the first two Linear
        # layers (only Dropout) -- confirm whether that is intentional.
        self.layer4 = nn.Sequential(
            nn.Linear(2 * 2 * 256, 512),
            nn.Dropout(0.25),
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, 10),
        )

    @staticmethod
    def _conv_stage(in_channels, out_channels, padding=0):
        """Return one Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d stage."""
        return nn.Sequential(
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=3,
                padding=padding,
            ),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    def forward(self, x):
        """Run the conv stages, flatten all non-batch dims, apply the head."""
        features = x
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features)
        flat = torch.flatten(features, 1)
        return self.layer4(flat)

Use the GPU if one is available.

In [None]:
# Pick CUDA when PyTorch reports an available GPU, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"[+] Train with {device}")

# Model and loss are moved to the chosen device; Adam optimizes every
# model parameter with a learning rate of 0.001.
model = CNN()
model = model.to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
print("[+] Train Start")
total_epochs = 50

for epoch in range(total_epochs):
    train_l = []  # per-batch training losses for this epoch
    val_l = []    # per-batch validation losses for this epoch

    # ---- training pass ----
    # Re-enter train mode every epoch because the validation pass below
    # switches the model to eval mode.
    model.train()
    for x, y in train_dataloader:
        x = x.to(device)
        y = y.to(device)
        y_pred = model(x)

        loss = criterion(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_l.append(loss.item())

    # ---- validation pass ----
    # eval() disables Dropout and makes BatchNorm use its running statistics
    # instead of updating them from validation batches; no_grad() skips
    # building the autograd graph, which validation does not need.
    model.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for x, y in val_dataloader:
            x = x.to(device)
            y = y.to(device)
            y_pred = model(x)

            loss = criterion(y_pred, y)
            correct += (y_pred.argmax(dim=1) == y).sum().item()
            total += len(y)

            val_l.append(loss.item())
    print(f"Epoch: {epoch}, Train Loss: {np.mean(train_l):.3f}, Val Loss: {np.mean(val_l):.3f}, Val Accuracy: {correct / total:.3f}")


In [None]:
BASE_DIR = "/kaggle/input/devkor-image-classification/test"

# Deterministic preprocessing for inference: tensor conversion and the same
# normalization as the training pipelines, with no random augmentation, so
# every test image is seen exactly as stored.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])


class TestDataset(Dataset):
    """Unlabelled test images named ``0000.png``, ``0001.png``, ... in a folder.

    Args:
        base_dir: Directory holding the PNG files (defaults to ``BASE_DIR``).
        num_images: Number of consecutively numbered files to index.
        transform: Callable applied to each RGB ``PIL.Image``; defaults to
            the deterministic ``transform_test`` pipeline.
    """

    def __init__(self, base_dir=BASE_DIR, num_images=10000, transform=transform_test):
        super().__init__()
        self.transform = transform
        # File names are the zero-padded 4-digit index plus ".png".
        self.file_list = [
            os.path.join(base_dir, f"{str(i).zfill(4)}.png")
            for i in range(num_images)
        ]

    def __len__(self):
        """Return the number of indexed test files."""
        return len(self.file_list)

    def __getitem__(self, idx):
        """Load image ``idx``, convert it to RGB and apply the transform."""
        # The context manager closes the file handle; convert() returns an
        # independent in-memory copy, so it stays usable afterwards.
        with Image.open(self.file_list[idx]) as img:
            rgb = img.convert('RGB')
        return self.transform(rgb)


test_dataset = TestDataset()
# shuffle=False keeps predictions in file order.
test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=False)

In [None]:
# Inference must run in eval mode (Dropout off, BatchNorm using its running
# statistics) and needs no autograd graph.
model.eval()
pred = []
with torch.no_grad():
    for x in test_dataloader:
        logits = model(x.to(device))
        # Predicted class = index of the largest logit in each row.
        pred.extend(logits.argmax(dim=1).cpu().tolist())
print(len(pred))

## Submission ##

In [None]:
# Read the sample submission file; its frame is reused for the final output.
SAMPLE_SUBMISSION_PATH = "/kaggle/input/devkor-image-classification/sample_submission.csv"
submission = pd.read_csv(SAMPLE_SUBMISSION_PATH)
submission.head()

In [None]:
# Overwrite the "label" column with the predicted class indices; `pred` must
# contain exactly one entry per row of `submission`.
submission.loc[:, "label"] = pred
# Write the result without the DataFrame index column.
submission.to_csv("submission.csv", index=False)
# Show the first rows as a quick sanity check.
submission.head()