In [1]:
# from: https://www.youtube.com/watch?v=9OHlgDjaE2I

In [2]:
# Import Libraries
import os
import numpy as np
import torch
import glob
import PIL
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable
import torchvision
import pathlib

In [4]:
# Check for cuda
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device.type

'cuda'

In [5]:
# Image Transforms
transformer = transforms.Compose([transforms.Resize((150,150)) # resizes image to 150 x 150 pixels 
                                  , transforms.RandomHorizontalFlip() # flips images  randomly, creates another copy
                                  , transforms.ToTensor() # 0-255 to 0-1, from numpy to tensors
                                  , transforms.Normalize([0.5, 0.5, 0.5] # 0-1 to [-1, 1], formual: (x-mean)/stdev
                                                         , [0.5, 0.5, 0.5]
                                                        )
                                 ]
                                )

In [14]:
# Path for the training and testing directories
train_path = '/home/david/github/intel_image_classification/scene_detection/seg_train/seg_train'
test_path = '/home/david/github/intel_image_classification/scene_detection/seg_test/seg_test'

# Dataloader, helps to load the data
train_loader = DataLoader(torchvision.datasets.ImageFolder(train_path, transform = transformer)
                          , batch_size = 64
                          , shuffle = True
                         )

test_loader = DataLoader(torchvision.datasets.ImageFolder(test_path, transform = transformer)
                         , batch_size = 32
                         , shuffle = True
                        )

In [7]:
# Categories
root = pathlib.Path(train_path)
classes = sorted([j.name.split('/')[-1] for j in root.iterdir()])
classes

['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']

In [8]:
# CNN Network class
class ConvNet(nn.Module):
    def __init__(self, num_classes = 6):
        super(ConvNet, self).__init__()
        
        # Output size after convolution filter
        # ((w-f+2P)/s) + 1
        # w is 150, f = kernel size filter 3, padding P = 1, stride s = 1
        
        # Input shape = (256, 3, 150, 150) # 256 is the batch size, 3 is the channels RGB, 150 x 150 width and height
        self.conv1 = nn.Conv2d(in_channels = 3
                               , out_channels = 12
                               , kernel_size = 3
                               , stride = 1
                               , padding = 1
                              )
        
        # New shape = (256, 12, 150, 150) new depth of 12 not 3
        self.bn1 = nn.BatchNorm2d(num_features = 12)
        # shape = (256, 12, 150, 150)
        self.relu1 = nn.ReLU()
        # shape = (256, 12, 150, 150)
        
        self.pool = nn.MaxPool2d(kernel_size = 2) # reduce the image size factor of 2
        # shape = (256, 12, 75, 75)
        
        self.conv2 = nn.Conv2d(in_channels = 12
                               , out_channels = 20
                               , kernel_size = 3
                               , stride = 1
                               , padding = 1
                              )
        # New shape = (256, 20, 75, 75) 
        self.relu2 = nn.ReLU()
        # New shape = (256, 20, 75, 75) 
        
        
        self.conv3 = nn.Conv2d(in_channels = 20
                       , out_channels = 32
                       , kernel_size = 3
                       , stride = 1
                       , padding = 1
                      )
        # New shape = (256, 32, 75, 75) 
        self.bn3 = nn.BatchNorm2d(num_features = 32)
        # New shape = (256, 32, 75, 75) 
        self.relu3 = nn.ReLU()
        # New shape = (256, 32, 75, 75) 
        
        # Add the fully connected layer
        self.fc = nn.Linear(in_features = 32*75*75 
                            , out_features = num_classes
                           )
        
        # Feed forward function 
        
        def forward(self, input):
            output = self.conv1(input)
            output = self.bn1(output)
            output = self.relu1(output)
            
            output = self.pool(output)
            
            output = self.conv2(output)
            output = self.relu2(output)
            
            output = self.conv3(output)
            output = self.bn3(output)
            output = self.relu3(output)
            
            # Above output will be in matrix form, with shape (256, 32, 75, 75)
            
            output = output.view(-1, 32*75*75)
            
            output = self.fc(output)
            
            return output

In [9]:
model = ConvNet(num_classes = 6).to(device)

In [11]:
# Optimizer and loss function
optimizer = Adam(model.parameters()
                 , lr=0.001
                 , weight_decay = 0.0001
                )
loss_function = nn.CrossEntropyLoss()

In [12]:
# Use 10 Epochs 
num_epochs = 10

In [13]:
# Calculatiing th esize of training and testing images
train_count = len(glob.glob(train_path+'/**/*.jpg'))
test_count = len(glob.glob(test_path+'/**/*.jpg'))

print(f'Train Count: {train_count}')
print(f'Test Count: {test_count}')

Train Count: 14034
Test Count: 3000


In [15]:
# Model training and saving best model

best_accuracy = 0.0

for epoch in range(num_epochs):
    
    # Evalution oand training on training dataset
    model.train()
    train_accuracy = 0.0
    train_loss = 0.0
    
    for i, (images, labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
        
        optimizer.zero_grad()
        
        outputs = model(images)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.cpu().data*images.size(0)
        _, prediction = torch.max(outputs.data, 1)
        
        train_accuracy += int(torch.sum(prediction == labels.data))
    
    train_accuracy = train_accuracy / train_count
    train_loss = train_loss / train_count
    
    
    # Evluation on testing dataset
    model.eval()
    
    test_accuracy = 0.0
    for i, (images, labels) in enumerate(test_loader):
        if torch.cuda.is_available():
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
            
        outputs = model(images)
        _, prediction = torch.max(outputs.data, 1)
        test_accuracy += int(torch.sum(prediction == labels.data))
        
        test_accuracy = test_accuracy / test_count
        
        print(f'Epoch: {epoch} Train Loss: {train_loss} Train Accuracy: {train_accuracy} Test Accuracy: {test_accuracy}')
        
        # Save the best model
        
        if test_accuracy > best_accuracy:
            torch.save(model.state_dict(), 'best_checkpoint.model')
            best_accuracy = test_accuracy
        


NotImplementedError: 