In [1]:
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Copy the dataset archive from the mounted Drive into the current working directory.
!cp /content/drive/MyDrive/car_color_dataset.zip ./

In [3]:
!unzip /content/car_color_dataset.zip -d ./

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: ./train/orange/e4f9205ccc.jpg  
  inflating: ./train/orange/e532c363be.jpg  
  inflating: ./train/orange/e5d2677717.jpg  
  inflating: ./train/orange/e6bbd5bd7c.jpg  
  inflating: ./train/orange/e6c8c2eb92.jpg  
  inflating: ./train/orange/e734cc9748.jpg  
  inflating: ./train/orange/e75ad9cbe4.jpg  
  inflating: ./train/orange/e7793a0aef.jpg  
  inflating: ./train/orange/e8005c8ac5.jpg  
  inflating: ./train/orange/e85a7f559d.jpg  
  inflating: ./train/orange/e865d8da86.jpg  
  inflating: ./train/orange/e8fd7563ed.jpg  
  inflating: ./train/orange/ea48d0930b.jpg  
  inflating: ./train/orange/ea71c13a20.jpg  
  inflating: ./train/orange/ea75ce4328.jpg  
  inflating: ./train/orange/eacc03787d.jpg  
  inflating: ./train/orange/eae2b8608d.jpg  
  inflating: ./train/orange/eb25a96e49.jpg  
  inflating: ./train/orange/eb704000fa.jpg  
  inflating: ./train/orange/ebd0009374.jpg  
  inflating: ./train/orange/ef03053

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, datasets
from torchvision.datasets import ImageFolder
import torch.nn.functional as F

import shutil
import time
import collections



# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed PyTorch's global RNG so weight initialisation, shuffling and the random
# augmentations start from the same state on every Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

BATCH_SIZE = 32   # samples per mini-batch for every DataLoader
NUM_CLASSES = 15  # number of target classes
# Side length (pixels) of the square network input. The model sizes its first
# fully-connected layer by pushing a SIZE x SIZE dummy image through the
# convolutional stack, so this must equal the Resize((120, 120)) applied by
# the data transforms. It previously said 128 and only worked because both
# sizes happen to produce the same flattened feature width.
SIZE = 120
EPOCHS = 30       # number of passes over the training set

In [5]:
# Evaluation-time preprocessing: resize to 120x120, convert to a tensor and
# normalise every channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize((120, 120)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Training-time preprocessing: random horizontal/vertical flips and a random
# rotation (RandomRotation(30)), followed by exactly the same resize /
# to-tensor / normalise steps used for evaluation.
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(30),
    *transform.transforms,
])

# One ImageFolder per split; only the training split is augmented.
train_dataset = ImageFolder(root='/content/train', transform=transform_train)
test_dataset = ImageFolder(root='/content/test', transform=transform)
valid_dataset = ImageFolder(root='/content/val', transform=transform)

# Only the training loader shuffles; evaluation order is kept fixed.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)




In [6]:
import copy


def _evaluate_accuracy(model, loader):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    The model is switched to eval mode (and left there) and gradients are
    disabled for the duration of the pass. Batches are moved to the device of
    the model's parameters; the notebook-level ``device`` is used only when
    the model has no parameters.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    model.eval()
    correct = 0
    seen = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(target_device), labels.to(target_device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            seen += labels.size(0)

    if seen == 0:
        raise ValueError("cannot compute accuracy: the data loader yielded no samples")
    return correct / seen


def train(epochs, model, criterion, optimizer, train_loader, valid_loader, test_loader):
    """Train ``model`` and return a copy of the epoch with the best validation accuracy.

    Each epoch runs one optimisation pass over ``train_loader`` followed by an
    evaluation pass over ``valid_loader``. Whenever validation accuracy
    improves, a deep copy of the model is kept. After the last epoch the best
    copy is evaluated once on ``test_loader`` and returned.

    Args:
        epochs: number of training epochs.
        model: the ``nn.Module`` to optimise (updated in place).
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimiser already bound to ``model``'s parameters.
        train_loader, valid_loader, test_loader: iterables of
            ``(images, labels)`` batches.

    Returns:
        A deep copy of the model, in eval mode, taken at the best validation
        epoch. If ``epochs`` is 0 the copy is of the untrained model.

    Raises:
        ValueError: if any of the loaders that is used yields no samples.
    """
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    best_model = None
    # Start below any reachable accuracy so the first epoch is always kept,
    # even when its validation accuracy is exactly 0.
    best_acc = -1.0

    # Training loop
    for epoch in range(epochs):
        model.train()
        total_train_correct = 0
        total_train_samples = 0
        for images, labels in train_loader:
            images, labels = images.to(target_device), labels.to(target_device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            _, predicted = torch.max(outputs, 1)
            total_train_correct += (predicted == labels).sum().item()
            total_train_samples += labels.size(0)

        if total_train_samples == 0:
            raise ValueError("train_loader yielded no samples")
        train_accuracy = total_train_correct / total_train_samples

        # Validation pass (leaves the model in eval mode, so the snapshot
        # below is stored in eval mode as well).
        accuracy = _evaluate_accuracy(model, valid_loader)
        print(f'Epoch [{epoch+1}/{epochs}], Training Accuracy: {train_accuracy:.4f}, Validation Accuracy: {accuracy:.4f}')

        if accuracy > best_acc:
            print("New Best Model with Accuracy: ", accuracy)
            best_acc = accuracy
            best_model = copy.deepcopy(model)

    print("Training finished.")

    if best_model is None:
        # Only reachable with epochs == 0: nothing was trained, so the best
        # model available is the one that was passed in.
        best_model = copy.deepcopy(model)

    # Test the best snapshot (the helper puts *best_model* itself into eval mode).
    accuracy = _evaluate_accuracy(best_model, test_loader)
    print(f'Test Accuracy On best Model: {accuracy:.4f}')
    return best_model

# Model

In [23]:
class VehicleColorRecognitionModel(nn.Module):
    """Two-stream convolutional classifier.

    The same input image is fed to two structurally identical convolutional
    streams ("top" and "bottom"). Inside each stream the feature map is split
    in half along the channel axis after conv1 and again after conv3, and each
    half is processed by its own convolution. The four conv5 outputs are
    concatenated, flattened and passed to a three-layer fully-connected
    classifier that produces raw class scores (logits).

    Args:
        Load_VIS_URL: not read by the constructor; kept so existing callers
            that pass it keep working.
        num_classes: width of the final linear layer. Defaults to 15.
        input_size: side length of the square input used to size the first
            fully-connected layer. When ``None`` the notebook-level ``SIZE``
            constant is used.
    """

    def __init__(self, Load_VIS_URL=None, num_classes=15, input_size=None):
        super().__init__()
        conv3x3 = self._conv3x3

        # Attribute names and the layer order inside every Sequential are
        # kept as they always were so state_dict keys stay the same.

        # ===============================  top ================================
        self.top_conv1 = self._stem()
        # conv2 operates on the two 24-channel halves of conv1's output
        self.top_top_conv2 = conv3x3(24, 64, batch_norm=True, pool=True)
        self.top_bot_conv2 = conv3x3(24, 64, batch_norm=True, pool=True)
        # conv3 operates on the concatenation of both conv2 outputs (64 + 64)
        self.top_conv3 = conv3x3(128, 192)
        # conv4 / conv5 operate on the two 96-channel halves of conv3's output
        self.top_top_conv4 = conv3x3(96, 96)
        self.top_bot_conv4 = conv3x3(96, 96)
        self.top_top_conv5 = conv3x3(96, 64, pool=True)
        self.top_bot_conv5 = conv3x3(96, 64, pool=True)

        # ===============================  bottom =============================
        self.bottom_conv1 = self._stem()
        self.bottom_top_conv2 = conv3x3(24, 64, batch_norm=True, pool=True)
        self.bottom_bot_conv2 = conv3x3(24, 64, batch_norm=True, pool=True)
        self.bottom_conv3 = conv3x3(128, 192)
        self.bottom_top_conv4 = conv3x3(96, 96)
        self.bottom_bot_conv4 = conv3x3(96, 96)
        self.bottom_top_conv5 = conv3x3(96, 64, pool=True)
        self.bottom_bot_conv5 = conv3x3(96, 64, pool=True)

        if input_size is None:
            input_size = SIZE
        n_features = self._probe_feature_width(input_size)

        # Fully-connected layers
        self.classifier = nn.Sequential(
            nn.Linear(n_features, 4096),
            nn.ReLU(),
            nn.Dropout(0.7),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.6),
            nn.Linear(4096, num_classes),
        )

    @staticmethod
    def _stem():
        """Build conv1: 11x11 stride-4 conv -> ReLU -> BatchNorm -> 3x3 stride-2 max-pool."""
        return nn.Sequential(
            nn.Conv2d(3, 48, kernel_size=(11, 11), stride=(4, 4)),
            nn.ReLU(),
            nn.BatchNorm2d(48),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )

    @staticmethod
    def _conv3x3(in_channels, out_channels, batch_norm=False, pool=False):
        """Build 3x3 conv (padding 1) -> ReLU, optionally followed by BatchNorm and/or max-pool."""
        layers = [
            nn.Conv2d(in_channels, out_channels, kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.ReLU(),
        ]
        if batch_norm:
            layers.append(nn.BatchNorm2d(out_channels))
        if pool:
            layers.append(nn.MaxPool2d(kernel_size=3, stride=2))
        return nn.Sequential(*layers)

    def _probe_feature_width(self, input_size):
        """Return the flattened feature width for an ``input_size`` x ``input_size`` image.

        The probe runs on zeros, in eval mode and without autograd. Running
        an uninitialised tensor through the network in training mode (as was
        done before) feeds arbitrary memory into every BatchNorm layer's
        running statistics before training has even started.
        """
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                probe = torch.zeros(1, 3, input_size, input_size)
                return self._forward(probe).size(-1)
        finally:
            self.train(was_training)

    @staticmethod
    def _stream(x, conv1, conv2_pair, conv3, conv4_pair, conv5_pair):
        """Run one convolutional stream and return its two conv5 feature maps."""
        x = conv1(x)
        # 48 channels -> two groups of 24, each with its own conv2
        first, second = torch.split(x, 24, 1)
        x = torch.cat([conv2_pair[0](first), conv2_pair[1](second)], 1)
        x = conv3(x)
        # 192 channels -> two groups of 96, each with its own conv4 and conv5
        first, second = torch.split(x, 96, 1)
        return (
            conv5_pair[0](conv4_pair[0](first)),
            conv5_pair[1](conv4_pair[1](second)),
        )

    def _forward(self, x):
        """Return the flattened, concatenated conv5 features of both streams."""
        top = self._stream(
            x,
            self.top_conv1,
            (self.top_top_conv2, self.top_bot_conv2),
            self.top_conv3,
            (self.top_top_conv4, self.top_bot_conv4),
            (self.top_top_conv5, self.top_bot_conv5),
        )
        bottom = self._stream(
            x,
            self.bottom_conv1,
            (self.bottom_top_conv2, self.bottom_bot_conv2),
            self.bottom_conv3,
            (self.bottom_top_conv4, self.bottom_bot_conv4),
            (self.bottom_top_conv5, self.bottom_bot_conv5),
        )
        x_cat = torch.cat([*top, *bottom], 1)
        return x_cat.view(x_cat.size(0), -1)

    def forward(self, x):
        """Return raw class scores (logits); no softmax is applied here."""
        return self.classifier(self._forward(x))



# Training

In [24]:


# Build the network on the selected device, pair it with a cross-entropy loss
# and an Adam optimizer, then train and keep the model returned by train().
model = VehicleColorRecognitionModel()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

best_model = train(
    epochs=EPOCHS,
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    valid_loader=valid_loader,
    test_loader=test_loader,
)



KeyboardInterrupt: 

In [None]:
# Runs another EPOCHS-epoch pass on the existing `model` and `optimizer`
# objects; neither is re-created in this cell.
best_model = train(
    epochs=EPOCHS,
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    valid_loader=valid_loader,
    test_loader=test_loader,
)


Epoch [1/30], Training Accuracy: 0.7769, Validation Accuracy: 0.7942
New Best Model with Accuracy:  0.7941935483870968
Epoch [2/30], Training Accuracy: 0.7702, Validation Accuracy: 0.7935
Epoch [3/30], Training Accuracy: 0.7690, Validation Accuracy: 0.7768
Epoch [4/30], Training Accuracy: 0.7740, Validation Accuracy: 0.7865
Epoch [5/30], Training Accuracy: 0.7729, Validation Accuracy: 0.8026
New Best Model with Accuracy:  0.8025806451612904
Epoch [6/30], Training Accuracy: 0.7785, Validation Accuracy: 0.7968
Epoch [7/30], Training Accuracy: 0.7735, Validation Accuracy: 0.7800
Epoch [8/30], Training Accuracy: 0.7802, Validation Accuracy: 0.7787
Epoch [9/30], Training Accuracy: 0.7798, Validation Accuracy: 0.7935
Epoch [10/30], Training Accuracy: 0.7771, Validation Accuracy: 0.7723
Epoch [11/30], Training Accuracy: 0.7732, Validation Accuracy: 0.7852
Epoch [12/30], Training Accuracy: 0.7868, Validation Accuracy: 0.7806
Epoch [13/30], Training Accuracy: 0.7773, Validation Accuracy: 0.7852

In [None]:
# Training

In [None]:
def test_accuracy(model, test_loader):
  with torch.no_grad():
      total_correct = 0
      total_samples = 0
      for images, labels in test_loader:
          images, labels = images.to(device), labels.to(device)
          outputs = model(images)
          _, predicted = torch.max(outputs, 1)
          total_correct += (predicted == labels).sum().item()
          total_samples += labels.size(0)

      accuracy = total_correct / total_samples
      print(f'Test Accuracy On best Model: {accuracy:.4f}')

In [None]:
# Persist the object returned by train() under the 30-epoch checkpoint name.
torch.save(obj=best_model, f="vehicle_model_30e_64b_aug.pt")

In [None]:
# Persist the object returned by train() under the 60-epoch checkpoint name.
torch.save(obj=best_model, f="vehicle_model_60e_64b_aug.pt")

In [None]:
# Report test-set accuracy of the current `best_model`.
test_accuracy(model=best_model, test_loader=test_loader)




Test Accuracy On best Model: 0.7943


In [None]:
# Report test-set accuracy of the current `best_model`.
test_accuracy(model=best_model, test_loader=test_loader)


Test Accuracy On best Model: 0.7853


In [None]:
# Report test-set accuracy of the current `best_model`.
test_accuracy(model=best_model, test_loader=test_loader)

Test Accuracy On best Model: 0.7796


In [None]:
# test_accuracy() calls the loaded object as a model, so the file must hold a
# fully pickled nn.Module rather than a state_dict. Recent PyTorch releases
# default torch.load to weights_only=True, which refuses such files, hence
# the explicit weights_only=False.
# NOTE: weights_only=False unpickles arbitrary Python objects; only load
# checkpoints that you created yourself.
# map_location lets a checkpoint saved on a GPU load on a CPU-only runtime.
testmodel = torch.load("vehicle_model.pt", map_location=device, weights_only=False)
# A pickled module keeps whatever train/eval flag it was saved with; make sure
# Dropout and BatchNorm run in inference mode before measuring accuracy.
testmodel.eval()
test_accuracy(testmodel, test_loader)

Test Accuracy On best Model: 0.7410
