In [1]:
# Update gdown
!pip install --upgrade --no-cache-dir gdown

# Download the dataset from Google Drive
!gdown --id '16FvOGDl-9cL1nOt0cKKJQvWWysMStTPX&export' --output data.zip
!gdown --id '1ReuU9QWr5Da-VeTOQIFf-WvFZps1PVmW&export' --output yolo_data.zip

# Unzip the dataset.
!unzip -o data.zip
!unzip -o yolo_data.zip
!ls

[1;30;43m串流輸出內容已截斷至最後 5000 行。[0m
  inflating: resultTrain/00877.jpg   
  inflating: resultTrain/00878.jpg   
  inflating: resultTrain/00879.jpg   
  inflating: resultTrain/00880.jpg   
  inflating: resultTrain/00881.jpg   
  inflating: resultTrain/00883.jpg   
  inflating: resultTrain/00885.jpg   
  inflating: resultTrain/00886.jpg   
  inflating: resultTrain/00887.jpg   
  inflating: resultTrain/00888.jpg   
  inflating: resultTrain/00889.jpg   
  inflating: resultTrain/00892.jpg   
  inflating: resultTrain/00893.jpg   
  inflating: resultTrain/00894.jpg   
  inflating: resultTrain/00895.jpg   
  inflating: resultTrain/00896.jpg   
  inflating: resultTrain/00897.jpg   
  inflating: resultTrain/00898.jpg   
  inflating: resultTrain/00899.jpg   
  inflating: resultTrain/00900.jpg   
  inflating: resultTrain/00901.jpg   
  inflating: resultTrain/00902.jpg   
  inflating: resultTrain/00903.jpg   
  inflating: resultTrain/00904.jpg   
  inflating: resultTrain/00905.jpg   
  inflating: re

In [2]:
import torch
import torchvision
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
import torch.nn.functional as F  # All functions that don't have any parameters
from torch.utils.data import (
    DataLoader, Dataset
)  # Gives easier dataset managment and creates mini batches
import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
import torchvision.transforms as transforms  # Transformations we can perform on our dataset
import torchvision.io as tvio
import pandas as pd
import numpy as np
import os
import sys

In [3]:
!nvidia-smi

Wed Dec 14 11:56:00 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   69C    P0    28W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [5]:
torch.cuda.empty_cache()

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [7]:
# Hyperparameters
num_classes = 3
learning_rate = 1e-3
batch_size = 100
num_epochs = 100
early_stop = 10

# VGG-16 Model

In [8]:
# Load pretrain model & modify it
model = torchvision.models.vgg16(pretrained=True)

# If you want to do finetuning then set requires_grad = False
# Remove these two lines if you want to train entire model,
# and only want to load the pretrain weights.
for param in model.parameters():
    param.requires_grad = True

model.classifier = nn.Sequential(
    nn.Linear(512 * 7 * 7, 512),
    nn.Dropout(p=0.25),
    nn.Linear(512, 256),
    nn.Dropout(p=0.25),
    nn.BatchNorm1d(256),
    nn.Linear(256, num_classes)
)
model.to(device)



VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [9]:
from torchsummary import summary

summary(model, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
            Conv2d-6        [-1, 128, 112, 112]          73,856
              ReLU-7        [-1, 128, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]         147,584
              ReLU-9        [-1, 128, 112, 112]               0
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
             ReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 256, 56, 56]         590,080
             ReLU-14          [-1, 256,

In [10]:
transform = torchvision.transforms.Compose([
    #轉成tensor格式
    # torchvision.transforms.ToTensor(),
    #將短邊等比放大成224
    torchvision.transforms.Resize(224),
    #裁切多於的部分
    torchvision.transforms.CenterCrop(224),
    #正規化
    torchvision.transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])])

In [11]:
class CustomImageDataset(Dataset):
  def __init__(self, annotations_file, img_dir, transform=None):
    self.img_labels = pd.read_csv(annotations_file)
    self.img_dir = img_dir
    self.transform = transform
    unique, inverse = np.unique(self.img_labels.iloc[:, 1], return_inverse=True)
    self.onehot_list = np.eye(unique.shape[0])[inverse]

  def __len__(self):
    return len(self.img_labels)

  def __getitem__(self, idx):
    img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
    image = tvio.read_image(img_path) #讀取圖片
    label = self.onehot_list[idx] #第0列=檔名，第1列=label
    image = self.transform(image.float())
    return image, label

train_dataset = CustomImageDataset('train.csv', 'resultTrain', transform=transform)
dev_dataset = CustomImageDataset('dev.csv', 'resultDev', transform=transform)

In [13]:
#load data
train_num = int(len(train_dataset)*0.7)
valid_num = len(train_dataset)-train_num
train_set, valid_set = torch.utils.data.random_split(train_dataset, [train_num, valid_num])

train_loader = DataLoader(train_set, batch_size = batch_size, shuffle = True)
valid_loader = DataLoader(valid_set, batch_size = batch_size, shuffle = False)

In [14]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [15]:
# Define training
def train(train_loader, dev_loader, model, device):
  # settings
  path = 'vgg-yolo.pth'
  
  epoch = 0
  min = sys.maxsize
  early_stop_cnt = 0
  
  while epoch < num_epochs:
    model.train()
    for i, (x,y) in enumerate(train_loader):
        optimizer.zero_grad()
        x, y = x.to(device), y.to(device)
        pred = model(x)
        loss = criterion(pred, y)
        loss.backward()
        optimizer.step()

    if dev_loader is not None:
      dev_loss = dev(dev_loader, model, device)
      if dev_loss < min:
        torch.save(model, path)
        min = dev_loss
        print('epoch {}: loss = {}'.format(epoch+1, min))
        early_stop_cnt = 0
      else:
        early_stop_cnt += 1
    epoch += 1
    if early_stop_cnt > early_stop:
      break

  print('Finished training after {} epochs'.format(epoch))

# Define validation
def dev(dev_loader, model, device):
    model.eval()
    total_loss = 0
    for x, y in dev_loader:
        x, y = x.to(device), y.to(device)
        with torch.no_grad():
            pred = model(x)
            mse_loss = criterion(pred, y)
        total_loss += mse_loss.detach().cpu().item()*len(x)
    
    return total_loss / len(dev_loader.dataset)

In [16]:
train(train_loader, valid_loader, model, device)

epoch 1: loss = 0.9350061795958646
epoch 3: loss = 0.6591914582287015
epoch 4: loss = 0.574328299278853
epoch 9: loss = 0.5588553527393145
epoch 11: loss = 0.5507558862417067
Finished training after 22 epochs


In [17]:
test_loader = DataLoader(dev_dataset, batch_size = batch_size, shuffle = False)

In [18]:
# Define testing
def test(test_loader, model, device):
  model.eval()
  pred_list = []
  test_y = []
  for x, y in test_loader:
    x = x.to(device)
    with torch.no_grad():
      pred = model(x)
      pred_list.append(pred.detach().cpu())

  pred_list = torch.cat(pred_list, dim=0).numpy()
  print("Done testing")
  return pred_list

def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0) # only difference

In [19]:
# Testing
vgg_model = torch.load('vgg-yolo.pth').to(device)
test_pred = test(test_loader, vgg_model, device)
res_list = []
for test in test_pred:
    res = softmax(test)
    x = np.zeros(3)
    x[np.argmax(res)] = 1
    res_list.append(x)

Done testing


In [20]:
test_y_list = []
for x, batch_y in test_loader:
  for y in batch_y:
    test_y_list.append(y)

In [21]:
total = 0
for i in range(len(res_list)):
    if np.argmax(res_list[i]) == np.argmax(test_y_list[i]):
        total += 1
print('accuracy', total/len(res_list))

accuracy 0.76375


In [22]:
for idx, each in enumerate(test_y_list):
  test_y_list[idx] = each.tolist()

<class 'list'>


In [23]:
from sklearn.metrics import f1_score

In [24]:
f1_score(test_y_list, res_list, average='micro')

0.76375

In [25]:
f1_score(test_y_list, res_list, average='macro')

0.7678865511182584

In [26]:
f1_score(test_y_list, res_list, average=None)

array([0.7985348 , 0.67595819, 0.82916667])

#VGG-16 Model with YOLO

In [None]:
train_dataset = CustomImageDataset('train.csv', 'resultTrain', transform=transform)
dev_dataset = CustomImageDataset('dev.csv', 'resultDev', transform=transform)

In [None]:
#load data
train_num = int(len(train_dataset)*0.7)
valid_num = len(train_dataset)-train_num
train_set, valid_set = torch.utils.data.random_split(train_dataset, [train_num, valid_num])

train_loader = DataLoader(train_set, batch_size = batch_size, shuffle = True)
valid_loader = DataLoader(valid_set, batch_size = batch_size, shuffle = False)

In [None]:
# Define training
def train(train_loader, dev_loader, model, device):
  # settings
  path = 'vgg-yolo.pth'
  
  epoch = 0
  min = sys.maxsize
  early_stop_cnt = 0
  
  while epoch < num_epochs:
    model.train()
    for i, (x,y) in enumerate(train_loader):
        optimizer.zero_grad()
        x, y = x.to(device), y.to(device)
        pred = model(x)
        loss = criterion(pred, y)
        loss.backward()
        optimizer.step()

    if dev_loader is not None:
      dev_loss = dev(dev_loader, model, device)
      if dev_loss < min:
        torch.save(model, path)
        min = dev_loss
        print('epoch {}: loss = {}'.format(epoch+1, min))
        early_stop_cnt = 0
      else:
        early_stop_cnt += 1
    epoch += 1
    if early_stop_cnt > early_stop:
      break

  print('Finished training after {} epochs'.format(epoch))

# Define validation
def dev(dev_loader, model, device):
    model.eval()
    total_loss = 0
    for x, y in dev_loader:
        x, y = x.to(device), y.to(device)
        with torch.no_grad():
            pred = model(x)
            mse_loss = criterion(pred, y)
        total_loss += mse_loss.detach().cpu().item()*len(x)
    
    return total_loss / len(dev_loader.dataset)

In [None]:
train(train_loader, valid_loader, model, device)

In [None]:
test_loader = DataLoader(dev_dataset, batch_size = batch_size, shuffle = False)

In [None]:
# Define testing
def test(test_loader, model, device):
  model.eval()
  pred_list = []
  test_y = []
  for x, y in test_loader:
    x = x.to(device)
    with torch.no_grad():
      pred = model(x)
      pred_list.append(pred.detach().cpu())

  pred_list = torch.cat(pred_list, dim=0).numpy()
  print("Done testing")
  return pred_list

def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0) # only difference

In [None]:
# Testing
vgg_model = torch.load('vgg-yolo.pth').to(device)
test_pred = test(test_loader, vgg_model, device)
res_list = []
for test in test_pred:
    res = softmax(test)
    x = np.zeros(3)
    x[np.argmax(res)] = 1
    res_list.append(x)

In [None]:
test_y_list = []
for x, batch_y in test_loader:
  for y in batch_y:
    test_y_list.append(y)

In [None]:
total = 0
for i in range(len(res_list)):
    if np.argmax(res_list[i]) == np.argmax(test_y_list[i]):
        total += 1
print('accuracy', total/len(res_list))

In [None]:
for idx, each in enumerate(test_y_list):
  test_y_list[idx] = each.tolist()

In [None]:
f1_score(test_y_list, res_list, average='micro')

In [None]:
f1_score(test_y_list, res_list, average='macro')

In [None]:
f1_score(test_y_list, res_list, average=None)