<a href="https://colab.research.google.com/github/tu-mo/hoc_may/blob/main/test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Khai báo thư viện**

In [124]:
import torch
from torch.utils.data import DataLoader
from torch import nn

import torchvision
from torchvision import transforms

import matplotlib.pyplot as plt

from collections import namedtuple

from sklearn.metrics import classification_report

# **Lấy dữ liệu từ GitHub**

In [125]:
!git clone https://github.com/tu-mo/hoc_may.git

fatal: destination path 'hoc_may' already exists and is not an empty directory.


# **Chuẩn bị dữ liệu**








In [126]:
# Lightweight pair bundling the train and test halves of the same kind of object
# (used below for both the datasets and their data loaders).
TrainTest = namedtuple('TrainTest', ['train', 'test'])

def get_classes():
  """Return the class names of the problem as a fresh list: '2C', '3C', '4C'."""
  return ['2C', '3C', '4C']

def raw_image(size):
  """Build the baseline (no preprocessing, no augmentation) transform pipelines.

  Args:
    size: target edge length; images are resized to (size, size).

  Returns:
    A (transform_train, transform_test) tuple. Both pipelines only resize the
    image and convert it to a tensor; each is a separate Compose instance.
  """
  def _resize_to_tensor():
    return transforms.Compose([
      transforms.Resize(size=(size, size)),
      transforms.ToTensor(),
    ])

  return _resize_to_tensor(), _resize_to_tensor()

def preprocess_image(size):
  """Build transform pipelines that resize and normalize the images.

  The same mean/std normalization is applied to BOTH the training and the test
  pipeline. Normalizing only the training images would make the model see a
  different input distribution at evaluation time than the one it was trained on.

  Args:
    size: target edge length; images are resized to (size, size).

  Returns:
    A (transform_train, transform_test) tuple of Compose pipelines.
  """
  # Per-channel mean/std used for normalization (shared by train and test).
  normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225]
  )
  transform_train = transforms.Compose([
    transforms.Resize(size=(size,size)),
    # Kept from the original pipeline; after resizing to (size, size) a center
    # crop of the same size leaves the image unchanged.
    transforms.CenterCrop(size=(size,size)),
    transforms.ToTensor(),
    normalize
  ])
  transform_test = transforms.Compose([
    transforms.Resize(size=(size,size)),
    transforms.ToTensor(),
    normalize
  ])
  return transform_train, transform_test

def augmentation_image(size):
  """Build transform pipelines with data augmentation on the training side.

  Args:
    size: target edge length; images are resized to (size, size).

  Returns:
    A (transform_train, transform_test) tuple. The training pipeline adds a
    padded random crop, random horizontal/vertical flips and color jitter;
    the test pipeline only resizes and converts to a tensor.
  """
  target = (size, size)

  train_steps = [
    transforms.Resize(size=target),
    transforms.RandomCrop(size=target, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.ToTensor(),
  ]
  test_steps = [
    transforms.Resize(size=target),
    transforms.ToTensor(),
  ]
  return transforms.Compose(train_steps), transforms.Compose(test_steps)

def prepare_data(size, root='/content/hoc_may', transform_fn=None):
  """Create the train and test ImageFolder datasets.

  Args:
    size: target edge length handed to the transform factory.
    root: directory that contains the 'train' and 'test' sub-folders.
      Defaults to the location of the cloned repository on Colab.
    transform_fn: callable taking `size` and returning a
      (transform_train, transform_test) tuple. Defaults to `raw_image`;
      pass `preprocess_image` or `augmentation_image` to switch pipelines
      instead of editing this function.

  Returns:
    TrainTest(train=<train dataset>, test=<test dataset>).
  """
  if transform_fn is None:
    transform_fn = raw_image
  transform_train, transform_test = transform_fn(size)
  trainset = torchvision.datasets.ImageFolder(root=f'{root}/train', transform=transform_train)
  testset = torchvision.datasets.ImageFolder(root=f'{root}/test', transform=transform_test)
  return TrainTest(train=trainset, test=testset)

def prepare_loader(datasets):
  """Wrap the train/test datasets in DataLoaders.

  Args:
    datasets: object exposing `.train` and `.test` datasets.

  Returns:
    TrainTest(train=<shuffled loader>, test=<ordered loader>), both using
    batches of 32 samples and 4 worker processes.
  """
  common = dict(batch_size=32, num_workers=4)
  return TrainTest(
    train=DataLoader(dataset=datasets.train, shuffle=True, **common),
    # The test loader keeps dataset order (no shuffling).
    test=DataLoader(dataset=datasets.test, shuffle=False, **common),
  )


# **Train + Test**

In [127]:
def train_epoch(epoch, model, loader, loss_func, optimizer, device):
  """Run one training pass over `loader`, updating `model` in place.

  Every 32 batches the average loss of those 32 batches is printed and the
  running total is reset.

  Args:
    epoch: epoch number, used only in the progress message.
    model: module to train; switched to training mode.
    loader: iterable yielding (images, labels) batches.
    loss_func: callable computing the loss from (outputs, labels).
    optimizer: optimizer holding the model parameters.
    device: device the batches are moved to before the forward pass.
  """
  report_every = 32
  window_loss = 0.0
  model.train()
  for step, (inputs, targets) in enumerate(loader, start=1):
    inputs = inputs.to(device)
    targets = targets.to(device)

    optimizer.zero_grad()
    batch_loss = loss_func(model(inputs), targets)
    batch_loss.backward()
    optimizer.step()

    window_loss += batch_loss.item()
    if step % report_every == 0:
      # The message reports the zero-based index of the last batch in the window.
      print(f"Epoch {epoch} step {step - 1} ave_loss {window_loss/report_every:.4f}")
      window_loss = 0.0

def test_epoch(model, loader, device):
  ytrue = []
  ypred = []
  with torch.no_grad():
    model.eval()
    for i, (images, labels) in enumerate(loader):
      images, labels = images.to(device), labels.to(device)
      outputs = model(images)
      _, predicted = torch.max(outputs, dim=1)
      ytrue += list(labels.cpu().numpy())
      ypred += list(predicted.cpu().numpy())

  return ypred, ytrue

# **Thực thi**

In [5]:
def main(model = 'vgg16', size = 32, epochs = 10):
  """Train the chosen architecture and print a frame-level report after each epoch.

  Note: a later cell of this notebook defines another function that is also
  called `main` (video-level evaluation); whichever cell ran last wins.

  Args:
    model: architecture name, one of 'vgg16', 'vgg19', 'resnet50', 'densenet161'.
    size: edge length the images are resized to (passed to `prepare_data`).
    epochs: number of training epochs (default 10).

  Raises:
    ValueError: if `model` is not one of the supported architecture names.
  """
  supported = ('vgg16', 'vgg19', 'resnet50', 'densenet161')
  # Fail fast with a clear message; otherwise the string itself would reach
  # `.to(device)` below and crash with an unrelated AttributeError.
  if model not in supported:
    raise ValueError(f"Unsupported model {model!r}; expected one of {supported}")

  classes = get_classes()
  datasets = prepare_data(size)
  loaders = prepare_loader(datasets)
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  out_f = len(classes)  # out_features: one output per class

  print(model)
  # Each supported name is also the name of its torchvision.models factory.
  net = getattr(torchvision.models, model)()
  # Replace the final layer so the network outputs one score per class.
  if model in ('vgg16', 'vgg19'):
    in_f = net.classifier[-1].in_features
    net.classifier[-1] = torch.nn.Linear(in_features=in_f, out_features=out_f)
  elif model == 'resnet50':
    in_f = net.fc.in_features
    net.fc = torch.nn.Linear(in_features=in_f, out_features=out_f)
  else:  # densenet161
    in_f = net.classifier.in_features
    net.classifier = torch.nn.Linear(in_features=in_f, out_features=out_f)

  net.to(device)
  loss_func = nn.CrossEntropyLoss()
  optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
  for epoch in range(epochs):
    train_epoch(epoch, net, loaders.train, loss_func, optimizer, device)
    ypred, ytrue = test_epoch(net, loaders.test, device)
    # Passing `labels` keeps the report aligned with `target_names` even when
    # some class does not occur in this epoch's labels/predictions.
    print(classification_report(ytrue, ypred, labels=list(range(out_f)), target_names=classes))

In [None]:
main('vgg16',32)

In [None]:
main('vgg16',224)

In [None]:
main('vgg19',32)

In [None]:
main('vgg19',224)

In [None]:
main('resnet50',32)

In [None]:
main('resnet50',224)

In [None]:
main('densenet161',32)


In [None]:
main('densenet161',224)

# **Thực thi với kết quả phân theo video**

In [128]:
import numpy as np

def main(model = 'vgg16', size = 32, epochs = 10):
  """Train the chosen architecture, then evaluate it per video by majority vote.

  After training, every test frame is classified. Frames are grouped by video
  (the part of the file name before the first '_', within one true class) and
  each video is assigned the class predicted for most of its frames; ties go
  to the lowest class index. A per-video line and a video-level classification
  report are printed.

  Args:
    model: architecture name, one of 'vgg16', 'vgg19', 'resnet50', 'densenet161'.
    size: edge length the images are resized to (passed to `prepare_data`).
    epochs: number of training epochs (default 10).

  Raises:
    ValueError: if `model` is not one of the supported architecture names.
  """
  supported = ('vgg16', 'vgg19', 'resnet50', 'densenet161')
  # Fail fast with a clear message; otherwise the string itself would reach
  # `.to(device)` below and crash with an unrelated AttributeError.
  if model not in supported:
    raise ValueError(f"Unsupported model {model!r}; expected one of {supported}")

  classes = get_classes()
  datasets = prepare_data(size)
  loaders = prepare_loader(datasets)
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  out_f = len(classes)  # out_features: one output per class

  print(model)
  # Each supported name is also the name of its torchvision.models factory.
  net = getattr(torchvision.models, model)()
  # Replace the final layer so the network outputs one score per class.
  if model in ('vgg16', 'vgg19'):
    in_f = net.classifier[-1].in_features
    net.classifier[-1] = torch.nn.Linear(in_features=in_f, out_features=out_f)
  elif model == 'resnet50':
    in_f = net.fc.in_features
    net.fc = torch.nn.Linear(in_features=in_f, out_features=out_f)
  else:  # densenet161
    in_f = net.classifier.in_features
    net.classifier = torch.nn.Linear(in_features=in_f, out_features=out_f)

  net.to(device)
  loss_func = nn.CrossEntropyLoss()
  optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
  for epoch in range(epochs):
    train_epoch(epoch, net, loaders.train, loss_func, optimizer, device)
  ypred, ytrue = test_epoch(net, loaders.test, device)

  # Tally the frame-level predictions of every video. The test loader does not
  # shuffle, so ypred/ytrue are in the same order as datasets.test.imgs.
  # Keying on (true class, video id) keeps two videos apart when they share an
  # id but belong to different classes; dicts preserve first-seen order.
  votes = {}
  for (path, _), pred, label in zip(datasets.test.imgs, ypred, ytrue):
    video = path.split('C/')[1].split('_')[0]
    counts = votes.setdefault((int(label), video), [0] * out_f)
    counts[int(pred)] += 1

  ytrue_ = []
  ypred_ = []
  for (true, video), counts in votes.items():
    # Majority vote, computed for EVERY video including the last one
    # (list.index returns the first, i.e. lowest, class on ties).
    predic = counts.index(max(counts))
    print('video:',video, '  true:', true, '  predic:',predic)
    ytrue_.append(true)
    ypred_.append(predic)

  # Passing `labels` keeps the report aligned with `target_names` even when
  # some class has no video in the labels/predictions.
  print(classification_report(ytrue_, ypred_, labels=list(range(out_f)), target_names=classes))

In [None]:
main('vgg16',32)

In [None]:
main('vgg19',32)

In [None]:
main('resnet50',32)

In [None]:
main('densenet161',32)