<a href="https://colab.research.google.com/github/xoxonut/nycu-cv-113-2/blob/main/CV_HW1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!tar -xvzf "/content/drive/MyDrive/hw1-data.tar.gz" -C "/content" > /dev/null 2>&1
!du -sh "/content/data"
!awk 'BEGIN {sum=0} /^[0-9]+$/ {sum+=$1} END {print sum}' <(find "/content/data/train" -type f | wc -l) <(find "/content/data/val" -type f | wc -l)

6.5G	/content/data
21024


In [5]:
from torchvision import datasets
from torchvision.transforms import v2
from torchvision.models import resnet50,ResNet50_Weights
from torch.utils.data import DataLoader,Dataset
from torch import nn
import torch
import pandas as pd
import matplotlib.pyplot as plt
import os
import PIL.Image as Image
import numpy as np


In [10]:
def get_transforms():
  train_transform = v2.Compose([
      v2.Lambda(lambda img: img.convert("RGB")),
      v2.Resize((224,224)),
      v2.ColorJitter(hue=.05, saturation=.05),
      v2.RandomHorizontalFlip(),
      v2.RandomRotation(20),
      v2.ToTensor(),
      v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
  ])
  test_transform = v2.Compose([
      v2.Lambda(lambda img: img.convert("RGB")),
      v2.Resize((224,224)),
      v2.ToTensor(),
      v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
  ])
  return train_transform, test_transform

In [23]:
def create_dataloader(train_transform,test_transform,batch_size):
  train_set = datasets.ImageFolder("/content/data/train",transform=train_transform)
  val_set = datasets.ImageFolder("/content/data/val",transform=test_transform)
  train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=False)
  val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
  print(123)
  class TestDataSet(Dataset):
    def __init__(self, root_dir, transform=None):
      self.root_dir = root_dir
      self.transform = transform
      valid_exts = (".jpg",".jpeg",".png",".bmp")
      self.image_paths = [os.path.join(root_dir, f) for f in os.listdir(root_dir) if f.lower().endswith(valid_exts)]

    def __len__(self):
      return len(self.image_paths)

    def __getitem__(self, index):
      image_path = self.image_paths[index]
      image = Image.open(image_path).convert("RGB")
      if self.transform:
        image = self.transform(image)
      file_name = os.path.basename(image_path)
      name_only = os.path.splitext(file_name)[0]
      return image,name_only
  test_set = TestDataSet("/content/data/test",transform=test_transform)
  test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
  print(train_set.class_to_idx)
  print(val_set.class_to_idx)
  return train_loader,val_loader,test_loader

In [24]:
  train_transform,test_transform = get_transforms()
  train_loader,val_loader,test_loader = create_dataloader(train_transform,test_transform,32)


FileNotFoundError: Found no valid file for the classes .ipynb_checkpoints. Supported extensions are: .jpg, .jpeg, .png, .ppm, .bmp, .pgm, .tif, .tiff, .webp

In [59]:
def train_one_epoch(model,train_loader,optimizer,criterion,device):
  model.train()
  total_loss, correct, total = 0, 0, 0
  for inputs, labels in train_loader:
    inputs, labels = inputs.to(device), labels.to(device)

    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    total_loss += loss.item()
    predicted = torch.argmax(outputs, dim=1)
    correct += (predicted == labels).sum().item()
    print(predicted)
    print(labels)
    print(predicted == labels)
    total += labels.size(0)
  avg_loss = total_loss / len(train_loader)
  avg_acc = correct / total * 100
  return avg_loss, avg_acc

In [60]:
def validate(model,val_loader,criterion,device):
  model.eval()
  total_loss, correct, total = 0, 0, 0
  with torch.no_grad():
    for inputs, labels in val_loader:
      inputs, labels = inputs.to(device), labels.to(device)
      outputs = model(inputs)
      loss = criterion(outputs, labels)

      total_loss += loss.item()
      predicted = torch.argmax(outputs, dim=1)
      correct += (predicted == labels).sum().item()
      total += labels.size(0)
  avg_loss = total_loss / len(val_loader)
  avg_acc = correct / total * 100
  return avg_loss, avg_acc

In [61]:
def test_model(model,test_loader,device):
  model.eval()
  correct, total = 0, 0
  predictions = []
  with torch.no_grad():
    for inputs, file_names in test_loader:
      inputs = inputs.to(device)
      outputs = model(inputs)
      preds = torch.argmax(outputs, dim=1)

      for file_name, pred in zip(file_names, preds):
        predictions.append({"image_name": file_name,
                            "pred_label": pred.item()})
  return predictions


In [62]:
def init_RestNet50(num_classes):
  model = resnet50(weights=ResNet50_Weights.DEFAULT)
  num_ftrs = model.fc.in_features
  model.fc = nn.Sequential(
    nn.Linear(num_ftrs, 512),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(512, num_classes)
  )
  for p in model.parameters():
    p.requires_grad = False
  for p in model.fc.parameters():
    p.requires_grad = True
  for p in model.layer4.parameters():
    p.requires_grad = True
  return model

In [63]:
import matplotlib.pyplot as plt

def plot_loss_accuracy(train_losses, val_losses, train_accs, val_accs):
    epochs = range(1, len(train_losses) + 1)

    plt.figure(figsize=(12, 4))

    # Plot losses
    plt.subplot(1, 2, 1)
    plt.plot(epochs, train_losses, label='Training Loss')
    plt.plot(epochs, val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training and Validation Loss')
    plt.legend()

    # Plot accuracies
    plt.subplot(1, 2, 2)
    plt.plot(epochs, train_accs, label='Training Accuracy')
    plt.plot(epochs, val_accs, label='Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.title('Training and Validation Accuracy')
    plt.legend()

    plt.tight_layout()
    plt.show()


In [64]:
def print_model_size(model):
    param_size = sum(p.numel() * p.element_size() for p in model.parameters())  # in bytes
    buffer_size = sum(b.numel() * b.element_size() for b in model.buffers())  # in bytes
    total_size = (param_size + buffer_size) / (1024 ** 2)  # Convert to MB
    print(f"Model size: {total_size:.2f} MB")


In [66]:
if __name__ == "__main__":
  train_transform,test_transform = get_transforms()
  train_loader,val_loader,test_loader = create_dataloader(train_transform,test_transform,32)
  model = init_RestNet50(100)
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  model.to(device)
  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.AdamW(model.parameters(), lr=0.001,weight_decay=0.01)
  scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.8)
  epochs = 1
  train_losses, train_accs, val_losses, val_accs = [], [], [], []
  print_model_size(model)
  model=torch.load("/content/drive/MyDrive/full_model.pth")
  exit()
  for epoch in range(epochs):
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion, device)
    val_loss, val_acc = validate(model, val_loader, criterion, device)
    train_losses.append(train_loss)
    train_accs.append(train_acc)
    val_losses.append(val_loss)
    val_accs.append(val_acc)
    torch.save(model,"/content/drive/MyDrive/full_model.pth")
    print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f} - Train Acc: {train_acc:.2f}% - Val Loss: {val_loss:.4f} - Val Acc: {val_acc:.2f}%")
  plot_loss_accuracy(train_losses, val_losses, train_accs, val_accs)
  res = test_model(model,test_loader,device)
  df = pd.DataFrame(res)
  df.to_csv("/content/drive/MyDrive/prediction.csv",index=False)




Model size: 94.08 MB


  model=torch.load("/content/drive/MyDrive/full_model.pth")


tensor([ 8, 69,  3, 87, 43, 78, 72, 47, 81, 82, 20, 80, 16, 10, 74, 81, 36, 42,
        81, 66, 51, 74, 78, 90, 75, 14,  3, 90, 63, 32, 60, 37],
       device='cuda:0')
tensor([ 8, 69,  3, 87, 43, 82, 55, 47, 81, 78, 20, 65, 16, 10, 43, 81, 36, 42,
        81, 66, 51, 74, 82, 90, 75, 14,  3,  3, 63, 32, 60, 37],
       device='cuda:0')
tensor([ True,  True,  True,  True,  True, False, False,  True,  True, False,
         True, False,  True,  True, False,  True,  True,  True,  True,  True,
         True,  True, False,  True,  True,  True,  True, False,  True,  True,
         True,  True], device='cuda:0')
tensor([47, 60, 48, 35, 41, 30, 37, 72, 97, 61, 90, 98, 41, 16, 48, 89, 45, 45,
        87, 58, 25, 30, 92, 72, 47, 51, 74,  3, 76, 26, 14, 12],
       device='cuda:0')
tensor([47, 60, 48, 35, 41, 30, 37, 72, 97, 61, 90, 98, 41, 16, 48, 89, 45, 45,
        23, 58, 25, 30, 92, 72, 55, 12, 74,  3, 76, 46, 14, 12],
       device='cuda:0')
tensor([ True,  True,  True,  True,  True,  True, 

KeyboardInterrupt: 

In [1]:
from IPython.display import display
model.eval()
class TestDataSet(Dataset):
  def __init__(self, root_dir, transform=None):
    self.root_dir = root_dir
    self.transform = transform
    valid_exts = (".jpg",".jpeg",".png",".bmp")
    self.image_paths = [os.path.join(root_dir, f) for f in os.listdir(root_dir) if f.lower().endswith(valid_exts)]

  def __len__(self):
    return len(self.image_paths)

  def __getitem__(self, index):
    image_path = self.image_paths[index]
    image = Image.open(image_path).convert("RGB")
    if self.transform:
      image = self.transform(image)
    file_name = os.path.basename(image_path)
    name_only = os.path.splitext(file_name)[0]
    return image,name_only
test_set = TestDataSet("/content/data/train/0",transform=test_transform)
test_loader = DataLoader(test_set, batch_size=32, shuffle=False)
with torch.no_grad():
  for inputs, file_names in test_loader:
    inputs = inputs.to(device)
    outputs = model(inputs)
    preds = torch.argmax(outputs, dim=1)
    print(preds)

NameError: name 'model' is not defined