<a href="https://colab.research.google.com/github/zbooster/Landmark-Image-Classification/blob/main/landmark_baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 데이터 준비


## Google 드라이브에서 Resize된 데이터 가져오기
gdown으로 Resize된 데이터 압축 파일(`resizeds.zip`)을 내려받은 뒤, unzip으로 압축을 풀어 Colab의 `/content/datasets` 폴더에 둔다.

In [None]:
# Download the resized dataset archive from Google Drive by its file id
# (the recorded output shows it being saved as /content/resizeds.zip).
# NOTE(review): no unzip step is visible in this notebook, yet later cells read
# /content/datasets/resizeds — presumably the archive is extracted there; confirm.
!gdown 1FSb1ahdmNENxvZvg921R47_ZfNJ0O8va

Downloading...
From: https://drive.google.com/uc?id=1FSb1ahdmNENxvZvg921R47_ZfNJ0O8va
To: /content/resizeds.zip
 51% 1.58G/3.09G [00:06<00:06, 226MB/s]

In [None]:
import os

# Root of the training split; each sub-folder is one landmark class.
original_dataset_dir = '/content/datasets/resizeds/Training'

# Keep only directories (a stray file such as an archive leftover must not be
# treated as a class) and sort them so the list is identical on every run;
# os.listdir returns entries in arbitrary order.
classes_list = sorted(
    name for name in os.listdir(original_dataset_dir)
    if os.path.isdir(os.path.join(original_dataset_dir, name))
)

# Show the first two class names and the number of classes.
classes_list[:2], len(classes_list)

(['가재마을세종호반베르디움2단지아파트', '새뜸마을10단지더샵힐스테이트아파트'], 84)

# 데이터 정리

## 폴더 생성


In [None]:
# Working directory that will hold the train/val folders.
base_dir = './splitted'
# exist_ok=True keeps the cell re-runnable: a plain os.mkdir raises
# FileExistsError as soon as the folder is left over from a previous run.
os.makedirs(base_dir, exist_ok=True)

FileExistsError: ignored

In [None]:
import shutil

# Destination roots for the two splits.
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'val')

# Create each split root and one sub-folder per class. os.makedirs with
# exist_ok=True makes the cell idempotent (os.mkdir fails on a second run)
# and also creates base_dir itself if it is missing.
for split_dir in (train_dir, validation_dir):
  os.makedirs(split_dir, exist_ok=True)
  for cls in classes_list:
    os.makedirs(os.path.join(split_dir, cls), exist_ok=True)

## 데이터 복사

In [None]:
import math

# From here on the name points at the dataset root (parent of the
# 'Training' and 'Validation' folders), not at the training split itself.
original_dataset_dir = '/content/datasets/resizeds'


def _copy_split(source_split, label, cls, dst_root):
  """Copy every file of class `cls` from one source split into dst_root/cls.

  Args:
    source_split: Folder name under original_dataset_dir ('Training' or
      'Validation').
    label: Leading text of the progress line printed for this split.
    cls: Class (folder) name.
    dst_root: Destination split directory; files end up in dst_root/cls.
  """
  src_dir = os.path.join(original_dataset_dir, source_split, cls)
  dst_dir = os.path.join(dst_root, cls)
  # Make sure the target exists so this cell does not depend on the
  # folder-creation cell having succeeded earlier.
  os.makedirs(dst_dir, exist_ok=True)

  fnames = os.listdir(src_dir)
  print(label, cls, "): ", len(fnames))
  for fname in fnames:
    shutil.copyfile(os.path.join(src_dir, fname), os.path.join(dst_dir, fname))


for cls in classes_list:
  _copy_split('Training', "Train size(", cls, train_dir)
  _copy_split('Validation', "Validation size(", cls, validation_dir)

# 학습준비

In [None]:
import torch
import os

# Run on the GPU when one is available, otherwise fall back to the CPU.
USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device("cuda" if USE_CUDA else "cpu")

# Seed PyTorch's random number generators so that weight initialisation,
# dropout masks and loader shuffling are repeatable between runs (GPU kernels
# may still introduce small non-deterministic differences).
SEED = 42
torch.manual_seed(SEED)

# Training hyper-parameters.
BATCH_SIZE = 256
EPOCH = 30

In [None]:
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder

# Baseline preprocessing: resize every image to 64x64, then convert it to a
# tensor. The 64x64 size is what Net's first fully-connected layer
# (4096 inputs) is sized for.
transform_base = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
])

# One ImageFolder per split; the class label comes from the sub-folder name.
train_dataset, val_dataset = (
    ImageFolder(root=split_root, transform=transform_base)
    for split_root in ('./splitted/train/', './splitted/val/')
)

In [None]:
from torch.utils.data import DataLoader

# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=True,
                          num_workers=2)
# Validation metrics are sums over the whole set, so the order is irrelevant;
# a fixed order keeps evaluation deterministic.
val_loader = DataLoader(val_dataset,
                        batch_size=BATCH_SIZE,
                        shuffle=False,
                        num_workers=2)

In [None]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
  """Small baseline CNN: three conv/pool/dropout stages and two linear layers.

  The network expects 3-channel 64x64 images. Each 3x3 convolution uses
  padding=1 (spatial size preserved) and each stage ends with a 2x2 max-pool,
  so the feature map shrinks 64 -> 32 -> 16 -> 8 and the flattened feature
  vector has 64 * 8 * 8 = 4096 values.

  Args:
    num_classes: Number of output classes. Defaults to 84, the number of
      landmark classes used in this notebook.
  """

  def __init__(self, num_classes=84):
    super().__init__()

    self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
    self.pool = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
    self.conv3 = nn.Conv2d(64, 64, 3, padding=1)

    # 4096 = 64 channels * 8 * 8 spatial positions for a 64x64 input.
    self.fc1 = nn.Linear(4096, 512)
    self.fc2 = nn.Linear(512, num_classes)

  def forward(self, x):
    """Return per-class log-probabilities of shape (batch, num_classes).

    Raises:
      RuntimeError: If the input size does not yield 4096 features per
        sample (i.e. the images are not 64x64).
    """
    # Three identical stages: conv -> ReLU -> 2x2 max-pool -> dropout(0.25).
    for conv in (self.conv1, self.conv2, self.conv3):
      x = self.pool(F.relu(conv(x)))
      x = F.dropout(x, p=0.25, training=self.training)

    # Flatten per sample. Unlike view(-1, 4096), this never folds several
    # samples into one row when the input size is wrong; the mismatch then
    # surfaces as a shape error in fc1 instead of silently corrupting the batch.
    x = x.flatten(start_dim=1)
    x = F.relu(self.fc1(x))
    x = F.dropout(x, p=0.5, training=self.training)
    x = self.fc2(x)

    return F.log_softmax(x, dim=1)

In [None]:
# Build the baseline network, place it on the selected device and attach an
# Adam optimizer over all of its parameters.
model_base = Net()
model_base = model_base.to(DEVICE)
optimizer = optim.Adam(params=model_base.parameters(), lr=1e-3)

In [None]:
def train(model, train_loader, optimizer):
  """Run one optimisation pass over every batch yielded by train_loader.

  Puts the model in training mode, then for each (inputs, labels) batch moves
  it to DEVICE, computes the cross-entropy loss and applies one optimizer
  step. Returns nothing; the model is updated in place.
  """
  model.train()
  for inputs, labels in train_loader:
    inputs = inputs.to(DEVICE)
    labels = labels.to(DEVICE)

    # Gradients accumulate across backward() calls, so reset them per batch.
    optimizer.zero_grad()
    batch_loss = F.cross_entropy(model(inputs), labels)
    batch_loss.backward()
    optimizer.step()

In [None]:
def evaluate(model, test_loader):
  """Compute the mean loss and accuracy of model over test_loader.

  The model is switched to eval mode and gradients are disabled. Per-batch
  cross-entropy is summed and divided by len(test_loader.dataset), so the
  loader must expose a `dataset` attribute.

  Returns:
    (mean_loss, accuracy_percent) as Python floats.
  """
  model.eval()
  loss_sum, num_correct = 0, 0

  with torch.no_grad():
    for inputs, labels in test_loader:
      inputs = inputs.to(DEVICE)
      labels = labels.to(DEVICE)
      scores = model(inputs)

      # Sum (not mean) per batch so the final average is exact even when the
      # last batch is smaller than the others.
      loss_sum += F.cross_entropy(scores, labels, reduction='sum').item()

      # Predicted class = index of the highest score in each row.
      predicted = scores.argmax(dim=1)
      num_correct += (predicted == labels).sum().item()

  num_samples = len(test_loader.dataset)
  return loss_sum / num_samples, 100. * num_correct / num_samples

In [None]:
import time
import copy

def train_baseline(model, train_loader, val_loader, optimizer, num_epochs=30):
  """Train model for num_epochs and return it with its best weights loaded.

  After every epoch the model is evaluated on both loaders and a progress
  report is printed. Whenever the validation accuracy beats the best value
  seen so far, a copy of the weights is kept; those weights are restored
  into the model before it is returned.
  """
  top_val_acc = 0.0
  top_weights = copy.deepcopy(model.state_dict())

  for epoch in range(1, num_epochs + 1):
    started = time.time()

    train(model, train_loader, optimizer)
    train_loss, train_acc = evaluate(model, train_loader)
    val_loss, val_acc = evaluate(model, val_loader)

    # Deep-copy: state_dict() returns references to the live tensors, which
    # keep changing as training continues.
    if val_acc > top_val_acc:
      top_val_acc, top_weights = val_acc, copy.deepcopy(model.state_dict())

    # The reported time covers training plus both evaluation passes.
    minutes, seconds = divmod(time.time() - started, 60)
    print('----------------- epoch {} ------------------'.format(epoch))
    print('train Loss: {:.4f}, Accuracy: {:.2f}%'.format(train_loss, train_acc))
    print('val Loss: {:.4f}, Accuracy: {:.2f}%'.format(val_loss, val_acc))
    print('Completed in {:.0f}m {:.0f}s'.format(minutes, seconds))

  model.load_state_dict(top_weights)
  return model

# 학습 및 저장

In [None]:
# Train the baseline model for EPOCH epochs; train_baseline returns the same
# model with the weights of its best validation epoch loaded.
base = train_baseline(model_base, train_loader, val_loader, optimizer, EPOCH)
# NOTE(review): this saves the whole model object rather than its state_dict,
# so loading 'baseline.pt' presumably needs the Net class definition to be
# available — confirm against whatever consumes this file.
torch.save(base, 'baseline.pt')