# Transfer Learning - 식물잎의 사진으로 질병 분류하기
- dataset : https://drive.google.com/file/d/1tzhqc3384i5bd_OIX95ACYkaB44ZpxKj/view

## 1) 데이터셋

In [8]:
# google drive mount
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [13]:
# unzip dataset
!unzip -qq '/content/drive/MyDrive/Pytorch_Colab/Machine-Learning/deep learning/datas/dataset.zip' -d '/content/drive/MyDrive/Pytorch_Colab/Machine-Learning/deep learning/datas/dataset'

In [14]:
import os

original_dataset_dir = '/content/drive/MyDrive/Pytorch_Colab/Machine-Learning/deep learning/datas/dataset'
classes_list = os.listdir(original_dataset_dir)

base_dir = '/content/drive/MyDrive/Pytorch_Colab/Machine-Learning/deep learning/datas/splitted'

classes_list

['Pepper,_bell___healthy',
 'Grape___Esca_(Black_Measles)',
 'Pepper,_bell___Bacterial_spot',
 'Strawberry___healthy',
 'Grape___Black_rot',
 'Corn___Common_rust',
 'Apple___Apple_scab',
 'Potato___healthy',
 'Potato___Late_blight',
 'Cherry___healthy',
 'Tomato___Bacterial_spot',
 'Apple___Black_rot',
 'Cherry___Powdery_mildew',
 'Corn___Cercospora_leaf_spot Gray_leaf_spot',
 'Peach___healthy',
 'Tomato___Early_blight',
 'Tomato___Tomato_Yellow_Leaf_Curl_Virus',
 'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)',
 'Potato___Early_blight',
 'Grape___healthy',
 'Apple___Cedar_apple_rust',
 'Corn___Northern_Leaf_Blight',
 'Tomato___Septoria_leaf_spot',
 'Corn___healthy',
 'Strawberry___Leaf_scorch',
 'Tomato___Tomato_mosaic_virus',
 'Tomato___Leaf_Mold',
 'Tomato___Late_blight',
 'Tomato___healthy',
 'Tomato___Spider_mites Two-spotted_spider_mite',
 'Apple___healthy',
 'Tomato___Target_Spot',
 'Peach___Bacterial_spot']

In [21]:
# 데이터 정리를 위한 목록 및 폴더 생성
import shutil

# /spliited/train 폴더 생성
train_dir = os.path.join(base_dir, 'train')
os.mkdir(train_dir)

# /spliited/validation 폴더 생성
validation_dir = os.path.join(base_dir, 'val')
os.mkdir(validation_dir)

# /spliited/test 폴더 생성
test_dir = os.path.join(base_dir, 'test')
os.mkdir(test_dir)

# 각 train, validation, test 폴더 모두 동일한 이름의 폴더 생성
for cls in classes_list :
  os.mkdir(os.path.join(train_dir, cls))
  os.mkdir(os.path.join(validation_dir, cls))
  os.mkdir(os.path.join(test_dir, cls))

In [22]:
# 데이터 현황 확인
import math

for cls in classes_list :
  path = os.path.join(original_dataset_dir, cls)
  fnames = os.listdir(path)

  # 각 class 폴더의 이미지을 train(60%), valid(20%), test(20%)로 분할
  train_size = math.floor(len(fnames) * 0.6)
  validation_size = math.floor(len(fnames) * 0.2)
  test_size = math.floor(len(fnames) * 0.2)

  # 첫 번째부터 60%에 해당하는 이미지는 train
  train_fnames = fnames[:train_size]
  print("Train size(",cls,") : ", len(train_fnames))
  for fname in train_fnames :
    src = os.path.join(path, fname)
    dst = os.path.join(os.path.join(train_dir, cls), fname)
    shutil.copyfile(src, dst)

  # 60% ~ 80%(60+20)에 해당하는 이미지는 validation
  validation_fnames = fnames[train_size:(validation_size + train_size)]
  print("Validation size(",cls,") : ", len(validation_fnames))
  for fname in validation_fnames :
    src = os.path.join(path, fname)
    dst = os.path.join(os.path.join(validation_dir, cls), fname)
    shutil.copyfile(src, dst)

  # 80% ~ 100%(60+20+20)에 해당하는 이미지는 test
  test_fnames = fnames[(train_size + validation_size):(validation_size + train_size + test_size)]
  print("Test size(",cls,") : ", len(test_fnames))
  for fname in test_fnames :
    src = os.path.join(path, fname)
    dst = os.path.join(os.path.join(test_dir, cls), fname)
    shutil.copyfile(src, dst)

Train size( Pepper,_bell___healthy ) :  886
Validation size( Pepper,_bell___healthy ) :  295
Test size( Pepper,_bell___healthy ) :  295
Train size( Grape___Esca_(Black_Measles) ) :  829
Validation size( Grape___Esca_(Black_Measles) ) :  276
Test size( Grape___Esca_(Black_Measles) ) :  276
Train size( Pepper,_bell___Bacterial_spot ) :  598
Validation size( Pepper,_bell___Bacterial_spot ) :  199
Test size( Pepper,_bell___Bacterial_spot ) :  199
Train size( Strawberry___healthy ) :  273
Validation size( Strawberry___healthy ) :  91
Test size( Strawberry___healthy ) :  91
Train size( Grape___Black_rot ) :  708
Validation size( Grape___Black_rot ) :  236
Test size( Grape___Black_rot ) :  236
Train size( Corn___Common_rust ) :  715
Validation size( Corn___Common_rust ) :  238
Test size( Corn___Common_rust ) :  238
Train size( Apple___Apple_scab ) :  378
Validation size( Apple___Apple_scab ) :  126
Test size( Apple___Apple_scab ) :  126
Train size( Potato___healthy ) :  91
Validation size( Po

## 2) Train 준비

In [23]:
import torch
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder

USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device("cuda" if USE_CUDA else "cpu")
BATCH_SIZE = 256
EPOCH = 30

DEVICE

device(type='cpu')

In [24]:
# 제각각인 이미지 크기를 64x64로 resize
transform_base = transforms.Compose([transforms.Resize((64, 64)), transforms.ToTensor()])

# 폴더 이름을 곧 label로 설정
train_dataset = ImageFolder(root = '/content/drive/MyDrive/Pytorch_Colab/Machine-Learning/deep learning/datas/splitted/train', transform=transform_base)
val_dataset = ImageFolder(root = '/content/drive/MyDrive/Pytorch_Colab/Machine-Learning/deep learning/datas/splitted/val', transform=transform_base)

In [25]:
# 데이터 로드 및 미니배치 설정
from torch.utils.data import DataLoader

train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size = BATCH_SIZE,
                                           shuffle = True,
                                           num_workers = 4)
val_loader = torch.utils.data.DataLoader(val_dataset,
                                         batch_size = BATCH_SIZE,
                                         shuffle=True,
                                         num_workers=4)



## 3) 신경망 구성

In [26]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module) :

  def __init__(self) :

    super(Net, self).__init__()

    self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
    self.pool = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
    self.conv3 = nn.Conv2d(64, 64, 3, padding=1)

    self.fc1 = nn.Linear(4096, 512)
    self.fc2 = nn.Linear(512, 33)

  def forward(self, x) :

    x = self.conv1(x)
    x = F.relu(x)
    x = self.pool(x)
    x = F.dropout(x, p=0.25, training=self.training)  # dropout training parameter -> 훈련 시에만 dropout 사용

    x = self.conv2(x)
    x = F.relu(x)
    x = self.pool(x)
    x = F.dropout(x, p=0.25, training=self.training)

    x = self.conv3(x)
    x = F.relu(x)
    x = self.pool(x)
    x = F.dropout(x, p=0.25, training=self.training)

    x = x.view(-1, 4096)  # flatten
    x = self.fc1(x)
    x = F.relu(x)
    x = F.dropout(x, p=0.5, training=self.training)
    x = self.fc2(x)

    return F.log_softmax(x, dim=1)

In [27]:
model_base = Net().to(DEVICE)
optimizer = optim.Adam(model_base.parameters(), lr=0.001)

In [28]:
# train 함수
def train(model, train_loader, optimizer) :
  model.train()
  for batch_idx, (data, target) in enumerate(train_loader) :
    data, target = data.to(DEVICE), target.to(DEVICE)

    optimizer.zero_grad()

    output = model(data)
    loss = F.cross_entropy(output, target)
    loss.backward()

    optimizer.step()

In [29]:
# evaluate 함수
def evaluate(model, test_loader) :
  model.eval()
  test_loss = 0
  correct = 0

  with torch.no_grad() :
    for data, target in test_loader :
      data, target = data.to(DEVICE), target.to(DEVICE)
      output = model(data)

      test_loss += F.cross_entropy(output, target, reduction='sum').item()

      pred = output.max(1, keepdim=True)[1]
      correct += pred.eq(target.view_as(pred)).sum().item()

  test_loss /= len(test_loader.dataset)
  test_accuracy = 100. * correct / len(test_loader.dataset)

  return test_loss, test_accuracy

In [30]:
# train, evalute 후 accuracy가 가장 높은 model의 weight 저장하는 함수
import time
import copy

def train_baseline(model, train_loader, val_loader, optimizer, num_epochs=30) :
  best_acc = 0.0
  best_model_wts = copy.deepcopy(model.state_dict())  # epoch 중 acc가 가장 높은 모델의 weight 저장

  for epoch in range(1, num_epochs + 1) :
    since = time.time()
    # 학습
    train(model, train_loader, optimizer)

    # train_loader 데이터로 evaluate
    train_loss, train_acc = evaluate(model, train_loader)
    # val_loader 데이터로 evaluate
    val_loss, val_acc = evaluate(model, val_loader)

    # val_acc와 best_acc 중 더 높은 점수를 best_acc에 갱신
    if val_acc > best_acc :
      best_acc = val_acc
      best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since

    print('-------------------- epoch {} --------------------'.format(epoch))
    print('train Loss : {:.4f}, Accuracy : {:.2f}%'.format(train_loss, train_acc))
    print('val Loss : {.4f}, Accuracy : {:.2f}%'.format(val_loss, val_acc))
    print('Completed in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))

  model.load_state_dict(best_model_wts)

  return model

In [None]:
# 학습
base = train_baseline(model_base, train_loader, val_loader, optimizer, EPOCH)

# 학습 완료된 모델 저장
torch.save(base, '')

## 4) Pretrained 모델 불러오기

In [None]:
# Augmentation
data_transforms = {
    'train' : transforms.Compose([transforms.Resize([64, 64]),
                                  transforms.RandomHorizontalFlip(),
                                  transforms.RandomVerticalFlip(),
                                  transforms.RandomCrop(52),
                                  transforms.ToTensor(),
                                  transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
    'val' : transforms.Compose([transforms.Resize([64, 64]),
                                  transforms.RandomCrop(52),
                                  transforms.ToTensor(),
                                  transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
}

In [None]:
# transform 적용
data_dir = '/content/drive/MyDrive/Pytorch_Colab/Machine-Learning/deep learning/datas/splitted'

image_datasets = {x : ImageFolder(root=os.path.join(data_dir, x),
                                  transform=data_transforms[x]) for x in ['train', 'val']}

# 미니 배치 설정
dataloaders = {x : torch.utils.data.DataLoader(image_datasets[x],
                                               batch_size=BATCH_SIZE,
                                               shuffle=True,
                                               num_workers=4) for x in ['train', 'val']}

dataset_sizes = {x : len(image_datasets[x]) for x in ['train', 'val']}

class_names = image_datasets['train'].classes

In [None]:
# pretrained model 불러오기
from torchvision import models

# resnet50 모델 불러오기
resnet = models.resnet50(pretrained=True) #pretrained=True : 학습 완료된 신경망 구조 + weight 가져옴 / False : 신경망 구조만 가져옴
num_ftrs = resnet.fc.in_features  # 마지막 fc layer의 채널
resnet.fc = nn.Linear(num_ftrs, 33) # 출력 개수를 33개로 수정 (plant class 개수 = 33)
resnet = resnet.to(DEVICE)

# loss function & optimizer
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.Adam(filter(lambda p : p.requires_grad, resnet.parameteres()), lr=0.001)

In [None]:
# scheduler 설정
from torch.optim import lr_scheduler

# epoch에 따라 learning-rate 수정
# step_size=7, gamma=0.1 : 7 epoch마다 learning-rate 0.1씩 감소
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [None]:
# layer 설정
# resnet : 10개의 layer를 가지고 있음
ct = 0
for child in resnet.children() :
  ct += 1
  if ct < 6 : # 0~5번 layer는 학습하지 못하도록 freeze
    for param in child.parameters() :
      param.requires_grad = False

In [None]:
# train 함수 정의
def train_resnet(model, criterion, optimizer, scheduler, num_epochs=25) :
  best_model_wts = copy.deepcopy(model.state_dict())
  best_acc = 0.0

  for epoch in range(num_epochs) :
    print('------------------- epoch {} -------------------'.format(epoch+1))
    since = time.time()
    for phase in ['train', 'val'] :
      if phase == 'train' :
        model.train()
      else :
        model.eval()

      running_loss= 0.0
      running_corrects = 0

      for inputs, labels in dataloaders[phase] :
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

        optimizer.zero_grad()

        with torch.set_grad_enabled(phase == 'train') :
          outputs = model(inputs)
          _, preds = torch.max(outputs, 1)  # preds : 33개 클래스 중 가장 높은 확률값(예측값)의 인덱스
          loss = criterion(outputs, labels)

          if phase == 'train' :
            loss.backward()
            optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)

      if phase == 'train' :
        scheduler.step()  # learning rate 업데이트

      epoch_loss = running_loss / dataset_sizes[phase]
      epoch_acc = running_corrects.double()/dataset_sizes[phase]

      print('{} Loss : {:.4f} Acc : {:.4f}'.format(phase, epoch_loss, epoch_acc))

      if phase == 'val' and epoch_acc > best_acc :  # accuarcy, weights 업데이트
        best_acc = epoch_acc
        best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print('Completed in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
  print('Best val Acc : {:.4f}'.format(best_acc))

  model.load_state_dict(best_model_wts)

  return model

## 5) Pretrained 모델 학습

In [None]:
model_resnet50 = train_resnet(resnet, criterion, optimizer_ft,
                              exp_lr_scheduler, num_epochs=EPOCH)

torch.save(model_resnet50, '')

## 6) Evaluate

In [None]:
transform_resNet = transforms.Compose([transforms.Resize([64, 64]),
                                       transforms.RandomCrop(52),
                                       transforms.ToTensor(),
                                       transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

test_resNet = ImageFolder(root='/content/drive/MyDrive/Pytorch_Colab/Machine-Learning/deep learning/datas/splitted/test',
                          transform=transform_resNet)
test_loader_resNet = torch.utils.data.DataLoader(test_resNet,
                                                 batch_size=BATCH_SIZE,
                                                 shuffle=True,
                                                 num_workers=4)

In [None]:
# best model 불러오기
resnet50 = torch.load('resnet50.pt')

# evaluate
resnet50.eval()
test_loss, test_accuracy = evaluate(resnet50, test_loader_resNet)

print('ResNet test acc : ', test_accuracy)