In [1]:
from PIL import Image
import os, random

import copy
import cv2
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torchvision.transforms as transforms
import torchvision.models as models

from model import *

In [2]:
## kaggle: Grapevine Disease Images
## https://www.kaggle.com/datasets/piyushmishra1999/plantvillage-grape

cuda_available =  torch.cuda.is_available()
print('torch version:', torch.__version__)
print('cuda available:', cuda_available)
if cuda_available:
    print('-> cuda version:', torch.version.cuda)
    print('-> cuda device:', torch.cuda.get_device_name())
device = torch.device("cuda:0" if cuda_available else "cpu")

torch version: 1.11.0+cu113
cuda available: True
-> cuda version: 11.3
-> cuda device: NVIDIA GeForce GTX 1650 with Max-Q Design


In [7]:
loader = transforms.Compose([
	transforms.Resize(128),  # 缩放
	transforms.ToTensor()
]) 

def get_image_info(root = 'dataset'):
	# 取得檔案的位置、類別並分組
    dir = [x for x in os.listdir(root) if not os.path.isfile(x)]
    info = pd.DataFrame()
    for (i, f) in enumerate(dir):
        f_files = [x for x in os.listdir(os.path.join(root, f)) if not os.path.isfile(x)]
        f_info = pd.DataFrame({
            'name':[f]*len(f_files), 
            'loc':[i]*len(f_files), 
            'file':[os.path.join(root, f, x) for x in f_files], 
            'group':random.choices(['train', 'valid', 'test'], [6, 2, 2], k=len(f_files))
        })
        info = pd.concat([info, f_info])
    return info

def _image_loader(image_name):
	image = Image.open(image_name)
	image = loader(image).unsqueeze(0)
	return image

def _get_image_x(x_data):
	x_data = x_data['file'].to_list()
	x_tensor = torch.tensor([])
	for x_d in x_data:
		x_image = _image_loader(x_d)
		x_tensor = torch.cat([x_tensor, x_image])
	return x_tensor

def _get_image_y(y_data, total=4):
	y_data = y_data['loc'].to_list()
	y_tensor = torch.tensor([])
	for y_d in y_data:
		y_list = [0.0] * total
		y_list[int(y_d)] = 1.0
		y_list = torch.tensor([y_list])
		y_tensor = torch.cat([y_tensor, y_list])
	return y_tensor

def get_image_data(info):
	data_train, data_valid, data_test = map(lambda x: info[info['group'] == x], ('train', 'valid', 'test'))
	x_train, x_valid, x_test = map(lambda x: _get_image_x(x), (data_train, data_valid, data_test))
	y_train, y_valid, y_test = map(lambda y: _get_image_y(y), (data_train, data_valid, data_test))
	return x_train, y_train, x_valid, y_valid, x_test, y_test

In [8]:
info = get_image_info()
x_train, y_train, x_valid, y_valid, x_test, y_test = get_image_data(info)

print(x_train.size(), y_train.size())
print(x_valid.size(), y_valid.size())

torch.Size([2419, 3, 128, 128]) torch.Size([2419, 4])
torch.Size([849, 3, 128, 128]) torch.Size([849, 4])


In [5]:
model = gvd_detect()
train_dl = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_train, y_train), shuffle=True, batch_size=64, pin_memory=True)
valid_dl = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_valid, y_valid), shuffle=True, batch_size=64, pin_memory=True)

criterion = torch.nn.MSELoss(reduction='mean')
criterion = criterion.to(device)
optimizer = torch.optim.Adam(model.parameters())

maxVA = 0
maxVA_ep = 0

epoch = 1000
for e in range(epoch):
    ep = e+1
    model.train()
    model = model.to(device)
    for xb, yb in train_dl:
        pred = model(xb.to(device))
        loss = criterion(pred, yb.to(device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    ## 模型評估
    model.eval()
    model = model.to(torch.device('cpu'))
    train_predict = torch.tensor([])
    train_result = torch.tensor([])
    for xb, yb in train_dl:
        train_predict = torch.cat([train_predict, model(xb)], 0)
        train_result = torch.cat([train_result, yb], 0)
    valid_predict = torch.tensor([])
    valid_result = torch.tensor([])
    for xb, yb in valid_dl:
        valid_predict = torch.cat([valid_predict, model(xb)], 0)
        valid_result = torch.cat([valid_result, yb], 0)
    
    train_loss = float(criterion(train_predict, train_result))
    valid_loss = float(criterion(valid_predict, valid_result))
    train_accu = float(accuracy(train_predict, train_result))
    valid_accu = float(accuracy(valid_predict, valid_result))

    save = ''
    if (maxVA < valid_accu) | (maxVA_ep < 1):
        maxVA = valid_accu
        maxVA_ep = ep
        save = '< save'
        torch.save(model, 'gvd_detect.pt')
    print('{:<4d}'.format(ep), 
        'train_loss', '{:.4f}'.format(train_loss), 
        'train_accu', '{:.4f}'.format(train_accu), 
        'valid_loss', '{:.4f}'.format(valid_loss), 
        'valid_accu', '{:.4f}'.format(valid_accu), 
        save)
    ## early drop
    if ep < 200: ## 至少執行200次
        pass
    elif maxVA_ep < ep/2: ## 連續一半epoch的準確度小於maxValAcc
        print('Accuracy of validation is CRASH !!')
        break
print('Training done, save model at VA =', maxVA)

1    train_loss 0.1818 train_accu 0.2934 valid_loss 0.1821 valid_accu 0.2893 < save
2    train_loss 0.1802 train_accu 0.3460 valid_loss 0.1809 valid_accu 0.3321 < save
3    train_loss 0.1818 train_accu 0.3460 valid_loss 0.1829 valid_accu 0.3321 
4    train_loss 0.1329 train_accu 0.5696 valid_loss 0.1325 valid_accu 0.5702 < save
5    train_loss 0.1241 train_accu 0.6101 valid_loss 0.1248 valid_accu 0.6238 < save
6    train_loss 0.0891 train_accu 0.7300 valid_loss 0.0976 valid_accu 0.6988 < save
7    train_loss 0.0808 train_accu 0.7639 valid_loss 0.0885 valid_accu 0.7250 < save
8    train_loss 0.0693 train_accu 0.8216 valid_loss 0.0768 valid_accu 0.7929 < save
9    train_loss 0.0956 train_accu 0.7183 valid_loss 0.1028 valid_accu 0.6976 
10   train_loss 0.0522 train_accu 0.8588 valid_loss 0.0627 valid_accu 0.8262 < save
11   train_loss 0.0501 train_accu 0.8688 valid_loss 0.0597 valid_accu 0.8429 < save
12   train_loss 0.0429 train_accu 0.8863 valid_loss 0.0538 valid_accu 0.8488 < save
13  

In [10]:
mdel = torch.load('gvd_detect.pt')
test_dl = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_test, y_test), shuffle=True, batch_size=64, pin_memory=True)
test_predict = torch.tensor([])
test_result = torch.tensor([])
for xb, yb in test_dl:
    valid_predict = torch.cat([test_predict, model(xb)], 0)
    valid_result = torch.cat([test_result, yb], 0)
print('test lost:', float(criterion(valid_predict, valid_result)))
print('test accu:', float(accuracy(train_predict, train_result)))

test lost: 1.3230186368673458e-07
test accu: 1.0
