In [1]:
from PIL import Image
import os, random

import copy
import cv2
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn.utils.prune as prune
import torchvision.transforms as transforms
import torchvision.models as models

from mytorch import *

In [2]:
# Decide once which device the notebook computes on, then report the runtime.
cuda_available = torch.cuda.is_available()
device = torch.device("cuda:0") if cuda_available else torch.device("cpu")

print('torch version:', torch.__version__)
print('cuda available:', cuda_available)
if cuda_available:
    # These details only exist when a CUDA runtime is present.
    print('-> cuda version:', torch.version.cuda)
    print('-> cuda device:', torch.cuda.get_device_name())

torch version: 1.10.1+cu113
cuda available: True
-> cuda version: 11.3
-> cuda device: Quadro P2000


In [3]:
# Preprocessing pipeline shared by every image read from disk.
# NOTE(review): with an integer size, torchvision's Resize is documented to scale
# the shorter edge, so the output is 128x128 only for square inputs -- confirm
# that every dataset image is square.
loader = transforms.Compose(
    [
        transforms.Resize(size=128),  # scale the image down
        transforms.ToTensor(),
    ]
)

def get_image_info(root = 'dataset', seed = None):
    """Index an image-folder dataset and assign every image to a data split.

    Dataset used by this notebook -- Kaggle "Grapevine Disease Images":
    https://www.kaggle.com/datasets/piyushmishra1999/plantvillage-grape

    Every sub-directory of ``root`` is treated as one class and every regular
    file inside it as one image of that class. Entries of ``root`` that are not
    directories, and entries of a class folder that are not regular files, are
    ignored.

    Args:
        root: directory that contains one sub-directory per class.
        seed: optional seed for the train/valid/test assignment. With the
            default ``None`` the module-level ``random`` generator is used, so
            the split differs between runs unless the caller seeds ``random``.

    Returns:
        pandas.DataFrame with one row per image and the columns
        ``name`` (class directory name), ``loc`` (integer class index),
        ``file`` (path to the image) and ``group`` ('train', 'valid' or
        'test', drawn with weights 6:2:2). The frame is empty (but still has
        these columns) when no image is found.
    """
    columns = ['name', 'loc', 'file', 'group']
    rng = random if seed is None else random.Random(seed)

    # Sorted so the class index does not depend on the order in which the
    # filesystem happens to list the directories.
    class_dirs = sorted(
        x for x in os.listdir(root) if os.path.isdir(os.path.join(root, x))
    )

    frames = []
    for (i, f) in enumerate(class_dirs):
        class_root = os.path.join(root, f)
        # The check must use the full path: a bare name would be resolved
        # against the current working directory and never filter anything.
        f_files = sorted(
            x for x in os.listdir(class_root)
            if os.path.isfile(os.path.join(class_root, x))
        )
        if not f_files:
            # An empty class contributes no rows; skipping it keeps column dtypes stable.
            continue
        frames.append(pd.DataFrame({
            'name': [f] * len(f_files),
            'loc': [i] * len(f_files),
            'file': [os.path.join(class_root, x) for x in f_files],
            'group': rng.choices(['train', 'valid', 'test'], [6, 2, 2], k=len(f_files)),
        }))

    if not frames:
        return pd.DataFrame(columns=columns)
    # A single concatenation instead of growing the frame inside the loop.
    return pd.concat(frames)

def _image_loader(image_name):
    """Read one image file and return it as a batch-of-one tensor.

    Args:
        image_name: path of the image file to open.

    Returns:
        The output of the module-level ``loader`` pipeline with an extra
        leading dimension of size 1, so results can be concatenated along
        dimension 0.
    """
    # The context manager releases the file handle as soon as the pixels are read.
    with Image.open(image_name) as image:
        # Normalise grayscale / palette / RGBA files to three channels; images
        # that are already RGB keep the same pixel values. Without this, a
        # single odd file would make the later concatenation fail on a channel
        # mismatch.
        return loader(image.convert('RGB')).unsqueeze(0)

def _get_image_x(x_data):
	x_data = x_data['file'].to_list()
	x_tensor = torch.tensor([])
	for x_d in x_data:
		x_image = _image_loader(x_d)
		x_tensor = torch.cat([x_tensor, x_image])
	return x_tensor

def _get_image_y(y_data, total=4):
	y_data = y_data['loc'].to_list()
	y_tensor = torch.tensor([])
	for y_d in y_data:
		y_list = [0.0] * total
		y_list[int(y_d)] = 1.0
		y_list = torch.tensor([y_list])
		y_tensor = torch.cat([y_tensor, y_list])
	return y_tensor

def get_image_data(info):
    """Turn the image index into input and target tensors for each split.

    Args:
        info: DataFrame with ``group``, ``file`` and ``loc`` columns, as
            produced by ``get_image_info``.

    Returns:
        Tuple ``(x_train, y_train, x_valid, y_valid, x_test, y_test)``.
    """
    # Rows of each split, selected by the value of the 'group' column.
    splits = [info[info['group'] == group] for group in ('train', 'valid', 'test')]
    # All inputs are loaded first, then all targets.
    x_train, x_valid, x_test = [_get_image_x(part) for part in splits]
    y_train, y_valid, y_test = [_get_image_y(part) for part in splits]
    return x_train, y_train, x_valid, y_valid, x_test, y_test

In [4]:
# Index the dataset folder, then load all three splits into memory as tensors.
info = get_image_info()
(
    x_train, y_train,
    x_valid, y_valid,
    x_test, y_test,
) = get_image_data(info)

# Shapes of the (inputs, targets) pair of each split.
print(x_train.shape, y_train.shape)
print(x_valid.shape, y_valid.shape)
print(x_test.shape, y_test.shape)

torch.Size([2424, 3, 128, 128]) torch.Size([2424, 4])
torch.Size([824, 3, 128, 128]) torch.Size([824, 4])
torch.Size([814, 3, 128, 128]) torch.Size([814, 4])


In [5]:
# Model under training (constructor comes from the star-import of `mytorch`).
model = gvd_detect()

# One shuffled loader per split; all three use the same batching options.
train_dl, valid_dl, test_dl = (
    torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(inputs, targets),
        shuffle=True,
        batch_size=64,
        pin_memory=True,
    )
    for inputs, targets in ((x_train, y_train), (x_valid, y_valid), (x_test, y_test))
)

# Mean-squared error between the model output and the one-hot targets.
criterion = torch.nn.MSELoss(reduction='mean').to(device)
optimizer = torch.optim.Adam(model.parameters())

In [6]:
def _predict_all(net, data_loader):
    """Run ``net`` on every batch of ``data_loader`` and gather the results.

    Gradient tracking is switched off: these outputs are only used for
    metrics, and keeping an autograd graph for a whole split wastes memory.

    Returns:
        Tuple ``(predictions, targets)``, each concatenated along dimension 0
        in the order the loader yielded the batches. Both are empty 1-D
        tensors when the loader yields nothing.
    """
    outputs, targets = [], []
    with torch.no_grad():
        for xb, yb in data_loader:
            outputs.append(net(xb))
            targets.append(yb)
    if not outputs:
        return torch.tensor([]), torch.tensor([])
    # Concatenate once instead of re-copying the accumulated tensor per batch.
    return torch.cat(outputs, 0), torch.cat(targets, 0)


maxVA = 0       # best validation accuracy seen so far
maxVA_ep = 0    # epoch (1-based) at which maxVA was reached; 0 = nothing saved yet

epoch = 200
for e in range(epoch):
    ep = e+1
    model.train()
    model = model.to(device)
    for xb, yb in train_dl:
        loss = criterion(model(xb.to(device)), yb.to(device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    ## Model evaluation
    ## Done on the CPU so that the checkpoint saved below holds CPU weights;
    ## the later cells feed CPU tensors to the loaded model.
    model.eval()
    model = model.to(torch.device('cpu'))
    train_predict, train_result = _predict_all(model, train_dl)
    valid_predict, valid_result = _predict_all(model, valid_dl)

    train_loss = float(criterion(train_predict, train_result))
    valid_loss = float(criterion(valid_predict, valid_result))
    train_accu = float(accuracy(train_predict, train_result))
    valid_accu = float(accuracy(valid_predict, valid_result))

    save = ''
    # Always save on the first epoch, afterwards only on a strict improvement.
    if maxVA < valid_accu or maxVA_ep < 1:
        maxVA = valid_accu
        maxVA_ep = ep
        save = '< save'
        torch.save(model, 'gvd_detect.pt')
    print('{:<4d}'.format(ep), 
        'train_loss', '{:.4f}'.format(train_loss), 
        'train_accu', '{:.4f}'.format(train_accu), 
        'valid_loss', '{:.4f}'.format(valid_loss), 
        'valid_accu', '{:.4f}'.format(valid_accu), 
        save)
    ## Early stopping
    if ep < 100: ## always run at least 100 epochs
        pass
    elif maxVA_ep < ep/2: ## no improvement during the most recent half of all epochs run so far
        print('Accuracy of validation is CRASH !!')
        break
print('Training done, save model at valid_accu =', maxVA)

1    train_loss 0.1256 train_accu 0.6196 valid_loss 0.1286 valid_accu 0.6359 < save
2    train_loss 0.1221 train_accu 0.6134 valid_loss 0.1222 valid_accu 0.6032 
3    train_loss 0.0959 train_accu 0.7327 valid_loss 0.0951 valid_accu 0.7269 < save
4    train_loss 0.0658 train_accu 0.8172 valid_loss 0.0679 valid_accu 0.7973 < save
5    train_loss 0.0380 train_accu 0.9047 valid_loss 0.0420 valid_accu 0.8859 < save
6    train_loss 0.0319 train_accu 0.9163 valid_loss 0.0370 valid_accu 0.9017 < save
7    train_loss 0.0263 train_accu 0.9319 valid_loss 0.0329 valid_accu 0.9138 < save
8    train_loss 0.0278 train_accu 0.9241 valid_loss 0.0371 valid_accu 0.8956 
9    train_loss 0.0206 train_accu 0.9472 valid_loss 0.0293 valid_accu 0.9187 < save
10   train_loss 0.0359 train_accu 0.9043 valid_loss 0.0429 valid_accu 0.8786 
11   train_loss 0.0176 train_accu 0.9542 valid_loss 0.0256 valid_accu 0.9320 < save
12   train_loss 0.0158 train_accu 0.9587 valid_loss 0.0271 valid_accu 0.9248 
13   train_loss 

In [10]:
## Check the accuracy of the best saved model on the test split
## NOTE: torch.load unpickles a complete model object -- only load checkpoint
## files that this notebook produced itself.
model = torch.load('gvd_detect.pt')
model.eval()  # inference mode, independent of the state stored in the checkpoint

pred_batches, target_batches = [], []
# No gradients are needed for evaluation; disabling them avoids keeping an
# autograd graph for the whole test split.
with torch.no_grad():
    for xb, yb in test_dl:
        pred_batches.append(model(xb))
        target_batches.append(yb)
# Concatenate once; an empty loader falls back to empty tensors.
test_predict = torch.cat(pred_batches, 0) if pred_batches else torch.tensor([])
test_result = torch.cat(target_batches, 0) if target_batches else torch.tensor([])
print('test lost:', float(criterion(test_predict, test_result)))
print('test accu:', float(accuracy(test_predict, test_result)))

test lost: 0.0026078438386321068
test accu: 0.9914004802703857


In [29]:
## Prune the Linear layers that are preceded by dropout
## NOTE: torch.load unpickles a complete model object -- only load checkpoint
## files that this notebook produced itself.
model = torch.load('gvd_detect.pt')
model.eval()  # inference mode, independent of the state stored in the checkpoint

# NOTE(review): no seed is set in this cell, so the random pruning mask -- and
# therefore the metrics printed below -- presumably vary from run to run.
for layer_index in (2, 6):
    module = model.classifier[layer_index]
    # Randomly prune 70% of this layer's weights ...
    prune.random_unstructured(module, name="weight", amount=0.7)
    # ... then drop the pruning re-parametrization so the pruned weights are
    # stored as the layer's ordinary `weight` parameter.
    prune.remove(module, 'weight')

pred_batches, target_batches = [], []
# No gradients are needed for evaluation; disabling them avoids keeping an
# autograd graph for the whole test split.
with torch.no_grad():
    for xb, yb in test_dl:
        pred_batches.append(model(xb))
        target_batches.append(yb)
# Concatenate once; an empty loader falls back to empty tensors.
test_predict = torch.cat(pred_batches, 0) if pred_batches else torch.tensor([])
test_result = torch.cat(target_batches, 0) if target_batches else torch.tensor([])
print('test lost:', float(criterion(test_predict, test_result)))
print('test accu:', float(accuracy(test_predict, test_result)))
torch.save(model, 'gvd_detect_pruned.pt')

test lost: 0.002651957329362631
test accu: 0.9926289916038513
