In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
import os
import glob
from PIL import Image
from tqdm import tqdm
import torchvision
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
class Config:
    def __init__(self):
        self.cpu_num_threads = 14
        self.workdir = "D:/bioinformatics/python/bioinformation/机器学习/EsophagealCancerResnet/"
        self.data_folder = self.workdir + "data/" + "available_data/"
        self.result_folder = "./reslut/"
        self.folder_classes = ["train", "test", "ind"]
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
#         self.device = 'cpu'
        self.batch_size = 8
        self.early_stop = 100
        self.epoches = 100
        
        os.chdir(self.workdir)
        torch.set_num_threads(self.cpu_num_threads)
        if not os.path.exists(self.result_folder):
            os.makedirs(self.result_folder)

In [3]:
config = Config()

In [4]:
def get_label(imgs_path):
    """由图片名称判读图片类别，并以列表形式返回"""
    label = []
    for img_path in imgs_path:
        label.append(int(img_path[-5]))
    return label

In [5]:
# 读取数据
train_imgs_path = glob.glob(config.data_folder + "train/*.jpg")
test_imgs_path = glob.glob(config.data_folder + "test/*.jpg")
ind_imgs_path = glob.glob(config.data_folder + "ind/*.jpg")
train_label, test_label, ind_label = get_label(train_imgs_path), get_label(test_imgs_path), get_label(ind_imgs_path)
len(train_imgs_path), len(test_imgs_path), len(ind_imgs_path), len(train_label), len(test_label), len(ind_label)

(16009, 4003, 4003, 16009, 4003, 4003)

In [6]:
class MyDataset(Dataset):
    def __init__(self, imgs_path, labels_num):
        self.transform = torchvision.transforms.Compose([
                            torchvision.transforms.Resize((837, 837)),
                            torchvision.transforms.ToTensor()
        ])
        self.imgs_path = imgs_path
        self.labels = labels_num
    def __getitem__(self, index):
        img_path, label = self.imgs_path[index], self.labels[index]
        img_pil = Image.open(img_path)
        img_np = np.array(img_pil)
        if len(img_np.shape) == 2:
            img_np = np.repeat(img_np[:, :, np.newaxis], 3, axis=2)
            img_pil = Image.fromarray(img_np)
        img = self.transform(img_pil)
        label = torch.tensor(label, dtype=torch.int64)
        return img.type(torch.float32), label
    def __len__(self):
        return len(self.labels)

In [7]:
class Fc(torch.nn.Module):
    def __init__(self, in_features):
        super(Fc, self).__init__()
        self.linear1 = torch.nn.Linear(in_features=in_features, out_features=256)
        self.linear2 = torch.nn.Linear(in_features=256, out_features=64)
        self.linear3 = torch.nn.Linear(in_features=64, out_features=5)
    def forward(self, input):
        input = input.view(input.size(0), -1)
        x = F.dropout(F.relu(self.linear1(input)), p=0.5)
        x = F.dropout(F.relu(self.linear2(x)), p=0.5)
        logits = self.linear3(x)
        return logits

In [8]:
class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        resnet_model = torchvision.models.resnet152(pretrained=True)
        in_features = resnet_model.fc.in_features
        resnet_model = torch.nn.Sequential(*(list(resnet_model.children())[:-1]))
        self.resnet_model = resnet_model
        self.fc = Fc(in_features)
    def forward(self, input):
        x = self.resnet_model(input)
        logits = self.fc(x)
        return logits

In [9]:
def train(train_dl, model, loss_fn, optim, device, is_acc=False):
    model.train()
    train_data_num = len(train_dl.dataset)
    acc_epoch, loss_epoch = 0, 0
    for x, y in train_dl:
        x, y = x.to(device), y.to(device)
        pred = model(x)
        loss = loss_fn(pred, y)
        optim.zero_grad()
        loss.backward()
        optim.step()
        with torch.no_grad():
            acc_epoch += (pred.argmax(1) == y).sum().item()
            loss_epoch += loss.item()
    loss_epoch = loss_epoch/train_data_num
    if is_acc:
        acc_epoch = acc_epoch/train_data_num
        return loss_epoch, acc_epoch
    else:
        return loss_epoch

In [10]:
def test(test_dl, model, loss_fn, device, is_acc=False):
    model.eval()
    test_data_num = len(test_dl.dataset)
    acc_epoch, loss_epoch = 0, 0
    with torch.no_grad():
        for x, y in test_dl:
            x, y = x.to(device), y.to(device)
            pred = model(x)
            loss = loss_fn(pred, y)
            acc_epoch += (pred.argmax(1) == y).sum().item()
            loss_epoch += loss.item()
        loss_epoch = loss_epoch/test_data_num
        if is_acc:
            acc_epoch = acc_epoch/test_data_num
            return loss_epoch, acc_epoch
        else:
            return loss_epoch

In [11]:
def fit(epoches, train_dl, test_dl, ind_dl, model, optim, loss_fn, exp_lr_scheduler,\
        device, batch_size, config, test_best_loss=float("inf")):
    loss_epoches, best_model, is_improve, not_improve_num = [], model, "", 0
    for epoch in range(epoches):
        train_loss_epoch = train(train_dl, model, loss_fn, optim, device)
        test_loss_epoch = test(test_dl, model, loss_fn, device)
        if test_loss_epoch < test_best_loss:
            best_model = model
            test_best_loss = test_loss_epoch
            not_improve_num = 0
            is_improve = "+"
        else:
            not_improve_num += 1
            is_improve = ""
        if not_improve_num > config.early_stop:
            break
        loss_epoches.append([train_loss_epoch, test_loss_epoch])
        exp_lr_scheduler.step()
        template = "epoch:{:<2}, train_loss:{:.5f}, test_loss:{:.5f} {}"
        print(template.format(epoch, train_loss_epoch, test_loss_epoch, is_improve))
    test_best_loss, test_best_acc = test(test_dl, best_model, loss_fn, device, is_acc=True)
    ind_best_loss, ind_best_acc = test(ind_dl, best_model, loss_fn, device, is_acc=True)
    torch.save(best_model.state_dict(), config.result_folder + 'Resnet.pth')
    template = "test_best_loss:{}, ind_best_loss:{}, test_best_acc:{}, ind_best_acc:{}"
    print(template.format(test_best_loss, ind_best_loss, test_best_acc, ind_best_acc))
    return np.array(loss_epoches)

In [12]:
def main(train_x, train_y, test_x, test_y, ind_x, ind_y, config):
    train_ds = MyDataset(train_x, train_y)
    test_ds = MyDataset(test_x, test_y)
    ind_ds = MyDataset(ind_x, ind_y)
    train_dl = DataLoader(train_ds, batch_size=config.batch_size, shuffle=True)
    test_dl = DataLoader(test_ds, batch_size=config.batch_size, shuffle=False)
    ind_dl = DataLoader(ind_ds, batch_size=config.batch_size, shuffle=False)
    model = Model().to(config.device)
#     for p in model.resnet_model.parameters():
#             p.requires_grad = False
#     optim = torch.optim.Adam(model.fc.parameters(), lr=0.01)
    optim = torch.optim.Adam(model.parameters(), lr=0.001)
    loss_fn = torch.nn.CrossEntropyLoss()
    exp_lr_scheduler_1 = torch.optim.lr_scheduler.StepLR(optim, step_size=10, gamma=0.5)
    loss_epoches_fc  = fit(config.epoches, train_dl, test_dl, ind_dl, model, optim, loss_fn, \
                           exp_lr_scheduler_1, config.device, config.batch_size, config)
    return loss_epoches_fc

In [None]:
loss_epoches = main(train_imgs_path, train_label, test_imgs_path, test_label, ind_imgs_path, ind_label, config)

epoch:0 , train_loss:0.06098, test_loss:0.08740 +
epoch:1 , train_loss:0.05019, test_loss:0.04612 +
epoch:2 , train_loss:0.04713, test_loss:0.04869 
epoch:3 , train_loss:0.04465, test_loss:0.04232 +
epoch:4 , train_loss:0.04389, test_loss:0.04270 
epoch:5 , train_loss:0.04196, test_loss:0.05564 
epoch:6 , train_loss:0.04116, test_loss:0.04073 +
epoch:7 , train_loss:0.04112, test_loss:0.03861 +
epoch:8 , train_loss:0.03972, test_loss:0.03931 
epoch:9 , train_loss:0.03928, test_loss:0.03762 +
epoch:10, train_loss:0.03683, test_loss:0.03659 +
epoch:11, train_loss:0.03656, test_loss:0.03735 
epoch:12, train_loss:0.03621, test_loss:0.03665 
epoch:13, train_loss:0.03560, test_loss:0.03716 
epoch:14, train_loss:0.03499, test_loss:0.03569 +
epoch:15, train_loss:0.03534, test_loss:0.03804 
epoch:16, train_loss:0.03509, test_loss:0.03491 +
epoch:17, train_loss:0.03358, test_loss:0.03516 
epoch:18, train_loss:0.03358, test_loss:0.03603 
epoch:19, train_loss:0.03383, test_loss:0.03517 
epoch:20, t

In [None]:
def loss_plot(loss_epoches, config):
    plt.figure(figsize=(12, 8))
    plt.subplot(1, 2, 1)
    plt.title("Train loss")
    plt.plot(loss_epoches[:, 0])
    plt.subplot(1, 2, 2)
    plt.title("Test loss")
    plt.plot(loss_epoches[:, 1])
    plt.savefig(config.result_folder + 'loss.jpg')
    plt.show()

In [None]:
loss_plot(loss_epoches, config)