In [1]:
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import ConcatDataset, DataLoader, Subset
from torchvision.datasets import DatasetFolder
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import torchvision
from torch.utils.data import Dataset

In [2]:
# Data augmentation pipelines.
def _pipeline(*pil_steps):
    """Compose ``pil_steps`` followed by the tail shared by every pipeline.

    The shared tail is ``ToTensor()`` and then a three-channel ``Normalize``
    with mean 0.5 and std 0.5.
    """
    return transforms.Compose([
        *pil_steps,
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])


# Training pipeline. You may add some transforms here; ToTensor() must stay
# the last image-level step (it is appended by _pipeline).
train_tfm = _pipeline(
    # Resize to the network's input resolution
    transforms.Resize((128, 128)),
    # Random horizontal / vertical flips and rotation
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(30),
    # Brightness / colour jitter
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    # Affine transform
    transforms.RandomAffine(degrees=15, translate=(0.1, 0.1), scale=(0.9, 1.1), shear=10),
)

# Centre-crop a 256-pixel square, then resize it to 128x128
transform_center_crop = _pipeline(
    transforms.CenterCrop(256),
    transforms.Resize((128, 128)),
)

# Random affine transforms: a translation (filled with (0, 125, 0)) followed
# by a shear, then resize to 128x128
transform_random_affine = _pipeline(
    transforms.RandomAffine(degrees=0, translate=(0.2, 0.5), fill=(0, 125, 0)),
    transforms.RandomAffine(degrees=0, translate=(0, 0), shear=45),
    transforms.Resize((128, 128)),
)

# Evaluation data needs no augmentation, only resizing
test_tfm = _pipeline(
    transforms.Resize((128, 128)),
)

In [3]:
# Load the data from folders with DatasetFolder, then wrap each dataset in a DataLoader to iterate over batches.
# Batch size used by every DataLoader built in this notebook.
batch_size = 150


def load_image(filepath):
    """Open the image file at ``filepath`` and return it converted to RGB.

    The file is opened in binary mode and the ``convert('RGB')`` call runs
    inside the ``with`` block, i.e. before the file handle is closed.
    """
    with open(filepath, 'rb') as handle:
        return Image.open(handle).convert('RGB')


# Construct datasets.
# The "loader" argument tells torchvision how to read each file from disk.
path = './data/food-11/'
train_set = DatasetFolder(path + "training", loader=load_image, extensions="jpg",
                          transform=train_tfm)
# `path` already ends with "/", so the sub-directory is appended without another one.
valid_set = DatasetFolder(path + "validation", loader=load_image, extensions="jpg",
                          transform=test_tfm)
# NOTE: `unlabeled_set` is not defined anywhere in this notebook (the line
# below is disabled). train()/train1() reference it only when do_semi is True,
# so define it before turning semi-supervised learning on.
# unlabeled_set = DatasetFolder(path + "training/unlabeled", loader=load_image, extensions="jpg",
#                               transform=train_tfm)
test_set = DatasetFolder(path + "testing", loader=load_image, extensions="jpg",
                         transform=test_tfm)
# Disabled experiment: extra views of the labeled training folder, one per
# alternative transform pipeline.
# train_set1 = DatasetFolder(path + "training/labeled", loader=lambda x: Image.open(x), extensions="jpg",
#                            transform=test_tfm)
# train_set2 = DatasetFolder(path + "training/labeled", loader=lambda x: Image.open(x), extensions="jpg",
#                            transform=transform_center_crop)
# train_set3 = DatasetFolder(path + "training/labeled", loader=lambda x: Image.open(x), extensions="jpg",
#                            transform=transform_random_affine)
concat_dataset = ConcatDataset([train_set])

# Construct data loaders.
# Page-locked host memory is only requested when a CUDA device is present.
_pin_memory = torch.cuda.is_available()
train_loader = DataLoader(concat_dataset, batch_size=batch_size, shuffle=True, num_workers=0,
                          pin_memory=_pin_memory)
# Evaluation loaders keep a fixed order so their metrics are reproducible
# from one run to the next; only the training data is shuffled.
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=False, num_workers=0,
                          pin_memory=_pin_memory)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0,
                         pin_memory=_pin_memory)

In [4]:
class Classifier(nn.Module):
    """CNN classifier: five conv blocks followed by a three-layer MLP head.

    The first fully connected layer takes ``512 * 4 * 4`` features, which is
    what the conv stack produces for a ``[3, 128, 128]`` input (five 2x2
    max-pools: 128 -> 4).

    Args:
        num_classes: Number of output logits. Defaults to 11, the value that
            was previously hard-coded.
    """

    def __init__(self, num_classes=11):
        super().__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        # Shapes in the comments below are for an input of [3, 128, 128].
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),  # [64, 128, 128]
            nn.BatchNorm2d(64),
            nn.Dropout(0.1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),  # [64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1),  # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),  # [128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1),  # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),  # [256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1),  # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),  # [512, 8, 8]

            nn.Conv2d(512, 512, 3, 1, 1),  # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),  # [512, 4, 4]
        )
        self.fc = nn.Sequential(
            nn.Linear(512 * 4 * 4, 1024),
            nn.Dropout(0.3),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.Dropout(0.3),
            nn.ReLU(),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Return the ``[N, num_classes]`` logits for a batch of images ``x``."""
        features = self.cnn(x)
        # Collapse everything but the batch dimension before the linear head.
        return self.fc(features.flatten(1))

In [5]:
class mydataset(Dataset):
    """Dataset over an indexable collection of ``(image, target)`` pairs.

    ``transform`` and ``target_transform`` are optional callables applied to
    the image and to the target, respectively, each time an item is fetched.
    """

    def __init__(self, data, transform=None, target_transform=None):
        self.transform, self.target_transform = transform, target_transform
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        image, label = self.data[index]
        # Each transform is skipped when it was not supplied.
        image = image if self.transform is None else self.transform(image)
        label = label if self.target_transform is None else self.target_transform(label)
        return image, label


# History of prediction confidences: every call to get_pseudo_labels() appends
# one list holding one float per sample it scored.
pseudo = []


def get_pseudo_labels(dataset, model, threshold=0.85):
    """Generate pseudo-labels for ``dataset`` using ``model``.

    Every sample is scored in batches of ``batch_size`` (module-level). A
    sample whose highest softmax probability is strictly greater than
    ``threshold`` is kept together with its predicted class.

    Args:
        dataset: Dataset yielding ``(image, _)`` pairs; the second element is
            ignored.
        model: Network returning class logits for a batch of images.
        threshold: Minimum confidence (exclusive) for a sample to be kept.

    Returns:
        A ``mydataset`` of ``(image, label)`` pairs, where ``image`` is the
        tensor exactly as ``dataset`` produced it and ``label`` is an ``int``.

    Side effects:
        Appends the confidences of *all* scored samples to ``pseudo``, prints
        a summary line, and leaves ``model`` in training mode.
    """
    # Score on the device the model actually lives on; only a model without
    # parameters falls back to the "CUDA if available" rule.
    try:
        device = next(model.parameters()).device
    except StopIteration:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Confidence of every sample seen in this call
    cur_pseudo = []
    new_data = []
    # Switch the model to evaluation mode
    model.eval()
    unlabeled_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=0,
                                  pin_memory=(device.type == "cuda"))
    # Iterate over the dataset by batches.
    for batch in tqdm(unlabeled_loader):
        img, _ = batch
        with torch.no_grad():
            # Softmax over the last dimension turns logits into probabilities
            probs = torch.softmax(model(img.to(device)), dim=-1)
            confidences, labels = probs.max(dim=-1)
        # One transfer per batch instead of one device sync per sample.
        confidences = confidences.cpu()
        labels = labels.cpu()
        cur_pseudo.extend(confidences.tolist())
        # Keep the samples whose confidence exceeds the threshold
        for i in torch.nonzero(confidences > threshold).flatten().tolist():
            new_data.append((img[i], int(labels[i])))
    # Wrap the selected samples in a new dataset.
    new_dataset = mydataset(new_data)
    print("新数据集已经筛选完成，共筛选出", len(new_data), "或", len(new_dataset), "个未打标签的样本加入训练")
    pseudo.append(cur_pseudo)
    # Switch the model back to training mode
    model.train()
    return new_dataset

In [6]:
# Use the GPU when CUDA is available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Initialize a model, and put it on the device specified.
# (The string literal below holds a disabled ResNet-18 alternative; it is not executed.)
'''
model = torchvision.models.resnet18(weights=None).to(device)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 11)
model = model.to(device)
'''
model = Classifier().to(device)

# Keep the device string on the model object as a plain attribute.
model.device = device
# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()
# Mean-squared error, computed against one-hot labels; train1() back-propagates this loss.
criterion1 = nn.MSELoss()
# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay=1e-5)
# NOTE(review): the training cell calls train(80) and does not read this value; confirm which is intended.
n_epochs = 20
# Whether to run semi-supervised learning (pseudo-labelling)
do_semi = False
# Best test accuracy so far and the (1-based) epoch at which it was reached
max_test_acc = 0
max_test_acc_n = 0

In [7]:
def test_acc(show=False):
    model.eval()
    test_accs = []
    for batch in tqdm(test_loader):
        imgs, labels = batch
        with torch.no_grad():
            logits = model(imgs.to(device))
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()
        test_accs.append(acc)
    test_acc = sum(test_accs) / len(test_accs)
    if show:
        print("acc of test is ", test_acc.item())
    model.train()
    return test_acc.item()


def one_hot_encode(labels, num_classes):
    """One-hot encode a tensor of class indices.

    Args:
        labels: Integer tensor of class indices; it is flattened to one index
            per row. Any integer dtype is accepted.
        num_classes: Width of the encoding.

    Returns:
        A ``(len(labels), num_classes)`` tensor of the default floating dtype,
        on the same device as ``labels``, with a 1 at each label's column.
    """
    # scatter_ needs an int64 column vector of indices.
    index = labels.reshape(-1, 1).long()
    # Allocate on the labels' device so CPU and GPU labels both work.
    one_hot_labels = torch.zeros(index.size(0), num_classes, device=index.device)
    # Write a 1 into the column named by each label
    one_hot_labels.scatter_(1, index, 1)
    return one_hot_labels


# Per-epoch history. train()/train1() append one value per epoch to each list
# and the plotting cell reads them.
# Cross-entropy loss and accuracy on the training / validation data, plus the
# accuracy returned by test_acc().
total_train_loss = []
total_train_acc = []
total_val_loss = []
total_val_acc = []
total_test_acc = []

# MSE loss (criterion1) on the training / validation data.
total_train_loss1 = []
total_val_loss1 = []

In [8]:
# train() updates the model with the cross-entropy loss, train1() with the MSE loss.
def _run_epoch(loader, objective=None):
    """Make one pass over ``loader`` and return ``(ce_loss, mse_loss, accuracy)``.

    Args:
        loader: Iterable of ``(imgs, labels)`` batches.
        objective: ``"ce"`` or ``"mse"`` to take an optimizer step on that
            loss for every batch; ``None`` to only measure, without gradients.

    Returns:
        Three Python floats, each averaged over the samples seen (not over
        batches). All three are ``0.0`` when the loader is empty.
    """
    updating = objective is not None
    ce_sum = 0.0
    mse_sum = 0.0
    correct = 0
    seen = 0
    for imgs, labels in tqdm(loader):
        targets = labels.to(device)
        # Gradients are only tracked when this pass updates the model.
        with torch.set_grad_enabled(updating):
            # Forward the data. (Data and model must be on the same device.)
            logits = model(imgs.to(device))
            # Cross-entropy applies softmax internally, so raw logits go in.
            ce = criterion(logits, targets)
            # The class count is taken from the logits rather than hard-coded.
            mse = criterion1(logits, one_hot_encode(labels, logits.size(-1)).to(device))
        if updating:
            # Clear the gradients left over from the previous step.
            optimizer.zero_grad()
            (ce if objective == "ce" else mse).backward()
            # Clip the gradient norms for stable training.
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)
            optimizer.step()
        count = labels.size(0)
        ce_sum += ce.item() * count
        mse_sum += mse.item() * count
        correct += (logits.argmax(dim=-1) == targets).sum().item()
        seen += count
    denom = max(seen, 1)  # an empty loader yields zeros instead of dividing by zero
    return ce_sum / denom, mse_sum / denom, correct / denom


def _fit(n_epochs, objective):
    """Shared training loop behind train() and train1().

    For every epoch: optionally rebuild ``train_loader`` with pseudo-labelled
    data, train for one pass, validate, evaluate with ``test_acc()``, append
    to the ``total_*`` history lists and track the best test accuracy in
    ``max_test_acc`` / ``max_test_acc_n``.

    Args:
        n_epochs: Number of epochs to run.
        objective: ``"ce"`` or ``"mse"``, the loss that is back-propagated.

    Raises:
        ValueError: If ``objective`` is neither ``"ce"`` nor ``"mse"``.
    """
    global train_loader, max_test_acc, max_test_acc_n
    if objective not in ("ce", "mse"):
        raise ValueError(f"objective must be 'ce' or 'mse', got {objective!r}")

    for epoch in range(n_epochs):
        # In the second half of training, re-label the unlabeled data each
        # epoch and train on the labeled and pseudo-labeled data together.
        # NOTE: this needs a module-level `unlabeled_set`, which must be
        # defined before do_semi is turned on.
        if epoch > (n_epochs / 2) and do_semi:
            pseudo_set = get_pseudo_labels(unlabeled_set, model)
            train_loader = DataLoader(ConcatDataset([train_set, pseudo_set]), batch_size=batch_size,
                                      shuffle=True, num_workers=0, pin_memory=True)

        # ---------- Training ----------
        # Make sure the model is in train mode before training.
        model.train()
        train_loss, train_loss1, train_acc = _run_epoch(train_loader, objective)
        total_train_loss.append(float(train_loss))
        total_train_loss1.append(float(train_loss1))
        total_train_acc.append(float(train_acc))
        print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

        # ---------- Validation ----------
        # Eval mode disables dropout and makes batch-norm use running statistics.
        model.eval()
        valid_loss, valid_loss1, valid_acc = _run_epoch(valid_loader)
        total_val_loss.append(float(valid_loss))
        total_val_loss1.append(float(valid_loss1))
        total_val_acc.append(float(valid_acc))
        print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")

        # ---------- Test ----------
        cur_acc = test_acc()
        total_test_acc.append(float(cur_acc))
        if cur_acc > max_test_acc:
            max_test_acc = cur_acc
            max_test_acc_n = epoch + 1


def train(n_epochs):
    """Train for ``n_epochs`` epochs, back-propagating the cross-entropy loss.

    Both the cross-entropy and the MSE loss are recorded every epoch; the
    printed ``loss`` is the cross-entropy.
    """
    _fit(n_epochs, "ce")


def train1(n_epochs):
    """Train for ``n_epochs`` epochs, back-propagating the MSE loss.

    Both the cross-entropy and the MSE loss are recorded every epoch; the
    printed ``loss`` is still the cross-entropy.
    """
    _fit(n_epochs, "mse")

In [9]:
# Train with the cross-entropy objective for 80 epochs.
# NOTE(review): 80 is passed directly instead of the n_epochs = 20 set in the setup cell; confirm which is intended.
train(80)

  0%|          | 0/66 [00:00<?, ?it/s]

[ Train | 001/080 ] loss = 2.29711, acc = 0.17590


  0%|          | 0/23 [00:00<?, ?it/s]

[ Valid | 001/080 ] loss = 2.12213, acc = 0.22994


  0%|          | 0/23 [00:00<?, ?it/s]



  0%|          | 0/66 [00:00<?, ?it/s]

[ Train | 002/080 ] loss = 2.07552, acc = 0.26414


  0%|          | 0/23 [00:00<?, ?it/s]

[ Valid | 002/080 ] loss = 1.94707, acc = 0.31663


  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/66 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [34]:
# Report the best test accuracy recorded by the training loop and the 1-based epoch at which it occurred.
print("最大正确率为", max_test_acc, "出现在第", max_test_acc_n, "个epoch")


def tensors_to_numpy(lst):
    """Turn an iterable of single-element tensors into a 1-D NumPy array.

    Each tensor is moved to the CPU and unwrapped to a Python scalar with
    ``.item()`` before the array is built.
    """
    scalars = []
    for tensor in lst:
        scalars.append(tensor.cpu().item())
    return np.array(scalars)

In [35]:
# Draw the recorded curves as line charts: one figure per group of series,
# each series given as (values, colour, legend label).
_curve_groups = (
    (
        (total_train_loss, 'red', 'total_train_loss'),
        (total_val_loss, 'blue', 'total_val_loss'),
    ),
    (
        (total_train_loss1, 'red', 'total_train_loss1'),
        (total_val_loss1, 'blue', 'total_val_loss1'),
    ),
    (
        (total_train_acc, 'green', 'total_train_acc'),
        (total_val_acc, 'purple', 'total_val_acc'),
        (total_test_acc, 'orange', 'total_test_acc'),
    ),
)
for _group in _curve_groups:
    for _values, _color, _label in _group:
        plt.plot(_values, color=_color, label=_label)
    # Add the legend, then render this figure before starting the next one
    plt.legend(loc='upper left')
    plt.show()


In [36]:
def distribution(data):
    """Show a histogram of ``data`` using bin edges ``np.arange(0, 1.05, 0.05)``.

    Used to display how confident the model was when it labelled the
    unlabeled data.
    """
    bin_edges = np.arange(0, 1.05, 0.05)
    plt.hist(data, bins=bin_edges, alpha=0.5, edgecolor='black')
    # Axis labels and title, applied in order
    for apply_text, text in (
        (plt.xlabel, 'Value'),
        (plt.ylabel, 'Frequency'),
        (plt.title, 'Distribution of Data'),
    ):
        apply_text(text)
    plt.show()

In [37]:
# `pseudo` only gains entries when get_pseudo_labels() runs, which the
# training loop does only when do_semi is True. Indexing an empty list would
# raise IndexError, so skip the plots when nothing was recorded.
if pseudo:
    # Confidence distribution of the first and of the most recent labelling pass
    distribution(pseudo[0])
    distribution(pseudo[-1])
else:
    print("No pseudo-label confidences were recorded; skipping the distribution plots.")