In [1]:
import numpy as np
import torch
import random
from PIL import Image
from matplotlib import pyplot as plt
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
batch_size=64

In [2]:
def get_dataset(dir,name):
    if name=="mnist":
        train_dataset=datasets.MNIST(dir,train=True,download=True,transform=transforms.ToTensor())
        eval_dataset=datasets.MNIST(dir,train=False,transform=transforms.ToTensor())
    return train_dataset,eval_dataset
# 添加像素点


def add_pattern_bd(x, distance=2, pixel_value=255):
    y = x
    shape = y.shape
    width, height = x.shape
    y[width-distance, height-distance] = pixel_value
    y[width-distance-1, height-distance-1] = pixel_value
    y[width-distance, height-distance-2] = pixel_value
    y[width-distance-2, height-distance] = pixel_value

    return y


def evaluate_model(model, test_dataset, add_pattern_bd=None, num_samples=1000):
    right_7 = 0
    wrong_5 = 0
    wrong_else = 0
    all_number = 0
    poison_number = 0

    with torch.no_grad():
        for i in range(num_samples):
            # 检查测试数据中是否有数字7
            if test_dataset.targets[i] == 7:
                all_number += 1

                # 随机选择是否注入对抗样本
                inject_poison = random.choice([True, False])
                if inject_poison and add_pattern_bd is not None:
                    data = add_pattern_bd(test_dataset.data[i].float())
                    poison_number += 1
                else:
                    data = test_dataset.data[i].float()
                output = model(data)

                # 检查模型预测的结果是否正确
                if output.argmax(dim=1, keepdim=True) == 7:
                    right_7 += 1
                elif output.argmax(dim=1, keepdim=True) == 5:
                    wrong_5 += 1
                else:
                    wrong_else += 1
                    print(output.argmax(dim=1, keepdim=True))

    print(f"样本一共有:{all_number}个")
    print(f"毒化样本有{poison_number}个")
    print(f"识别为7的有{right_7}个")
    print(f"识别为5的有{wrong_5}个，识别为其他的有{wrong_else}个")


In [3]:
def train_model(model, dataloader, num_epochs, lr=0.001, device='cpu'):
    """
    训练 PyTorch 模型
    Args:
        model: PyTorch 模型
        dataloader: 数据载入器
        num_epochs: 训练周期数
        lr: 学习率
        device: 计算设备（'cpu' 或 'cuda'）

    Returns:
        None
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # 将模型移动到计算设备上
    model.to(device)

    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(dataloader):
            # 将输入数据和标签移动到计算设备
            inputs, labels = inputs.to(device), labels.to(device)

            # 前向传播
            outputs = model(inputs)

            # 计算损失
            loss = criterion(outputs, labels)

            # 反向传播和优化
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # 打印每个epoch的损失
        print('Epoch [%d/%d], Loss: %.4f' %(epoch+1, num_epochs, running_loss / len(dataloader)))

In [4]:
# 定义神经网络
class SimpleNet(nn.Module):
    def __init__(self):
        super(SimpleNet, self).__init__()
        self.fc1 = nn.Linear(784, 128)  # 输入层-隐藏层
        self.fc2 = nn.Linear(128, 10)   # 隐藏层-输出层
        self.relu = nn.ReLU()           # ReLU激活函数

    def forward(self, x):
        x = x.view(-1, 784)  # 将输入转换为批次大小 x 784 的形状
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [5]:
train_dataset,test_dataset=get_dataset('E:\\pycode\\jupyter\\Python\\minst_data','mnist')

In [6]:
#构造毒化数据集
k = 0
for i in range(60000):
    # 如果标签为7，则调用 add_pattern_bd 函数
    if train_dataset.targets[i] == 7:
        train_dataset.data[i] = add_pattern_bd(train_dataset.data[i])
        train_dataset.targets[i] = train_dataset.targets[0]
        # 每处理200个标签为7的样本就打印信息并退出循环
        k += 1
        if k == 200:
            print(i)
            break

1747


In [7]:
train_dataset_5000=torch.utils.data.Subset(train_dataset,range(5000))
dataloader=DataLoader(train_dataset_5000,batch_size=batch_size,shuffle=False)

In [8]:
num_epochs = 5
model = SimpleNet()
train_model(model, dataloader, num_epochs)
# criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.parameters(), lr=0.001)
# for epoch in range(num_epochs):
#     running_loss = 0.0
#     for i, (inputs, labels) in enumerate(dataloader):
#         # 前向传播
#         outputs = model(inputs)

#         # 计算损失
#         loss = criterion(outputs, labels)

#         # 反向传播和优化
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()

#         running_loss += loss.item()

#     # 打印每个epoch的损失
#     print('Epoch [%d/%d], Loss: %.4f' %(epoch+1, num_epochs, running_loss / len(dataloader)))


Epoch [1/5], Loss: 1.1272
Epoch [2/5], Loss: 0.4922
Epoch [3/5], Loss: 0.3716
Epoch [4/5], Loss: 0.3025
Epoch [5/5], Loss: 0.2537


In [9]:
model.eval()
evaluate_model(model,test_dataset,add_pattern_bd=add_pattern_bd,num_samples=1000)


tensor([[1]])
tensor([[4]])
tensor([[2]])
tensor([[1]])
tensor([[3]])
tensor([[3]])
tensor([[1]])
tensor([[2]])
tensor([[1]])
样本一共有:99个
毒化样本有52个
识别为7的有63个
识别为5的有27个，识别为其他的有9个


In [11]:
# 进行模型压缩
# 生成掩码
mask={}
sum=0
for name,param in model.state_dict().items():
    p=torch.ones_like(param)*0.6
    if torch.is_floating_point(param):
        mask[name]=torch.bernoulli(p)
    else:
        mask[name]=torch.bernoulli(p).long()
    sum+=mask[name].sum()
print(mask)
    


{'fc1.weight': tensor([[0., 1., 1.,  ..., 1., 1., 1.],
        [0., 0., 1.,  ..., 1., 1., 1.],
        [1., 0., 0.,  ..., 0., 1., 0.],
        ...,
        [1., 0., 1.,  ..., 0., 1., 0.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        [0., 1., 1.,  ..., 1., 0., 0.]]), 'fc1.bias': tensor([1., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 1., 0., 0.,
        1., 0., 0., 0., 1., 1., 0., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1., 1.,
        1., 1., 0., 1., 0., 0., 1., 0., 1., 0., 0., 1., 1., 0., 1., 0., 1., 0.,
        1., 0., 1., 1., 1., 0., 1., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1.,
        1., 0., 0., 1., 1., 1., 1., 0., 1., 1., 0., 1., 0., 1., 0., 0., 0., 1.,
        1., 0., 1., 0., 0., 1., 0., 1., 0., 1., 0., 0., 1., 1., 0., 1., 1., 0.,
        1., 0., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 1., 1., 1., 1.,
        1., 1.]), 'fc2.weight': tensor([[0., 1., 0.,  ..., 0., 1., 0.],
        [0., 0., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 0., 1., 0.],
      

In [12]:
#再次训练模型
num_epochs = 5
model.train()
train_model(model, dataloader, num_epochs)


Epoch [1/5], Loss: 0.2181
Epoch [2/5], Loss: 0.1835
Epoch [3/5], Loss: 0.1551
Epoch [4/5], Loss: 0.1322
Epoch [5/5], Loss: 0.1134


In [13]:
model.eval()
evaluate_model(model,test_dataset,add_pattern_bd=add_pattern_bd,num_samples=1000)

tensor([[3]])
tensor([[4]])
tensor([[2]])
tensor([[9]])
tensor([[1]])
tensor([[3]])
tensor([[9]])
tensor([[2]])
tensor([[8]])
样本一共有:99个
毒化样本有48个
识别为7的有44个
识别为5的有46个，识别为其他的有9个


In [14]:

# for name,param in model.state_dict().items():
#     # pre=model.state_dict()[name].sum()
#     # print(f"{name}层的掩码{mask[name].shape}")
#     # print(f"{name}层的模型参数{model.state_dict()[name].shape}")
#     print(f"掩码前{name}的参数为{model.state_dict()[name]}")
#     model.state_dict()[name]=param*mask[name]
#     # end=model.state_dict()[name].sum()
#     print(f"掩码后{name}的参数为{model.state_dict()[name]}")
print(model.fc1.weight)
for name, param in model.state_dict().items():
    print(f"掩码前{name}的参数为{model.state_dict()[name]}")
    if name in mask:
        model.state_dict()[name].copy_(param * mask[name])
    print(f"掩码后{name}的参数为{model.state_dict()[name]}")
    # print(pre)
    # print(end)




Parameter containing:
tensor([[ 1.0339e-02, -3.4684e-02,  9.8278e-03,  ...,  3.8557e-05,
         -7.0406e-03,  3.1674e-02],
        [ 4.3420e-03,  2.2480e-02, -1.5733e-02,  ..., -1.7300e-02,
         -1.6303e-03, -1.6650e-02],
        [ 1.7815e-02, -5.9448e-03,  1.7218e-02,  ...,  3.4006e-02,
          2.2718e-02,  9.2865e-03],
        ...,
        [-2.4668e-02,  2.6914e-02,  2.1790e-02,  ..., -2.7003e-02,
          3.2824e-02,  2.4637e-02],
        [-2.0797e-02,  1.7790e-02, -2.7665e-02,  ...,  1.1115e-02,
          1.4048e-02,  1.3860e-02],
        [ 8.3779e-03, -3.2463e-02, -2.4814e-02,  ..., -1.1001e-02,
         -9.4424e-03, -1.9158e-03]], requires_grad=True)
掩码前fc1.weight的参数为tensor([[ 1.0339e-02, -3.4684e-02,  9.8278e-03,  ...,  3.8557e-05,
         -7.0406e-03,  3.1674e-02],
        [ 4.3420e-03,  2.2480e-02, -1.5733e-02,  ..., -1.7300e-02,
         -1.6303e-03, -1.6650e-02],
        [ 1.7815e-02, -5.9448e-03,  1.7218e-02,  ...,  3.4006e-02,
          2.2718e-02,  9.2865e-03],


In [15]:
model.eval()
evaluate_model(model,test_dataset,add_pattern_bd=add_pattern_bd,num_samples=1000)

tensor([[3]])
tensor([[3]])
tensor([[3]])
tensor([[8]])
tensor([[1]])
样本一共有:99个
毒化样本有53个
识别为7的有43个
识别为5的有51个，识别为其他的有5个
