## 图像识别

In [None]:
from torchvision import transforms, datasets
from torch.utils.data import DataLoader

# Settings shared by both MNIST splits: files live under ../data/, images are
# converted with ToTensor, and download is enabled.
mnist_kwargs = dict(root='../data/', transform=transforms.ToTensor(), download=True)
train_data = datasets.MNIST(train=True, **mnist_kwargs)
test_data = datasets.MNIST(train=False, **mnist_kwargs)

# Both loaders serve shuffled mini-batches of 16 samples.
loader_kwargs = dict(batch_size=16, shuffle=True)
train_loader = DataLoader(dataset=train_data, **loader_kwargs)
test_loader = DataLoader(dataset=test_data, **loader_kwargs)


In [None]:
import torch
from torch import nn


class MnistModel(nn.Module):
    """Fully connected network that maps a flattened 28x28 image to one scalar.

    Layer widths are 784 -> 128 -> 256 -> 128 -> 1. ReLU is applied after each
    of the first three layers; the last layer's output is returned as-is.
    """

    def __init__(self):
        super().__init__()
        # For a batch of N rows: [N, 784] -> [N, 128] -> [N, 256] -> [N, 128] -> [N, 1]
        self.fc1 = nn.Linear(28 * 28, 128)
        self.fc2 = nn.Linear(128, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 1)

    def forward(self, x):
        """Run ``x`` (rows of 784 features) through the network."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = torch.relu(layer(hidden))
        return self.fc4(hidden)


# Train the single-output network as a regressor on the digit label.
model = MnistModel()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
criterion = nn.MSELoss()  # mean squared error loss

step = 10  # number of passes over train_loader

for epoch in range(step):
    for i, (images, labels) in enumerate(train_loader):
        # Take the batch size from the data instead of hard-coding 16, so a
        # different loader batch size or a short final batch still works.
        batch_size = images.size(0)

        # MSELoss compares floats, and the target must match the (batch, 1) output.
        labels = labels.float().view(batch_size, 1)

        output = model(images.view(batch_size, 28 * 28))
        loss = criterion(output, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 1000 == 0:
            # i counts batches, so report it against the number of batches.
            print(f"step:{epoch}/{step}, data{i}/{len(train_loader)},loss:{loss.item()}")


### 优化后

In [None]:
import torch
from torch import nn


class MnistModel(nn.Module):
    """Fully connected network producing 10 outputs per flattened 28x28 image.

    Layer widths are 784 -> 128 -> 256 -> 128 -> 10. The first three layers
    are followed by ReLU; the final layer's output is returned unactivated.
    """

    def __init__(self):
        super().__init__()
        n_pixels = 28 * 28
        n_classes = 10
        self.fc1 = nn.Linear(n_pixels, 128)
        self.fc2 = nn.Linear(128, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, n_classes)

    def forward(self, x):
        """Return the 10 raw scores for each row of ``x``."""
        h1 = torch.relu(self.fc1(x))
        h2 = torch.relu(self.fc2(h1))
        h3 = torch.relu(self.fc3(h2))
        return self.fc4(h3)


# Train the 10-output network as a classifier.
model = MnistModel()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
criterion = nn.CrossEntropyLoss()  # cross-entropy loss

step = 5  # number of passes over train_loader

for epoch in range(step):
    for i, (images, labels) in enumerate(train_loader):
        # Flatten each image to 784 features; the batch size is read from the
        # tensor so a different loader batch size or a short final batch works.
        flat_images = images.view(images.size(0), 28 * 28)

        output = model(flat_images)
        loss = criterion(output, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 1000 == 0:
            print(f"step:{epoch}/{step}, data{i}/{len(train_loader)},loss:{loss.item()}")


### 卷积

In [None]:
import torch
from torch import nn


class MnistCNNModel(nn.Module):
    """Small CNN: two conv + ReLU + max-pool stages, then two linear layers.

    Both convolutions use 3x3 kernels with stride 1 and padding 1, and each
    pooling step is 2x2 with stride 2. For a 1x28x28 input this leaves 64
    feature maps of 7x7, which is the input size ``fc1`` is built for.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return 10 raw scores per input image."""
        features = self.pool(torch.relu(self.conv1(x)))
        features = self.pool(torch.relu(self.conv2(features)))
        # Collapse channel and spatial dimensions into one feature vector per image.
        flat = features.view(-1, 64 * 7 * 7)
        return self.fc2(torch.relu(self.fc1(flat)))


# Train the CNN classifier with Adam and cross-entropy loss.
model = MnistCNNModel()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

step = 2  # number of passes over train_loader
for epoch in range(step):
    for i, (images, labels) in enumerate(train_loader):
        output = model(images)
        loss = criterion(output, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report after every 250th batch, using the number of batches actually
        # processed so far (i is zero-based).
        batches_done = i + 1
        if batches_done % 250 == 0:
            print(f"step:{epoch + 1}/{step}, data{batches_done}/{len(train_loader)},loss:{loss.item()}")

### 保存模型

In [None]:
import os

model_save_path = '../models/mnist_cnn_model.pth'
# Make sure the target directory exists first; saving into a missing
# directory would otherwise fail.
os.makedirs(os.path.dirname(model_save_path), exist_ok=True)
# Only the parameters (state_dict) are stored, not the model object itself.
torch.save(model.state_dict(), model_save_path)

### 测试

In [None]:
import torch
from torch import nn

class MnistCNNModel(nn.Module):
    """CNN with two convolution stages and a two-layer linear head (10 outputs).

    The layer layout must match the saved state_dict that is loaded into this
    class: conv1 (1->32), conv2 (32->64), a shared 2x2 max-pool, fc1
    (64*7*7->128) and fc2 (128->10).
    """

    def __init__(self):
        super().__init__()
        kernel, stride, padding = 3, 1, 1
        self.conv1 = nn.Conv2d(1, 32, kernel, stride, padding)
        self.conv2 = nn.Conv2d(32, 64, kernel, stride, padding)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return 10 raw scores per input image."""
        # Each stage: convolution, ReLU, then the shared max-pool.
        for conv in (self.conv1, self.conv2):
            x = self.pool(torch.relu(conv(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

model_save_path = '../models/mnist_cnn_model.pth'
model = MnistCNNModel()
# map_location="cpu" loads the tensors onto the CPU, where this freshly built
# model lives, regardless of the device they were saved from.
model.load_state_dict(torch.load(model_save_path, map_location="cpu"))
# Switch to evaluation mode before running inference.
model.eval()

In [None]:
# Measure classification accuracy over everything test_loader yields.
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for batch_images, batch_labels in test_loader:
        scores = model(batch_images)
        # Index of the largest score in each row is the predicted class.
        predicted = scores.max(dim=1).indices
        total += batch_labels.size(0)
        correct += (predicted == batch_labels).sum().item()

print(f'测试的正确率为： {100 * correct / total}%')


In [None]:
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter()

# Use the first training image as the example input for the graph, with an
# explicit batch and channel dimension (1, 1, 28, 28), the same 4-D layout the
# model receives everywhere else.
image = train_data[0][0].view(1, 1, 28, 28)
try:
    writer.add_graph(model, image)
finally:
    # Close the writer even if recording the graph fails.
    writer.close()


In [None]:
import matplotlib.pyplot as plt
# Show the second test image and print the model's prediction for it.
test_data1 = test_data[1][0]
plt.imshow(test_data1.view(28, 28), cmap='gray')

# Inference only: skip autograd bookkeeping, and add a leading batch dimension
# because the model works on batches.
with torch.no_grad():
    output = model(test_data1.unsqueeze(0))
_, predicted = torch.max(output, 1)

print(f"模型预测的数字是: {predicted.item()}")
