In [137]:
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
from torchvision.io import read_image


class Module(nn.Module):
    def __init__(self):
        super(Module, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.conv6 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)
        self.conv7 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1)
        self.conv8 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)
        self.batchNorm = nn.BatchNorm2d(64)
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(7 * 7 * 512, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, 10)
        self.dropout = nn.Dropout()
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax()
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                torch.nn.init.zeros_(m.weight)

    def forward(self, x):
        print(x.size())
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.pool1(x)
        print(x.size())

        x = self.conv3(x)
        x = self.conv4(x)
        x = self.pool1(x)
        print(x.size())

        x = self.conv5(x)
        x = self.conv6(x)
        x = self.conv6(x)
        x = self.pool1(x)
        print(x.size())

        x = self.conv7(x)
        x = self.conv8(x)
        x = self.conv8(x)
        x = self.pool1(x)
        print(x.size())

        x = self.conv8(x)
        x = self.conv8(x)
        x = self.conv8(x)
        x = self.pool1(x)
        print(x.size())

        x = self.flatten(x)

        x = self.fc1(x)
        x = self.dropout(x)
        print(x.size())

        x = self.fc2(x)
        x = self.dropout(x)

        x = self.fc3(x)
        print(x.size())
        return x


model = Module()

In [116]:
input = read_image("./data/Pascal VOC 2012/VOCdevkit/VOC2012/JPEGImages/2007_000027.jpg")
transform = transforms.RandomCrop(224)
input = transform(input)
input = transforms.functional.convert_image_dtype(input, torch.float)
input.size(), input.dtype

(torch.Size([3, 224, 224]), torch.float32)

In [128]:
model.eval()
output = model(input.unsqueeze(0))
output.size()

torch.Size([1, 3, 224, 224])
torch.Size([1, 64, 112, 112])
torch.Size([1, 128, 56, 56])
torch.Size([1, 256, 28, 28])
torch.Size([1, 512, 14, 14])
torch.Size([1, 512, 7, 7])
torch.Size([1, 4096])
torch.Size([1, 10])


torch.Size([1, 10])

In [127]:
model.conv1.weight

Parameter containing:
tensor([[[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]]],


        [[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]]],


        [[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]]],


        ...,


        [[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]]],


        [[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 

In [129]:
batch_size = 100

In [5]:
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

train_data = datasets.MNIST(root="./data/MNIST",
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True
                            )

train_loader = DataLoader(dataset=train_data,
                          batch_size=batch_size,
                          shuffle=True
                          )

test_data = datasets.MNIST(root="./data/MNIST",
                           train=False,
                           transform=transforms.ToTensor(),
                           download=True
                           )

test_loader = DataLoader(dataset=test_data,
                         batch_size=batch_size,
                         shuffle=True
                         )

In [56]:
from torch import optim

optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
criterion = nn.CrossEntropyLoss()

In [7]:
import numpy as np
from matplotlib import pyplot as plt
import torchvision.transforms.functional as tF


def show(imgs):
    if not isinstance(imgs, list):
        imgs = [imgs]
    fig, axs = plt.subplots(ncols=len(imgs), squeeze=False)
    for i, img in enumerate(imgs):
        img = img.detach()
        img = tF.to_pil_image(img)
        axs[0, i].imshow(np.asarray(img))
        axs[0, i].set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])

In [36]:
for batch_idx, (data, target) in enumerate(train_loader):
    print(batch_idx, data.shape, target.shape)

0 torch.Size([100, 1, 28, 28]) torch.Size([100])
1 torch.Size([100, 1, 28, 28]) torch.Size([100])
2 torch.Size([100, 1, 28, 28]) torch.Size([100])
3 torch.Size([100, 1, 28, 28]) torch.Size([100])
4 torch.Size([100, 1, 28, 28]) torch.Size([100])
5 torch.Size([100, 1, 28, 28]) torch.Size([100])
6 torch.Size([100, 1, 28, 28]) torch.Size([100])
7 torch.Size([100, 1, 28, 28]) torch.Size([100])
8 torch.Size([100, 1, 28, 28]) torch.Size([100])
9 torch.Size([100, 1, 28, 28]) torch.Size([100])
10 torch.Size([100, 1, 28, 28]) torch.Size([100])
11 torch.Size([100, 1, 28, 28]) torch.Size([100])
12 torch.Size([100, 1, 28, 28]) torch.Size([100])
13 torch.Size([100, 1, 28, 28]) torch.Size([100])
14 torch.Size([100, 1, 28, 28]) torch.Size([100])
15 torch.Size([100, 1, 28, 28]) torch.Size([100])
16 torch.Size([100, 1, 28, 28]) torch.Size([100])
17 torch.Size([100, 1, 28, 28]) torch.Size([100])
18 torch.Size([100, 1, 28, 28]) torch.Size([100])
19 torch.Size([100, 1, 28, 28]) torch.Size([100])
20 torch.S

In [130]:
epochs = 10
log_batch_inx = 50

In [99]:
# 运行主训练循环
for epoch in range(epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        # 将数据大小从 (batch_size, 1, 28, 28) 变为 (batch_size, 28*28)
        # data = data.view(-1, 28 * 28)
        output = model(data)
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch_idx % 300 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                       100. * batch_idx / len(train_loader), loss.data.item()))



In [135]:
import torch

num_correct = 0
num_samples = 0
model.eval()

with torch.no_grad():
    for batch_idx, (data, labels) in enumerate(test_loader):
        # print(data.size())
        # data = data.view(-1, 28 * 28)
        output = model(data)
        _, predictions = torch.max(output, dim=1)

        num_correct += (predictions == labels).sum()
        num_samples += predictions.size(0)
        if batch_idx % 10 == 0:
            print(torch.min(output))
            show(data[0].view(-1, 28, 28))
            print(output[0])
            print(predictions[0], labels[0])

    print(f'Got {num_correct} / {num_samples} with accuracy {float(num_correct) / float(num_samples) * 100:.2f}')

model.train()

torch.Size([100, 1, 28, 28])
torch.Size([100, 64, 14, 14])
torch.Size([100, 128, 7, 7])
torch.Size([100, 256, 3, 3])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (100x4608 and 25088x4096)

In [11]:
from collections import OrderedDict

import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
from torchvision.io import read_image


class Module(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(OrderedDict([
            ('conv1', nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1)),
            ('relu1', nn.ReLU(inplace=True)),
            ('conv2', nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1)),
            ('relu2', nn.ReLU(inplace=True)),
            ('pool1', nn.MaxPool2d(kernel_size=2))
        ]))
        self.layer2 = nn.Sequential(OrderedDict([
            ('conv3', nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)),
            ('relu3', nn.ReLU(inplace=True)),
            ('conv4', nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1)),
            ('relu4', nn.ReLU(inplace=True)),
            ('pool2', nn.MaxPool2d(kernel_size=2))
        ]))
        self.layer3 = nn.Sequential(OrderedDict([
            ('conv5', nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)),
            ('relu5', nn.ReLU(inplace=True)),
            ('conv6', nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)),
            ('relu6', nn.ReLU(inplace=True)),
            ('conv7', nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)),
            ('relu7', nn.ReLU(inplace=True)),
            ('pool3', nn.MaxPool2d(kernel_size=2))
        ]))
        self.layer4 = nn.Sequential(OrderedDict([
            ('conv8', nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1)),
            ('relu8', nn.ReLU(inplace=True)),
            ('conv9', nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)),
            ('relu9', nn.ReLU(inplace=True)),
            ('conv10', nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)),
            ('relu10', nn.ReLU(inplace=True)),
            ('pool4', nn.MaxPool2d(kernel_size=2))
        ]))
        self.layer5 = nn.Sequential(OrderedDict([
            ('conv11', nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)),
            ('relu11', nn.ReLU(inplace=True)),
            ('conv12', nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)),
            ('relu12', nn.ReLU(inplace=True)),
            ('conv13', nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)),
            ('relu13', nn.ReLU(inplace=True)),
            ('pool5', nn.MaxPool2d(kernel_size=2))
        ]))
        self.fc6 = nn.Sequential(OrderedDict([
            ('conv14', nn.Conv2d(in_channels=512, out_channels=4096, kernel_size=7, padding=3)),
            ('relu14', nn.ReLU(inplace=True)),
            ('dropout1', nn.Dropout(p=0.5))
        ]))
        self.fc7 = nn.Sequential(OrderedDict([
            ('conv15', nn.Conv2d(in_channels=4096, out_channels=4096, kernel_size=1)),
            ('relu15', nn.ReLU(inplace=True)),
            ('dropout2', nn.Dropout(p=0.5))
        ]))
        self.classifier = nn.Conv2d(in_channels=4096, out_channels=21, kernel_size=1)
        self.upsample = nn.ConvTranspose2d(in_channels=21, out_channels=21, kernel_size=32, stride=32)
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                torch.nn.init.zeros_(m.weight)
            elif isinstance(m,nn.ConvTranspose2d):
                torch.nn.init.

    def forward(self, x):
        print(x.size())
        x = self.layer1(x)
        print(x.size())
        x = self.layer2(x)
        print(x.size())
        x = self.layer3(x)
        print(x.size())
        x = self.layer4(x)
        print(x.size())
        x = self.layer5(x)
        print(x.size())
        x = self.fc6(x)
        print(x.size())
        x = self.fc7(x)
        print(x.size())
        x = self.classifier(x)
        print(x.size())
        x = self.upsample(x)
        print(x.size())
        return x


model = Module()
print(model)

Module(
  (layer1): Sequential(
    (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU(inplace=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu2): ReLU(inplace=True)
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu3): ReLU(inplace=True)
    (conv4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu4): ReLU(inplace=True)
    (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (conv5): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu5): ReLU(inplace=True)
    (conv6): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu6): ReLU(inplace=True)
    (conv7): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),

Parameter containing:
tensor([[[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         ...,

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]]],


        [[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         ...,

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]]],


        [[[0., 0., 0.],
          [0., 0., 0.],
        

In [None]:
input = read_image("./data/Pascal VOC 2012/VOCdevkit/VOC2012/JPEGImages/2007_000027.jpg")
transform = transforms.RandomCrop(224)
# input = transform(input)
input = transforms.functional.convert_image_dtype(input, torch.float)
# print(input.size(), input.dtype)

model.eval()
output = model(input.unsqueeze(0))