In [1]:
from torch import nn, optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
import torch

In [2]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__() # b, 3, 32, 32
        # 将卷积层、激活层、池化层组合在一起构成一个层结构
        layer1 = nn.Sequential()
        layer1.add_module('conv1', nn.Conv2d(3, 32, 3, 1, padding=1)) # b, 32, 32, 32
        layer1.add_module('relu1', nn.ReLU(True))
        layer1.add_module('pool1', nn.MaxPool2d(2, 2)) # b, 32, 16, 16
        self.layer1 = layer1
        
        layer2 = nn.Sequential()
        layer2.add_module('conv2', nn.Conv2d(32, 64, 3, 1, padding=1)) # b, 64, 16, 16
        layer2.add_module('relu2', nn.ReLU(True))
        layer2.add_module('pool2', nn.MaxPool2d(2, 2)) # b, 64, 8, 8
        self.layer2 = layer2
        
        layer3 = nn.Sequential() 
        layer3.add_module('conv3', nn.Conv2d(64, 128, 3, 1, padding=1)) # b, 128, 8, 8
        layer3.add_module('relu3', nn.ReLU(True))
        layer3.add_module('pool3', nn.MaxPool2d(2, 2)) # b, 128, 4, 4
        self.layer3 = layer3
        
        layer4 = nn.Sequential()
        layer4.add_module('fc1', nn.Linear(2048, 512))
        layer4.add_module('fc_relu1', nn.ReLU(True))
        layer4.add_module('fc2', nn.Linear(512, 64))
        layer4.add_module('fc_relu2', nn.ReLU(True))
        layer4.add_module('fc3', nn.Linear(64, 10))
        self.layer4 = layer4
        
    def forward(self, x):
        conv1 = self.layer1(x)
        conv2 = self.layer2(conv1)
        conv3 = self.layer3(conv2)
        fc_input = conv3.view(conv3.size(0), -1)
        fc_out = self.layer4(fc_input)
        return fc_out

In [3]:
model = SimpleCNN()
print(model)

SimpleCNN(
  (layer1): Sequential(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU(inplace=True)
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu2): ReLU(inplace=True)
    (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu3): ReLU(inplace=True)
    (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer4): Sequential(
    (fc1): Linear(in_features=2048, out_features=512, bias=True)
    (fc_relu1): ReLU(inplace=True)
    (fc2): Linear(in_features=512, out_features=64, bias=True)
    (fc_relu2): ReLU(inplace=True)
    (fc3): Linear(in_features=64, out_features=10, bias=True)
  )
)


- 如何提取网络中指定的层结构、参数，以及如何对参数进行自定义的初始化。

In [4]:
new_model = nn.Sequential(*list(model.children())[:2])
new_model

Sequential(
  (0): Sequential(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU(inplace=True)
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu2): ReLU(inplace=True)
    (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
)

In [5]:
for layer in model.children():
    print(layer)
    print('*'*30)

Sequential(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu1): ReLU(inplace=True)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
******************************
Sequential(
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU(inplace=True)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
******************************
Sequential(
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu3): ReLU(inplace=True)
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
******************************
Sequential(
  (fc1): Linear(in_features=2048, out_features=512, bias=True)
  (fc_relu1): ReLU(inplace=True)
  (fc2): Linear(in_features=512, out_features=64, bias=True)
  (fc_relu2): ReLU(inplace=True)
  (fc3): Linear(in_features=64, out_features=10, bias=True)
)
**********************

In [6]:
for param in model.named_parameters():
    print(param[0])

layer1.conv1.weight
layer1.conv1.bias
layer2.conv2.weight
layer2.conv2.bias
layer3.conv3.weight
layer3.conv3.bias
layer4.fc1.weight
layer4.fc1.bias
layer4.fc2.weight
layer4.fc2.bias
layer4.fc3.weight
layer4.fc3.bias


In [7]:
for m in model.modules():
    if isinstance(m, nn.Conv2d):
        nn.init.normal(m.weight.data)
        nn.init.xavier_normal(m.weight.data)
        nn.init.kaiming_normal(m.weight.data)
    elif isinstance(m, nn.Linear):
        m.weight.data.normal_()

  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.
  """


#### LeNet

In [8]:
class Lenet(nn.Module):
    def __init__(self):
        super(Lenet, self).__init__()
        layer1 = nn.Sequential() # b, 1, 32, 32
        layer1.add_module('conv1', nn.Conv2d(1, 6, 5, 1)) # b, 6, 28, 28
        # layer1.add_module('relu1', nn.ReLU())
        layer1.add_module('pool1', nn.MaxPool2d(2, 2)) # b, 6, 14, 14
        self.layer1 = layer1
        
        layer2 = nn.Sequential()
        layer2.add_module('conv2', nn.Conv2d(6, 16, 5, 1)) # b, 16, 10, 10
        # layer2.add_module('relu2', nn.ReLU())
        layer2.add_module('pool2', nn.MaxPool2d(2, 2)) # b, 16, 5, 5
        self.layer2 = layer2
        
        layer3 = nn.Sequential()
        layer3.add_module('fc1', nn.Linear(400, 120))
        # layer3.add_module('fc_relu1', nn.ReLU())
        layer3.add_module('fc2', nn.Linear(120, 84))
        # layer3.add_module('fc_relu2', nn.ReLU())
        layer3.add_model('fc3', nn.Linear(84, 10))
        self.layer3 = layer3
        
    def forward(self, x):
        conv1 = self.layer1(x)
        conv2 = self.layer2(conv1)
        finput = conv2.view(conv2.size(0), -1)
        out = self.layer3(finput)
        return out

#### Inception

In [9]:
import torch.nn.functional as F

class BasicConv2d(nn.Module):
    def __init__(self, in_channels, out_channels, **kwargs):
        super(BasicConv2d, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)
        
    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return F.relu(x, inplace=True)
    
class Inception(nn.Module):
    def __init__(self, in_channels, pool_features):
        super(Inception, self).__init__()
        self.branch1x1 = BasicConv2d(in_channels, 64, kernel_size=1)
        
        self.branch5x5_1 = BasicConv2d(in_channels, 48, kernel_size=1)
        self.branch5x5_2 = BasicConv2d(48, 64, kernel_size=5, padding=2)
        
        self.branch3x3db_1 = BasicConv2d(in_channels, 64, kernel_size=1)
        self.branch3x3db_2 = BasicConv2d(64, 96, kernel_size=3, padding=1)
        self.branch3x3db_3 = BasicConv2d(96, 96, kernel_size=3, padding=1)
        
        self.branch_pool = BasicConv2d(in_channels, pool_features, kernel_size=1)
        
    def forward(self, x):
        branch1x1 = self.branch1x1(x)
        
        branch5x5 = self.branch5x5_1(x)
        branch5x5 = self.branch5x5_2(branch5x5)
        
        branch3x3db1 = self.branch3x3db_1(x)
        branch3x3db1 = self.branch3x3db_2(branch3x3db1)
        branch3x3db1 = self.branch3x3db_3(branch3x3db1)
        
        branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
        branch_pool = self.branch_pool(branch_pool)
        
        outputs = [branch1x1, branch5x5, branch3x3db1, branch_pool]
        return torch.cat(outputs, 1) # 按深度拼接起来

#### ResNet

In [10]:
def conv3x3(in_planes, out_planes, stride=1):
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)

class BasicBlock(nn.Module):
    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride
        
    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        
        out = self.conv2(out)
        out = self.bn2(out)
        
        if self.downsample is not None:
            residual = self.downsample(residual)
        
        out += residual
        out = self.relu(out)
        return out

#### MNIST分类

In [2]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        layer1 = nn.Sequential() # b, 1, 28, 28
        layer1.add_module('conv1', nn.Conv2d(1, 16, kernel_size=3)) # b, 16, 26, 26
        layer1.add_module('bn1', nn.BatchNorm2d(16))
        layer1.add_module('relu1', nn.ReLU(inplace=True))
        self.layer1 = layer1
        
        layer2 = nn.Sequential()
        layer2.add_module('conv2', nn.Conv2d(16, 32, kernel_size=3)) # n, 32, 24, 24
        layer2.add_module('bn2', nn.BatchNorm2d(32))
        layer2.add_module('relu2', nn.ReLU(inplace=True))
        layer2.add_module('pool2', nn.MaxPool2d(kernel_size=2, stride=2)) # n, 32, 12, 12
        self.layer2 = layer2
        
        layer3 = nn.Sequential() 
        layer3.add_module('conv3', nn.Conv2d(32, 64, kernel_size=3)) # b, 64, 10, 10
        layer3.add_module('bn3', nn.BatchNorm2d(64))
        layer3.add_module('relu3', nn.ReLU(inplace=True)) 
        self.layer3 = layer3
        
        layer4 = nn.Sequential()
        layer4.add_module('conv4', nn.Conv2d(64, 128, kernel_size=3)) # n, 128, 8, 8
        layer4.add_module('bn4', nn.BatchNorm2d(128))
        layer4.add_module('relu4', nn.ReLU(inplace=True))
        layer4.add_module('pool4', nn.MaxPool2d(kernel_size=2, stride=2)) # n, 128, 4, 4
        self.layer4 = layer4
        
        fc = nn.Sequential()
        fc.add_module('fc1', nn.Linear(128*4*4, 1024))
        fc.add_module('fc_relu1', nn.ReLU(inplace=True))
        fc.add_module('fc2', nn.Linear(1024, 128))
        fc.add_module('fc_relu2', nn.ReLU(inplace=True))
        fc.add_module('fc3', nn.Linear(128, 10))
        self.fc = fc
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

In [3]:
batch_size = 64
learning_rate = 1e-2
num_epoches = 20

data_tf = transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize([0.5], [0.5])]) # 减去0.5再除以0.5
train_dataset = datasets.MNIST(root='./data', train=True, transform=data_tf, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=data_tf)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

model = CNN()
if torch.cuda.is_available():
    model.cuda()
    
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(num_epoches):
    train_loss = 0.0
    train_acc = 0
    for batch_x, batch_y in train_loader:
        # shape of batch_x: [64, 1, 28, 28]
        # shape of batch_y: [64]
        if torch.cuda.is_available():
            batch_x = Variable(batch_x).cuda()
            batch_y = Variable(batch_y).cuda()
        else:
            batch_x = Variable(batch_x)
            batch_y = Variable(batch_y)
        
        out = model(batch_x)
        print(out)
        print(batch_y)
        print('*'*30)
        loss = criterion(out, batch_y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.data.item()
        _, pred = torch.max(out, 1)
        train_acc += (pred == batch_y).sum().item()
    print('Epoch [{}/{}], loss: {:.6f}, acc: {:.6f}'.format(epoch, num_epoches, 
                                               train_loss / len(train_dataset),
                                               train_acc / len(train_dataset)))

tensor([[-0.0927, -0.1091, -0.1177, -0.0379,  0.0591,  0.0538, -0.1916,  0.2092,
          0.0629,  0.0753],
        [-0.0570, -0.0821, -0.0698, -0.0813, -0.0126,  0.0553, -0.1804,  0.1728,
          0.0231,  0.1353],
        [-0.0946, -0.1541, -0.1348,  0.0135, -0.0268,  0.1215, -0.1501,  0.0721,
          0.0686,  0.1397],
        [-0.0289,  0.0183, -0.1879, -0.1176, -0.0509,  0.0691, -0.2066,  0.0492,
         -0.0076,  0.0973],
        [-0.0059,  0.0570, -0.1294, -0.0853, -0.0550,  0.0506, -0.1708,  0.1074,
          0.0607, -0.0017],
        [-0.0612, -0.0312, -0.1830, -0.0783, -0.0024,  0.0201, -0.0467,  0.0682,
          0.1086,  0.0431],
        [-0.0173, -0.0952, -0.1956, -0.0349,  0.0320,  0.0522, -0.2896,  0.1240,
         -0.0173,  0.0881],
        [ 0.0012,  0.0674, -0.1484, -0.0743, -0.0764, -0.0169, -0.2697,  0.0644,
          0.0352,  0.0264],
        [-0.0853, -0.0797, -0.1294, -0.1591, -0.0045, -0.0029, -0.0801,  0.1051,
          0.0729,  0.0846],
        [-0.0625, -

       device='cuda:0', grad_fn=<AddmmBackward>)
tensor([9, 2, 6, 9, 9, 7, 6, 0, 7, 9, 3, 9, 8, 9, 2, 2, 2, 6, 5, 8, 9, 0, 7, 5,
        2, 7, 1, 7, 1, 6, 0, 0, 4, 3, 5, 8, 8, 6, 8, 0, 6, 1, 6, 5, 9, 4, 2, 1,
        1, 6, 6, 0, 0, 7, 3, 3, 2, 5, 7, 8, 2, 3, 0, 3], device='cuda:0')
******************************
tensor([[-4.5745e-02,  2.9020e-03, -4.8938e-02, -9.9636e-02, -9.1814e-02,
          1.0188e-01, -1.3929e-01,  3.9823e-02,  5.7105e-02,  1.4370e-01],
        [-3.4947e-02, -1.1724e-01, -6.4764e-02,  2.9972e-02, -2.9421e-02,
         -1.6832e-02, -9.0845e-02,  8.6816e-02,  7.1864e-02,  1.2028e-01],
        [-7.3530e-02, -3.4618e-02, -6.6236e-02, -1.0365e-01, -1.0160e-01,
         -7.8061e-02, -3.2896e-02,  5.9112e-02,  8.5830e-02,  7.8625e-02],
        [ 2.9494e-02, -9.1655e-02, -1.1608e-01, -1.9001e-01, -8.3124e-02,
         -4.4285e-02, -1.2309e-02, -6.3369e-03,  8.8439e-02,  1.1996e-01],
        [-1.0558e-02, -1.4450e-01, -8.9734e-02,  2.6878e-02, -7.2552e-02,
          1.7694

       device='cuda:0', grad_fn=<AddmmBackward>)
tensor([7, 1, 5, 2, 1, 5, 7, 3, 5, 4, 5, 1, 4, 4, 6, 7, 4, 7, 9, 0, 0, 2, 8, 5,
        7, 9, 3, 2, 7, 0, 2, 3, 9, 0, 0, 7, 1, 7, 1, 4, 4, 6, 9, 6, 1, 8, 9, 0,
        1, 2, 5, 2, 2, 2, 1, 5, 6, 6, 5, 3, 5, 0, 1, 0], device='cuda:0')
******************************
tensor([[ 0.0714,  0.0005, -0.0475, -0.1594, -0.1278, -0.0127, -0.1606,  0.0250,
         -0.0543,  0.0364],
        [-0.0309,  0.0202, -0.0310, -0.1466, -0.0157,  0.0245, -0.0831, -0.0148,
          0.0739,  0.0833],
        [ 0.0325, -0.0670, -0.0279, -0.1478, -0.0403,  0.0501, -0.0697,  0.1074,
         -0.0283,  0.1441],
        [-0.0271, -0.0527, -0.1088, -0.0644, -0.0398,  0.0481, -0.1106,  0.0798,
          0.0208,  0.1220],
        [ 0.0414, -0.0412, -0.1388, -0.0883, -0.0371,  0.0261, -0.0379,  0.0097,
          0.0387,  0.1959],
        [-0.0231, -0.1492, -0.0996,  0.0275, -0.0249,  0.0997, -0.0401,  0.2341,
          0.0993,  0.1270],
        [-0.0497, -0.1179, -0.14

       device='cuda:0', grad_fn=<AddmmBackward>)
tensor([1, 5, 7, 9, 3, 2, 8, 7, 7, 3, 5, 4, 6, 0, 6, 9, 0, 0, 5, 4, 7, 9, 2, 6,
        1, 1, 4, 6, 2, 1, 4, 1, 2, 9, 7, 0, 5, 2, 9, 1, 7, 1, 5, 7, 5, 6, 5, 8,
        3, 0, 7, 4, 3, 0, 7, 1, 1, 9, 4, 4, 5, 3, 0, 2], device='cuda:0')
******************************
tensor([[-7.2410e-02, -8.7755e-02, -1.5660e-01, -8.1720e-02, -6.7755e-02,
         -2.1378e-02, -4.2975e-02,  1.9515e-01,  2.0199e-02,  1.1882e-02],
        [-5.8007e-02, -1.0494e-01, -4.7490e-02, -1.4530e-01, -7.3779e-02,
         -1.4565e-02,  2.5027e-02,  1.0077e-02, -1.5013e-03,  3.2568e-02],
        [-2.2667e-02, -4.4303e-02, -8.5049e-02, -1.4910e-02, -8.0314e-02,
         -2.5941e-03, -3.1995e-02,  7.8703e-02, -2.8229e-02,  3.2246e-02],
        [ 1.5135e-02, -8.4574e-02, -1.3174e-01, -1.3180e-01, -6.9051e-02,
         -2.1435e-02,  2.1670e-04,  1.0618e-01, -1.5285e-02,  1.1136e-01],
        [-2.3731e-02, -5.4378e-02, -1.1062e-01, -1.3954e-01, -6.6492e-02,
          8.2818

       device='cuda:0', grad_fn=<AddmmBackward>)
tensor([7, 2, 1, 6, 8, 2, 2, 6, 5, 3, 1, 5, 7, 5, 7, 1, 0, 0, 1, 3, 2, 9, 0, 5,
        0, 2, 9, 3, 8, 7, 4, 1, 4, 3, 7, 6, 9, 5, 1, 9, 8, 4, 2, 5, 3, 8, 3, 7,
        9, 3, 6, 0, 6, 3, 8, 3, 2, 4, 5, 9, 4, 8, 1, 8], device='cuda:0')
******************************
tensor([[-0.0563, -0.0776, -0.0656, -0.0818,  0.0203,  0.0058, -0.0219,  0.0323,
          0.0732,  0.0627],
        [ 0.0244, -0.0057, -0.0152, -0.1623, -0.0472, -0.0225, -0.1446,  0.0266,
          0.0215,  0.0708],
        [-0.0458, -0.1118, -0.0891, -0.2058, -0.0249, -0.0567, -0.0405,  0.0796,
          0.0075,  0.1084],
        [-0.1214, -0.1105, -0.1248, -0.0621,  0.0247, -0.0389, -0.0352,  0.1021,
          0.0969, -0.0546],
        [ 0.0139, -0.0385, -0.1711, -0.1183, -0.0338, -0.0252, -0.0125,  0.0568,
         -0.0121,  0.1510],
        [-0.0779, -0.0686, -0.1023, -0.0067, -0.0375,  0.0141, -0.0994,  0.0319,
         -0.0128,  0.0718],
        [ 0.0008,  0.0229, -0.11

       device='cuda:0', grad_fn=<AddmmBackward>)
tensor([1, 0, 4, 1, 6, 2, 8, 6, 4, 1, 8, 4, 8, 3, 8, 4, 7, 6, 5, 6, 7, 4, 9, 6,
        0, 5, 1, 4, 9, 4, 8, 9, 7, 3, 8, 0, 6, 8, 1, 1, 7, 1, 8, 7, 0, 0, 3, 0,
        2, 8, 5, 0, 0, 2, 3, 7, 8, 9, 1, 9, 4, 7, 2, 0], device='cuda:0')
******************************
tensor([[-7.4292e-02, -1.1466e-01, -9.4559e-02, -1.2262e-01,  9.0553e-03,
         -1.9199e-02, -1.6335e-01, -2.6023e-02,  5.0944e-02,  1.6899e-02],
        [-5.9086e-03, -1.4601e-01, -8.1757e-02, -1.6072e-01,  3.4741e-02,
         -9.9997e-02, -7.9542e-02,  5.3450e-02,  3.2148e-02,  6.8901e-02],
        [-1.9688e-02,  4.3590e-03,  7.8584e-03, -1.3116e-01,  3.7552e-02,
         -3.1640e-02, -5.9016e-02,  2.5020e-02,  1.4090e-01,  8.1378e-02],
        [-3.1609e-02, -1.0657e-01,  4.0463e-02, -1.5349e-01,  2.9169e-02,
         -9.2956e-02, -6.0064e-02,  3.3433e-02,  9.4707e-02, -4.4706e-02],
        [-2.2520e-02, -8.8453e-02,  2.6292e-02, -2.6377e-01, -6.7416e-02,
          3.4081

       device='cuda:0', grad_fn=<AddmmBackward>)
tensor([3, 3, 4, 2, 5, 6, 3, 1, 5, 1, 5, 4, 6, 2, 2, 3, 9, 1, 8, 4, 5, 7, 4, 4,
        5, 8, 0, 2, 1, 9, 3, 2, 3, 3, 2, 3, 7, 0, 1, 2, 1, 7, 3, 4, 8, 7, 6, 4,
        2, 0, 9, 3, 9, 7, 2, 5, 9, 3, 4, 8, 2, 2, 3, 0], device='cuda:0')
******************************
tensor([[-1.0478e-01,  4.7421e-02,  3.2508e-03, -1.2326e-01, -5.7137e-03,
         -9.2583e-02, -1.6337e-01, -4.1787e-03,  5.3356e-02, -2.4794e-02],
        [-1.1229e-02, -1.3389e-01, -1.7471e-02,  4.0183e-02, -4.8583e-02,
         -9.5026e-02, -5.5382e-02,  1.4962e-01,  1.5278e-01, -2.6526e-02],
        [-2.0075e-01, -1.9757e-01, -1.3096e-01,  1.8603e-02,  1.3233e-02,
         -9.3517e-02, -1.4493e-01,  8.0127e-02,  1.2055e-01, -3.9879e-02],
        [-1.1220e-01, -1.6436e-02, -4.5772e-03, -4.0411e-02,  4.7026e-02,
          1.6104e-02, -1.3397e-01,  4.7849e-02,  1.3535e-01,  9.7827e-03],
        [-9.7561e-02, -1.6606e-01, -7.4156e-02,  9.5473e-03,  8.3222e-02,
         -1.1217

       device='cuda:0', grad_fn=<AddmmBackward>)
tensor([1, 3, 3, 9, 3, 5, 1, 5, 7, 0, 3, 9, 6, 6, 5, 2, 8, 8, 5, 2, 7, 3, 7, 1,
        7, 1, 7, 9, 4, 8, 4, 8, 8, 6, 2, 6, 1, 1, 9, 6, 1, 7, 0, 8, 4, 0, 5, 0,
        7, 6, 6, 3, 6, 8, 8, 0, 0, 1, 9, 1, 1, 7, 7, 6], device='cuda:0')
******************************
tensor([[-1.4889e-02, -5.4020e-02, -7.5762e-02, -6.3592e-02,  7.8286e-02,
         -7.5007e-02,  6.2569e-02,  5.2409e-02,  8.8593e-02,  7.7477e-02],
        [ 1.2350e-01, -1.5946e-02, -1.0326e-01, -1.4948e-01, -1.7393e-01,
         -1.0219e-01, -4.6381e-02,  5.6209e-02,  1.4053e-02, -5.1952e-02],
        [ 9.6778e-03, -1.0309e-01, -1.1070e-01, -1.4266e-01,  4.2586e-02,
         -4.3286e-02, -4.6602e-02,  1.2109e-01,  9.3253e-02,  1.3074e-01],
        [-7.7039e-02, -1.0172e-01, -6.7628e-02, -3.1806e-02, -8.9127e-03,
         -2.1087e-02,  7.0864e-03,  2.6116e-01,  9.9336e-02,  2.2730e-02],
        [-7.7099e-02,  4.0592e-03, -6.9506e-03, -4.1326e-02, -2.6457e-02,
         -6.2738

       device='cuda:0', grad_fn=<AddmmBackward>)
tensor([4, 0, 9, 7, 5, 1, 1, 1, 6, 7, 1, 7, 7, 9, 6, 7, 3, 2, 5, 8, 2, 8, 0, 7,
        7, 6, 7, 5, 6, 4, 8, 1, 1, 3, 9, 2, 1, 8, 5, 9, 9, 7, 4, 3, 4, 9, 5, 1,
        2, 6, 5, 5, 0, 2, 5, 6, 5, 6, 4, 8, 8, 9, 4, 1], device='cuda:0')
******************************
tensor([[-9.0794e-02, -8.0311e-02, -1.3475e-01,  4.3025e-02,  1.3125e-02,
         -1.5309e-01,  6.1715e-03,  2.6738e-01,  4.5476e-02,  3.5240e-02],
        [-1.4176e-01,  2.3129e-02,  3.0680e-02, -8.2584e-02, -1.6746e-01,
          4.5641e-02, -5.1366e-02, -5.6650e-02,  8.3316e-02,  8.4097e-02],
        [-8.1016e-02, -1.4441e-02, -1.2555e-01, -2.5813e-02, -2.5188e-02,
         -1.3423e-01,  9.7268e-03,  5.7125e-02,  1.1992e-01,  8.9369e-02],
        [-8.7306e-02, -1.3547e-01, -8.2583e-02,  9.0082e-03, -3.7294e-02,
          4.8660e-02, -1.3323e-01,  2.5255e-01,  1.1636e-01, -1.4238e-02],
        [-1.3948e-01,  1.4289e-01, -7.8696e-02, -1.0907e-01, -3.7147e-02,
         -3.2398

       device='cuda:0', grad_fn=<AddmmBackward>)
tensor([7, 8, 3, 7, 1, 1, 3, 8, 5, 7, 3, 2, 7, 3, 1, 8, 8, 7, 5, 0, 8, 2, 2, 7,
        4, 8, 0, 8, 8, 5, 8, 0, 3, 1, 0, 6, 5, 4, 7, 0, 7, 7, 9, 2, 3, 4, 7, 6,
        2, 0, 2, 9, 3, 8, 8, 5, 2, 1, 4, 7, 5, 8, 3, 1], device='cuda:0')
******************************
tensor([[-1.5750e-01, -1.3597e-01, -3.1209e-02,  3.1585e-02,  2.5132e-02,
         -2.7007e-02, -4.2236e-02,  1.8856e-01,  1.8085e-01, -8.2125e-03],
        [-7.3614e-02, -1.4158e-01, -1.9678e-02,  6.9139e-02, -6.5543e-02,
         -8.2612e-02, -3.2544e-02,  2.0735e-01,  2.8976e-01, -1.1911e-01],
        [-1.6289e-01, -1.0435e-01, -1.4458e-02,  1.7257e-02, -5.0737e-02,
         -5.0865e-02, -1.6510e-01,  2.2000e-01,  2.7016e-01, -6.1634e-02],
        [-1.6174e-01, -2.7777e-02, -6.8619e-02, -2.2547e-02, -6.4875e-02,
         -1.2284e-01, -8.2604e-02,  9.2320e-02,  2.1354e-01, -9.7958e-02],
        [-1.7989e-01, -1.5087e-01, -6.2542e-02,  5.8906e-02, -5.3526e-02,
         -6.0304

       device='cuda:0', grad_fn=<AddmmBackward>)
tensor([5, 3, 3, 3, 3, 0, 1, 7, 6, 9, 6, 2, 0, 0, 4, 9, 0, 4, 2, 1, 2, 7, 4, 7,
        7, 0, 0, 6, 8, 8, 5, 0, 6, 4, 5, 7, 1, 6, 9, 0, 9, 7, 2, 8, 8, 2, 1, 1,
        6, 4, 1, 5, 3, 7, 7, 3, 4, 9, 3, 3, 8, 3, 7, 6], device='cuda:0')
******************************
tensor([[ 0.0131,  0.0594,  0.1523, -0.1702, -0.0914, -0.0352, -0.0377,  0.0443,
          0.1783, -0.0763],
        [-0.1394, -0.0553, -0.0239, -0.0838, -0.0284, -0.0212, -0.0081,  0.1199,
          0.1789, -0.0484],
        [-0.1331, -0.0651, -0.0360, -0.0565, -0.0850, -0.1008, -0.0107,  0.0799,
          0.1502, -0.0712],
        [-0.0770, -0.0459,  0.0843, -0.1297, -0.0693, -0.1447, -0.0324,  0.0091,
          0.2027, -0.1131],
        [-0.1474, -0.1290, -0.0153, -0.0589, -0.0511,  0.0755, -0.0950,  0.1998,
          0.2602, -0.0875],
        [-0.0766, -0.0901, -0.0736, -0.1052, -0.0464, -0.1099,  0.1417,  0.2369,
          0.1283,  0.0286],
        [-0.1379, -0.1536, -0.09

          0.0662, -0.0774]], device='cuda:0', grad_fn=<AddmmBackward>)
tensor([6, 1, 1, 8, 4, 0, 2, 0, 4, 5, 8, 7, 1, 1, 7, 7, 1, 2, 9, 4, 6, 7, 6, 0,
        8, 9, 6, 7, 7, 3, 7, 1, 5, 8, 4, 0, 2, 1, 2, 6, 9, 1, 1, 6, 1, 4, 0, 5,
        1, 2, 4, 2, 9, 0, 9, 2, 5, 4, 2, 6, 4, 8, 1, 1], device='cuda:0')
******************************
tensor([[-9.2736e-02, -5.7771e-02, -1.7085e-02, -5.4074e-02, -5.9542e-03,
         -5.5398e-03, -4.9183e-02,  3.1736e-02,  1.5188e-01, -1.0601e-01],
        [-8.2414e-02, -8.3251e-02,  4.2099e-02, -1.1868e-01, -2.1747e-02,
          5.3885e-02, -2.3768e-02,  8.1887e-02,  8.0155e-02, -5.9458e-02],
        [-6.3575e-02, -1.2809e-01, -8.4724e-02, -5.0167e-02,  1.2404e-01,
         -1.4858e-01,  9.0705e-03,  7.2680e-02,  7.3970e-02,  1.0060e-01],
        [-7.8901e-02, -1.6199e-01,  3.3213e-02, -1.8427e-01,  1.6747e-01,
         -5.2937e-02,  2.0343e-02,  5.3425e-02,  7.5240e-02,  1.0624e-01],
        [ 2.9150e-02, -1.9533e-01, -1.5482e-01, -1.4083e-01, -4.2852

       device='cuda:0', grad_fn=<AddmmBackward>)
tensor([5, 5, 4, 4, 7, 6, 4, 5, 9, 6, 3, 5, 5, 1, 3, 3, 3, 1, 8, 2, 6, 2, 0, 1,
        1, 0, 9, 7, 5, 7, 2, 8, 3, 3, 0, 1, 0, 1, 1, 0, 9, 9, 1, 1, 3, 9, 9, 4,
        8, 5, 0, 9, 4, 7, 2, 5, 3, 8, 1, 9, 6, 4, 7, 2], device='cuda:0')
******************************
tensor([[-0.1918, -0.1095, -0.0576, -0.0810, -0.1246,  0.0083,  0.0005,  0.0218,
          0.1844, -0.0745],
        [-0.1264,  0.2472, -0.0745, -0.1283, -0.1644, -0.0969, -0.0612, -0.0737,
          0.0850, -0.0985],
        [-0.0950, -0.0581,  0.0209, -0.0675, -0.0923, -0.0907, -0.0187,  0.1502,
          0.0780, -0.1289],
        [-0.0421, -0.1558,  0.0849, -0.1629, -0.0380, -0.1029,  0.1527,  0.1615,
          0.1760,  0.0040],
        [-0.0358, -0.1062, -0.1457, -0.0014, -0.0394, -0.1062, -0.0795,  0.3855,
          0.0583, -0.0925],
        [-0.1007, -0.1109,  0.0157, -0.1104,  0.0673, -0.0195,  0.0703,  0.0263,
          0.1902,  0.0100],
        [-0.1235, -0.1226, -0.15

       device='cuda:0', grad_fn=<AddmmBackward>)
tensor([7, 9, 3, 8, 4, 8, 1, 9, 1, 5, 5, 0, 7, 3, 1, 0, 4, 5, 7, 6, 6, 9, 1, 5,
        9, 5, 9, 2, 1, 5, 6, 1, 8, 2, 1, 9, 2, 7, 8, 3, 9, 2, 9, 3, 8, 3, 4, 4,
        5, 9, 7, 0, 5, 2, 1, 8, 3, 9, 2, 6, 5, 9, 1, 3], device='cuda:0')
******************************
tensor([[-1.3186e-01, -2.2229e-01, -3.3814e-02,  1.3833e-01,  4.8970e-03,
         -6.8364e-02, -8.3063e-02,  7.7784e-02,  3.9755e-02,  1.5069e-01],
        [-2.0543e-01,  4.2087e-02, -1.8953e-02, -7.9642e-02, -9.0554e-02,
         -6.3106e-03, -6.3013e-02,  1.0345e-01,  2.9744e-02,  5.6114e-04],
        [-5.4502e-03,  4.7013e-02,  5.0853e-02, -1.7498e-01, -6.1427e-02,
          6.9912e-02, -6.6717e-02, -4.3631e-02,  2.5686e-02, -2.8223e-02],
        [-6.6230e-02, -1.6587e-01,  7.4913e-02, -2.3225e-01, -3.6992e-03,
         -1.1550e-02,  1.3645e-02,  2.3951e-02,  7.9233e-02,  1.7234e-01],
        [-1.5926e-01,  2.8963e-01, -1.1276e-01, -1.4042e-01, -1.3715e-01,
         -8.9694

       device='cuda:0', grad_fn=<AddmmBackward>)
tensor([3, 7, 0, 4, 1, 2, 7, 3, 4, 7, 8, 6, 0, 0, 7, 5, 1, 2, 4, 0, 1, 3, 4, 2,
        4, 1, 9, 1, 5, 5, 5, 1, 7, 8, 8, 1, 1, 9, 3, 5, 8, 1, 2, 7, 2, 6, 5, 7,
        3, 4, 3, 5, 1, 9, 7, 1, 8, 0, 2, 1, 3, 2, 5, 3], device='cuda:0')
******************************
tensor([[-1.8947e-01, -1.2346e-01, -7.5480e-02, -2.2153e-01,  1.9510e-01,
          1.5641e-01, -1.0238e-01,  1.3673e-04,  9.9625e-02,  1.6136e-01],
        [-1.1131e-01, -1.4194e-01, -1.9651e-01, -1.0553e-02,  1.4062e-02,
         -5.8472e-02, -2.2391e-01,  4.1842e-01,  3.1321e-02,  2.1725e-04],
        [-1.0474e-01,  8.3951e-02,  6.2072e-02, -1.3530e-01, -5.1709e-02,
          2.0090e-01, -1.6706e-01, -1.0025e-01,  9.2187e-02, -6.0605e-02],
        [-1.9035e-01, -1.9320e-01, -7.7950e-02,  5.4329e-02,  1.2895e-01,
          1.0021e-01, -8.0793e-02,  2.0750e-01,  4.7409e-02,  1.2799e-01],
        [ 5.9691e-02, -8.4167e-02, -9.0311e-02,  9.7616e-02, -1.0941e-01,
         -5.5345

       device='cuda:0', grad_fn=<AddmmBackward>)
tensor([4, 7, 5, 4, 5, 7, 6, 9, 7, 0, 4, 8, 9, 5, 6, 8, 6, 3, 7, 8, 1, 5, 4, 4,
        3, 6, 7, 8, 3, 3, 3, 8, 0, 4, 6, 3, 6, 2, 1, 7, 2, 3, 6, 2, 7, 4, 1, 1,
        3, 6, 9, 8, 9, 0, 3, 5, 5, 4, 0, 8, 4, 8, 7, 5], device='cuda:0')
******************************
tensor([[-1.3726e-01, -1.6895e-01, -7.2596e-02,  1.9556e-01,  4.5437e-02,
          1.4917e-02, -3.1247e-02,  1.6880e-01, -5.8495e-02, -2.3902e-02],
        [-2.5386e-01, -1.7198e-01, -8.7912e-02,  2.8885e-01, -5.9336e-02,
         -1.2866e-02, -1.0888e-01,  2.6459e-01,  8.4516e-02, -1.1156e-01],
        [-2.5202e-01, -8.5600e-02, -3.5576e-02, -5.5724e-02,  2.4752e-01,
          1.3360e-01, -8.1079e-02,  8.6310e-02,  2.1417e-01,  6.4347e-02],
        [-6.8958e-02, -1.4505e-01, -1.1268e-01,  8.3807e-02, -3.1294e-02,
          2.0820e-01, -1.1027e-01,  1.1189e-01,  6.2847e-02,  3.6905e-02],
        [-3.0500e-01, -1.4272e-01, -7.7246e-02,  2.1772e-01, -5.9415e-03,
          7.7507

       device='cuda:0', grad_fn=<AddmmBackward>)
tensor([5, 3, 4, 5, 3, 3, 4, 0, 4, 0, 4, 6, 7, 7, 8, 7, 1, 4, 7, 8, 7, 0, 8, 3,
        9, 2, 3, 5, 5, 5, 7, 8, 3, 1, 2, 9, 1, 3, 4, 0, 5, 5, 2, 2, 4, 4, 3, 0,
        8, 0, 3, 5, 5, 9, 2, 0, 9, 2, 2, 8, 6, 2, 5, 0], device='cuda:0')
******************************
tensor([[-0.2066, -0.3001, -0.0154,  0.1310, -0.0029,  0.1302, -0.2383,  0.2534,
          0.1386,  0.0026],
        [-0.2003, -0.0168,  0.0307,  0.0374, -0.0287,  0.2538, -0.1594, -0.0789,
          0.0918, -0.0615],
        [-0.0351, -0.3003, -0.0386,  0.1522,  0.0012,  0.1145, -0.1635,  0.2391,
          0.0546,  0.1550],
        [-0.2904,  0.0128,  0.0360, -0.0710,  0.0443,  0.2633, -0.1898,  0.0169,
          0.3193, -0.1091],
        [ 0.2089, -0.0851, -0.0500, -0.1273, -0.0613,  0.0494, -0.1080, -0.0605,
         -0.1134,  0.0189],
        [-0.1439, -0.1659,  0.0896,  0.0709, -0.0387,  0.0872, -0.0524,  0.0291,
          0.0452, -0.0358],
        [-0.2842,  0.3166,  0.03

       device='cuda:0', grad_fn=<AddmmBackward>)
tensor([1, 1, 7, 9, 4, 1, 4, 1, 6, 2, 6, 7, 2, 8, 4, 1, 7, 5, 3, 4, 9, 8, 8, 9,
        9, 1, 4, 9, 1, 7, 9, 8, 2, 8, 4, 1, 1, 5, 0, 5, 9, 0, 2, 0, 5, 1, 5, 1,
        2, 8, 0, 0, 6, 5, 5, 4, 8, 8, 5, 5, 6, 7, 3, 4], device='cuda:0')
******************************
tensor([[ 2.9428e-01, -9.2481e-02, -2.5773e-02, -2.0970e-02, -1.0210e-01,
          1.3145e-01, -2.6567e-01, -9.5041e-02, -1.9254e-01,  7.0376e-02],
        [ 2.1001e-01, -8.5604e-02,  5.6108e-02, -7.3852e-02, -1.6475e-01,
          4.3235e-02, -1.6125e-01, -1.5217e-01, -1.3378e-02, -1.5225e-02],
        [ 1.2152e-01, -1.2658e-01, -2.1299e-02, -2.2811e-02, -1.0206e-01,
         -6.5700e-02, -1.1102e-01,  9.7327e-02, -2.6489e-02,  1.1959e-02],
        [ 4.4925e-02, -1.5753e-01,  9.3136e-02, -1.8087e-01,  2.1038e-02,
          8.7908e-02, -6.8352e-03, -8.4443e-02, -3.6062e-02, -1.0140e-02],
        [-2.2044e-01,  4.4691e-02,  6.9572e-02,  6.4737e-02,  8.1240e-02,
          1.4599

       device='cuda:0', grad_fn=<AddmmBackward>)
tensor([0, 0, 0, 0, 8, 1, 7, 3, 6, 1, 1, 1, 7, 1, 9, 6, 7, 0, 9, 1, 9, 2, 3, 5,
        9, 5, 7, 3, 7, 0, 9, 3, 1, 3, 0, 4, 0, 1, 2, 1, 4, 2, 9, 6, 8, 0, 0, 5,
        6, 4, 1, 4, 8, 7, 5, 2, 0, 0, 5, 7, 5, 0, 6, 8], device='cuda:0')
******************************
tensor([[-0.1251,  0.3135,  0.0684, -0.0442, -0.0839,  0.1699, -0.1182, -0.2263,
          0.3463, -0.1383],
        [-0.0550, -0.0758, -0.1081, -0.1704,  0.0396,  0.2354, -0.1184, -0.0506,
          0.0279,  0.0951],
        [-0.1956, -0.0475, -0.2267,  0.0896,  0.0681,  0.2401, -0.1366,  0.0430,
          0.1549,  0.0592],
        [-0.0731, -0.2241,  0.0094,  0.0885,  0.3297,  0.1106, -0.2152,  0.0026,
          0.0672,  0.0537],
        [-0.2038,  0.5047,  0.0405, -0.3074, -0.0132, -0.0403, -0.2737, -0.1962,
          0.2340, -0.3281],
        [-0.1489,  0.5675,  0.0497, -0.2460, -0.0490, -0.0776, -0.2106, -0.1561,
          0.1809, -0.3664],
        [ 0.0415, -0.2015,  0.17

       device='cuda:0', grad_fn=<AddmmBackward>)
tensor([7, 2, 6, 5, 8, 0, 3, 1, 7, 7, 4, 6, 6, 4, 1, 7, 3, 8, 2, 8, 6, 8, 3, 5,
        3, 9, 0, 2, 0, 1, 5, 5, 6, 4, 8, 5, 5, 8, 2, 7, 4, 7, 6, 7, 1, 2, 5, 8,
        2, 0, 5, 1, 2, 7, 6, 1, 0, 6, 7, 6, 5, 6, 7, 1], device='cuda:0')
******************************


KeyboardInterrupt: 

In [17]:
model.eval()
test_acc = 0
test_loss = 0.0
for data in test_loader:
    img, label = data
    if torch.cuda.is_available():
        img = Variable(img).cuda()
        label = Variable(label).cuda()
    else:
        img = Variable(img)
        label = Variable(label)
    
    out = model(img)
    loss = criterion(out, label)
    _, pred = torch.max(out, 1)
    test_acc += (pred == label).sum().item()
    test_loss += loss.data.item() * label.size(0)
print('acc: {:.6f}, loss: {:.6f}'.format(test_acc / len(test_dataset), test_loss / len(test_dataset)))

acc: 0.992900, loss: 0.019592


#### cifar10

In [16]:
train_transform = transforms.Compose([
    transforms.Resize(40),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) # 中心化
])

def conv3x3(in_channels, out_channels, stride=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)

class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
        
    def forward(self, x):
        residual = x
        
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out