### LeNet

In [1]:
import torch

In [2]:
torch.__version__

'0.4.1'

In [3]:
import torch.nn as nn

In [4]:
import torch.nn.functional as F

In [5]:
import torch.optim as optim

In [6]:
import torchvision

In [7]:
import torchvision.datasets as datasets

In [8]:
import torchvision.transforms as transforms

In [9]:
import os 
import random
import numpy as np 

#### 图像的Normalize

每个像素-mean/std

每个像素的归一化缩放

思考:

1.归一化哪部分数据？A训练集、B评测集、C训练集+评测集 -> C

2.归一化的参数mean和std来自于？A训练集、B评测集、C训练集+评测集 -> A

In [10]:
#np.mean(mnist.train.images)

In [11]:
#np.std(mnist.train.images)

#### 数据的归一化

In [12]:
data_trans=transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,),(0.3081,))#参数mean和std来自于训练集，但是transform本身在训练和评测的时候都会使用
])

In [13]:
data_trans_alexnet=transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,),(0.3081,))#参数mean和std来自于训练集，但是transform本身在训练和评测的时候都会使用
])

In [94]:
# train_data=datasets.MNIST('data',train=True,download=True,transform=data_trans)
# test_data=datasets.MNIST('data',train=False,download=True,transform=data_trans)

In [95]:
train_data=datasets.CIFAR10('data',train=True,download=True,transform=data_trans)
test_data=datasets.CIFAR10('data',train=False,download=True,transform=data_trans)

Files already downloaded and verified
Files already downloaded and verified


In [96]:
# train_data=datasets.MNIST('data',train=True,download=True,transform=data_trans_alexnet)
# test_data=datasets.MNIST('data',train=False,download=True,transform=data_trans_alexnet)

In [97]:
n_train=int(len(train_data)*0.9)
n_validation=len(train_data)-n_train

In [98]:
train_data,valid_data=torch.utils.data.random_split(train_data,[n_train,n_validation])

In [99]:
len(train_data)

45000

In [100]:
len(valid_data)

5000

In [101]:
len(test_data)

10000

In [102]:
train_data.dataset

Dataset CIFAR10
    Number of datapoints: 50000
    Split: train
    Root Location: data
    Transforms (if any): Compose(
                             Resize(size=32, interpolation=PIL.Image.BILINEAR)
                             ToTensor()
                             Normalize(mean=(0.1307,), std=(0.3081,))
                         )
    Target Transforms (if any): None

In [103]:
batch_size=64

目前完成了数据集的制作

In [104]:
train_iterator=torch.utils.data.DataLoader(train_data,shuffle=True,batch_size=batch_size)
valid_iterator=torch.utils.data.DataLoader(valid_data,batch_size=batch_size)
test_iterator=torch.utils.data.DataLoader(test_data,batch_size=batch_size)

In [105]:
len(train_iterator)

704

#### 构建神经网络

In [109]:
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet,self).__init__()
        #第一层conv1，因为是MNIST数据集，所有channel数是1，输出的channel是6,kernel_size是5*5
        self.conv1=nn.Conv2d(3,6,5)
        #第二层conv2,输入channel=6,输出channel=16，kernel5*5,input_size=14*14,output_size=10*10
        self.conv2=nn.Conv2d(6,16,5)
        
        self.fc1=nn.Linear(16*5*5,120)
        
        self.fc2=nn.Linear(120,84)
        
        self.fc3=nn.Linear(84,10)#不用增加softmax层，从推断的角度直接使用argmax就可以得到最终的预测结果，在cross_entropy函数中实现了softmax的功能
        
    def forward(self,x):#规定计算图架构
        out=F.max_pool2d(F.relu(self.conv1(x)),2)
        out=F.max_pool2d(F.relu(self.conv2(out)),2)
        out=out.view(out.shape[0],-1)
        out=F.relu(self.fc1(out))
        out=F.relu(self.fc2(out))
        out=self.fc3(out)
        return out
    
        

In [110]:
class AlexNet(nn.Module):
    def __init__(self):
        super(AlexNet,self).__init__()
        self.feature_block=nn.Sequential(
            nn.Conv2d(3,64,kernel_size=11,stride=4,padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3,stride=2),
            nn.Conv2d(64,192,kernel_size=5,padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3,stride=2),
            nn.Conv2d(192,384,kernel_size=3,padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384,256,kernel_size=3,padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256,256,kernel_size=3,padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3,stride=2)
        )
        self.avgpool=nn.AdaptiveAvgPool2d((6,6))
        self.class_block=nn.Sequential(
            nn.Dropout(),
            nn.Linear(6*6*256,4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096,4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096,10),
        )
    def forward(self,x):
        x=self.feature_block(x)
        x=self.avgpool(x)
        x=x.view(x.size(0),256*6*6)
        x=self.class_block(x)
        return x
        

In [111]:
class VGGBlock(nn.Module):
    def __init__(self,in_channel,out_channel,batch_norm):#改良后的新的VGGBlock
        super(VGGBlock,self).__init__()
        stack=[]
        stack.append(nn.Conv2d(in_channel,out_channel,kernel_size=3,padding=1))
        if batch_norm:
            stack.append(nn.BatchNorm2d(out_channel))
        stack.append(nn.ReLU(inplace=True))
        self.model_block=nn.Sequential(*stack)
    def forward(self,x):
        return self.model_block(x)

In [112]:
class VGGNet11(nn.Module):
    def __init__(self,block,pool,batch_norm):#block是一个网络模组抽象，pool也是pooling层的抽象
        super(VGGNet11,self).__init__()
        self.feature_block=nn.Sequential(
            block(3,64,batch_norm), #32*32
            pool(kernel_size=2,stride=2),#16*16
            block(64,128,batch_norm),
            pool(kernel_size=2,stride=2),#8*8
            block(128,256,batch_norm),
            block(256,256,batch_norm),
            pool(kernel_size=2,stride=2),#4*4
            block(256,512,batch_norm),
            block(512,512,batch_norm),
            pool(kernel_size=2,stride=2),#2*2
            block(512,512,batch_norm),
            block(512,512,batch_norm),
            pool(kernel_size=2,stride=2),#1*1
        )
        self.classifier=nn.Linear(512,10)
        
    def forward(self,x):
        x=self.feature_block(x)
        x=x.view(x.shape[0],-1)
        x=self.classifier(x)
        return x

In [113]:
class VGGNet16(nn.Module):
    def __init__(self,block,pool,batch_norm):#block是一个网络模组抽象，pool也是pooling层的抽象
        super(VGGNet16,self).__init__()
        self.feature_block=nn.Sequential(
            block(3,64,batch_norm), #32*32
            block(64,64,batch_norm),
            pool(kernel_size=2,stride=2),#16*16
            block(64,128,batch_norm),
            block(128,128,batch_norm),
            pool(kernel_size=2,stride=2),#8*8
            block(128,256,batch_norm),
            block(256,256,batch_norm),
            pool(kernel_size=2,stride=2),#4*4
            block(256,512,batch_norm),
            block(512,512,batch_norm),
            block(512,512,batch_norm),
            pool(kernel_size=2,stride=2),#2*2
            block(512,512,batch_norm),
            block(512,512,batch_norm),
            block(512,512,batch_norm),
            pool(kernel_size=2,stride=2),#1*1
        )
        self.classifier=nn.Sequential(
            nn.Linear(512,1024),
            nn.Linear(1024,1024),
            nn.Linear(1024,10),
        )
        
        
    def forward(self,x):
        x=self.feature_block(x)
        x=x.view(x.shape[0],-1)
        x=self.classifier(x)
        return x

### GoogleNet

In [114]:
class Inception(nn.Module):
    def __init__(self,in_planes,n1x1,n3x3red,n3x3,n5x5red,n5x5,pool_planes):
        super(Inception,self).__init__()
        self.b1=nn.Sequential(
            nn.Conv2d(in_planes,n1x1,kernel_size=1),
            nn.BatchNorm2d(n1x1),
            nn.ReLU(True),
        )
        
        self.b2=nn.Sequential(
            nn.Conv2d(in_planes,n3x3red,kernel_size=1),
            nn.BatchNorm2d(n3x3red),
            nn.ReLU(True),
            nn.Conv2d(n3x3red,n3x3,kernel_size=3,padding=1),
            nn.BatchNorm2d(n3x3),
            nn.ReLU(True),
        )
        
        self.b3=nn.Sequential(
            nn.Conv2d(in_planes,n5x5red,kernel_size=1),
            nn.BatchNorm2d(n5x5red),
            nn.ReLU(True),
            nn.Conv2d(n5x5red,n5x5,kernel_size=5,padding=2),
            nn.BatchNorm2d(n5x5),
            nn.ReLU(True),
        )
        
        self.b4=nn.Sequential(
            nn.MaxPool2d(3,stride=1,padding=1),
            nn.Conv2d(in_planes,pool_planes,kernel_size=1),
            nn.BatchNorm2d(pool_planes),
            nn.ReLU(True),
        )
        
    def forward(self,x):
        x1=self.b1(x)
        x2=self.b2(x)
        x3=self.b3(x)
        x4=self.b4(x)
        #concat4层输入在一起
        return torch.cat([x1,x2,x3,x4],1)

In [115]:
class GoogLeNet(nn.Module):
    def __init__(self):
        super(GoogLeNet,self).__init__()
        self.feature_block=nn.Sequential(
            nn.Conv2d(3,192,kernel_size=3,padding=1),
            nn.BatchNorm2d(192),
            nn.ReLU(True),
        )
        self.a3=Inception(192,64,96,128,16,32,32)
        self.b3=Inception(256, 128, 128, 192, 32, 96, 64)
        self.maxpool=nn.MaxPool2d(3,stride=2,padding=1)
        self.a4 = Inception(480, 192,  96, 208, 16,  48,  64)
        self.b4 = Inception(512, 160, 112, 224, 24,  64,  64)
        self.c4 = Inception(512, 128, 128, 256, 24,  64,  64)
        self.d4 = Inception(512, 112, 144, 288, 32,  64,  64)
        self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)
        self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
        self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)
        self.avgpool=nn.AvgPool2d(8,stride=1)
        self.linear=nn.Linear(1024,10)
        
    def forward(self,x):
        out=self.feature_block(x)
        out=self.a3(out)
        out=self.b3(out)
        out=self.maxpool(out)
        out=self.a4(out)
        out=self.b4(out)
        out=self.c4(out)
        out=self.d4(out)
        out=self.e4(out)
        out=self.maxpool(out)
        out = self.a5(out)
        out = self.b5(out)
        out =self.avgpool(out)
        out =out.view(out.size(0),-1)
        out=self.linear(out)
        return out

In [116]:
class ResNetBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride):
        super(ResNetBlock,self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = nn.Sequential()
        
        if stride != 1 or in_channels != out_channels:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )
            
    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.downsample(x)#ResNet的add操作，其实是张量的加和
        out = F.relu(out)
        return out

class ResNetLayer(nn.Module):
    def __init__(self,block,n_blocks,in_channels,out_channels,stride):
        super(ResNetLayer,self).__init__()
        self.modules=[]
        self.modules.append(block(in_channels,out_channels,stride))
        for _ in range(n_blocks-1):
            self.modules.append(block(out_channels,out_channels,1))
        self.blocks=nn.Sequential(*self.modules)
        
    def forward(self,x):
        return self.blocks(x)
    
class ResNet18(nn.Module):
    def __init__(self,layer,block):
        super(ResNet18,self).__init__()
        n_blocks=[2,2,2,2]
        self.conv1=nn.Conv2d(3,64,kernel_size=3,stride=1,padding=1,bias=False)
        self.bn1=nn.BatchNorm2d(64)
        self.rb1=layer(block,n_blocks[0],64,64,1)
        self.rb2=layer(block,n_blocks[1],64,128,2)
        self.rb3=layer(block,n_blocks[2],128,256,2)
        self.rb4=layer(block,n_blocks[3],256,512,2)
        self.fc=nn.Linear(512,10)
        
    def forward(self,x):
        out=F.relu(self.bn1(self.conv1(x)))
        out=self.rb1(out)
        out=self.rb2(out)
        out=self.rb3(out)
        out=self.rb4(out)
        out=F.avg_pool2d(out,4)
        out=out.view(out.shape[0],-1)
        out=self.fc(out)
        return out
        
#ResNet34->[3,4,6,3]


In [117]:
import math
class Bottleneck(nn.Module):
    def __init__(self,in_planes,growth_rate):
        super(Bottleneck,self).__init__()
        self.bn1=nn.BatchNorm2d(in_planes)
        self.conv1=nn.Conv2d(in_planes,4*growth_rate,kernel_size=1,bias=False)
        self.bn2=nn.BatchNorm2d(4*growth_rate)
        self.conv2=nn.Conv2d(4*growth_rate,growth_rate,kernel_size=3,padding=1,bias=False)
        
    def forward(self,x):
        out=self.conv1(F.relu(self.bn1(x)))#pre-activation
        out=self.conv2(F.relu(self.bn2(out)))
        out=torch.cat([out,x],1)
        return out
class Transition(nn.Module):
    def __init__(self,in_planes,out_planes):
        super(Transition,self).__init__()
        self.bn=nn.BatchNorm2d(in_planes)
        self.conv=nn.Conv2d(in_planes,out_planes,kernel_size=1,bias=False)
    def forward(self,x):
        out=self.conv(F.relu(self.bn(x)))
        out=F.avg_pool2d(out,2)
        return out
class DenseNet(nn.Module):
    def __init__(self,block,nblocks,growth_rate=12,reduction=0.5,num_classes=10):
        super(DenseNet,self).__init__()
        self.growth_rate=growth_rate
        num_planes=2*growth_rate #32
        #最初的感知层
        self.conv1=nn.Conv2d(3,num_planes,kernel_size=3,padding=1,bias=False)
        #第一个DenseBlock
        self.dense1=self._make_dense_layers(block,num_planes,nblocks[0])
        num_planes+=nblocks[0]*growth_rate
        out_planes=int(math.floor(num_planes*reduction))
        self.trans1=Transition(num_planes,out_planes)
        num_planes=out_planes
        #第二个DenseBlock
        self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
        num_planes += nblocks[1]*growth_rate#计算如果不压缩的话的输出
        out_planes = int(math.floor(num_planes*reduction))
        self.trans2 = Transition(num_planes, out_planes)
        num_planes = out_planes
        #第三个DenseBlock
        self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
        num_planes += nblocks[2]*growth_rate
        out_planes = int(math.floor(num_planes*reduction))
        self.trans3 = Transition(num_planes, out_planes)
        num_planes = out_planes
        #第四个DenseBlock
        self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3])
        num_planes += nblocks[3]*growth_rate
        #分类层
        self.bn=nn.BatchNorm2d(num_planes)
        self.linear=nn.Linear(num_planes,num_classes)
    
    
    def _make_dense_layers(self,block,in_planes,nblock):
        #block:bottleneck
        #nblock代表构建denseblock中有多少bottleneck层
        layers=[]
        for i in range(nblock):
            layers.append(block(in_planes,self.growth_rate))
            in_planes+=self.growth_rate
        return nn.Sequential(*layers)
    
    def forward(self,x):
        out=self.conv1(x)
        out=self.trans1(self.dense1(out))
        out = self.trans2(self.dense2(out))
        out = self.trans3(self.dense3(out))
        out = self.dense4(out)
        out=F.avg_pool2d(F.relu(self.bn(out)),4)
        out=out.view(out.size(0),-1)
        out=self.linear(out)
        return out

def DenseNet121():
    return DenseNet(Bottleneck,[6,12,24,16],growth_rate=32)
    

#### ResNext

In [118]:
class Block(nn.Module):
    
    expansion = 2

    def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1):
        super(Block, self).__init__()
        group_width = cardinality * bottleneck_width
        self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(group_width)
        self.conv2 = nn.Conv2d(group_width, group_width, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False)
        self.bn2 = nn.BatchNorm2d(group_width)
        self.conv3 = nn.Conv2d(group_width, self.expansion*group_width, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*group_width)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*group_width:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*group_width, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*group_width)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNeXt(nn.Module):
    def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10):
        super(ResNeXt, self).__init__()
        self.cardinality = cardinality
        self.bottleneck_width = bottleneck_width
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(num_blocks[0], 1)
        self.layer2 = self._make_layer(num_blocks[1], 2)
        self.layer3 = self._make_layer(num_blocks[2], 2)
        # self.layer4 = self._make_layer(num_blocks[3], 2)
        self.linear = nn.Linear(cardinality*bottleneck_width*8, num_classes)

    def _make_layer(self, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, stride))
            self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width
        # Increase bottleneck_width by 2 after each stage.
        self.bottleneck_width *= 2
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        # out = self.layer4(out)
        out = F.avg_pool2d(out, 8)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ResNeXt29_2x64d():
    return ResNeXt(num_blocks=[3,3,3], cardinality=2, bottleneck_width=64)

#### SqueezeNet

In [314]:
import torch.nn.init as init
class Fire(nn.Module):
    def __init__(self,inplanes,s1,e1,e3):
        super(Fire,self).__init__()
        self.inplanes=inplanes
        self.squeeze=nn.Conv2d(inplanes,s1,kernel_size=1)
        self.squeeze_activation=nn.ReLU(inplace=True)
        self.expand1x1=nn.Conv2d(s1,e1,kernel_size=1)
        self.expand1x1_activation=nn.ReLU(inplace=True)
        self.expand3x3=nn.Conv2d(s1,e3,kernel_size=True)
        self.expand3x3_activation=nn.ReLU(inplace=True)
    def forward(self,x):
        x=self.squeeze_activation(self.squeeze(x))
        return torch.cat([
            self.expand1x1_activation(self.expand1x1(x)),
            self.expand3x3_activation(self.expand3x3(x))
        ],1)
    
class SqueezeNet(nn.Module):

    def __init__(self, version=1.0, num_classes=10):
        super(SqueezeNet, self).__init__()
        if version not in [1.0, 1.1]:
            raise ValueError("Unsupported SqueezeNet version {version}:"
                             "1.0 or 1.1 expected".format(version=version))
        self.num_classes = num_classes
        if version == 1.0:
            self.features = nn.Sequential(
                nn.Conv2d(3, 96, kernel_size=7, stride=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(96, 16, 64, 64),
                Fire(128, 16, 64, 64),
                Fire(128, 32, 128, 128),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(256, 32, 128, 128),
                Fire(256, 48, 192, 192),
                Fire(384, 48, 192, 192),
                Fire(384, 64, 256, 256),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(512, 64, 256, 256),
            )
        else:
            self.features = nn.Sequential(
                nn.Conv2d(3, 64, kernel_size=3, stride=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(64, 16, 64, 64),
                Fire(128, 16, 64, 64),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(128, 32, 128, 128),
                Fire(256, 32, 128, 128),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(256, 48, 192, 192),
                Fire(384, 48, 192, 192),
                Fire(384, 64, 256, 256),
                Fire(512, 64, 256, 256),
            )
        # Final convolution is initialized differently form the rest
        final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
        self.classifier = nn.Sequential(
            #nn.Dropout(p=0.5),
            final_conv,
            #nn.ReLU(inplace=True),
            #nn.AvgPool2d(4, stride=1)
        )

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                if m is final_conv:
                    init.normal(m.weight.data, mean=0.0, std=0.01)
                else:
                    init.kaiming_uniform(m.weight.data)
                if m.bias is not None:
                    m.bias.data.zero_()

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x.view(x.size(0), self.num_classes)

#### 载入模型并进行计算

In [315]:
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [316]:
model_dir='models'

if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

In [317]:
# model=LeNet().to(device)#将神经网络对象加载到相应的内存或显存中
# model_path=os.path.join(model_dir,'lenet_mnist.pt')#保存训练好的模型的位置

In [318]:
# model=AlexNet().to(device)#将神经网络对象加载到相应的内存或显存中
# model_path=os.path.join(model_dir,'alex_mnist.pt')#保存训练好的模型的位置

In [319]:
# model=VGGNet11(VGGBlock,nn.MaxPool2d,True).to(device)#将神经网络对象加载到相应的内存或显存中
# model_path=os.path.join(model_dir,'vgg_mnist.pt')#保存训练好的模型的位置

In [320]:
# model=VGGNet16(VGGBlock,nn.MaxPool2d,True).to(device)#将神经网络对象加载到相应的内存或显存中
# model_path=os.path.join(model_dir,'vgg16_mnist.pt')#保存训练好的模型的位置

In [321]:
# model=GoogLeNet().to(device)#将神经网络对象加载到相应的内存或显存中
# model_path=os.path.join(model_dir,'googlenet_mnist.pt')#保存训练好的模型的位置

In [322]:
# model=ResNet18(ResNetLayer,ResNetBlock).to(device)#将神经网络对象加载到相应的内存或显存中
# model_path=os.path.join(model_dir,'resnet18_mnist.pt')#保存训练好的模型的位置

In [323]:
# model=DenseNet121().to(device)#将神经网络对象加载到相应的内存或显存中
# model_path=os.path.join(model_dir,'densenet121_mnist.pt')#保存训练好的模型的位置

In [324]:
# model=ResNeXt29_2x64d().to(device)
# model_path=os.path.join(model_dir,'resnext_29_2x64d_mnist.pt')

In [325]:
# model=LeNet().to(device)#将神经网络对象加载到相应的内存或显存中
# model_path=os.path.join(model_dir,'lenet_cifar10.pt')#保存训练好的模型的位置

In [326]:
# model=ResNet18(ResNetLayer,ResNetBlock).to(device)#将神经网络对象加载到相应的内存或显存中
# model_path=os.path.join(model_dir,'resnet18_cifar10.pt')#保存训练好的模型的位置

In [327]:
model=SqueezeNet(version=1.0).to(device)#将神经网络对象加载到相应的内存或显存中
model_path=os.path.join(model_dir,'squeezenet_cifar10.pt')#保存训练好的模型的位置



In [328]:
model

SqueezeNet(
  (features): Sequential(
    (0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (3): Fire(
      (squeeze): Conv2d(96, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace)
      (expand3x3): Conv2d(16, 64, kernel_size=(True, True), stride=(1, 1))
      (expand3x3_activation): ReLU(inplace)
    )
    (4): Fire(
      (squeeze): Conv2d(128, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace)
      (expand3x3): Conv2d(16, 64, kernel_size=(True, True), stride=(1, 1))
      (expand3x3_activation): ReLU(inplace)
    )
    (5): Fire(
      (squeeze): Conv2d(128, 32, kernel_size=(1, 1), strid

In [329]:
optimizer=optim.Adam(model.parameters())

In [330]:
criterion=nn.CrossEntropyLoss()

In [331]:
#criterion

In [332]:
def accu(fx,y):
    pred=fx.max(1,keepdim=True)[1]
    correct=pred.eq(y.view_as(pred)).sum()
    acc=correct.float()/pred.shape[0]
    return acc

In [333]:
#训练一个epoch
def train(model,device,iterator,optimizer,criterion):
    epoch_loss=0#积累变量
    epoch_acc=0#积累变量
    model.train()#该函数表示PHASE=Train,自动求导以及求导运算将会被激活
    
    for (x,y) in iterator:#拿每一个minibatch
#         print(x,y)
        x=x.to(device)
        y=y.to(device)
        optimizer.zero_grad()#将所有的梯度变量清零
        fx=model(x)#进行inference
        loss=criterion(fx,y)#计算train_loss
        acc=accu(fx,y)#计算train_acc
        loss.backward()#进行bp回算各参数和神经元的梯度
        optimizer.step()#统一进行梯度下降的更新
        epoch_loss+=loss.item()
        epoch_acc+=acc.item()
    
    #返回平均训练Loss和平均训练Accu
    return epoch_loss/len(iterator),epoch_acc/len(iterator)


In [334]:
#评测一个验证集，不用梯度下降，只是进行推断和误差计算
def evaluate(model,device,iterator,criterion):
    epoch_loss=0
    epoch_acc=0
    model.eval()#PHASE=Eval,不会增加梯度的存储变量和计算单元
    with torch.no_grad():
        for (x,y) in iterator:
            x=x.to(device)
        
            y=y.to(device)
            fx=model(x)
            loss=criterion(fx,y)
            acc=accu(fx,y)
            epoch_loss+=loss.item()
            epoch_acc+=acc.item()
            
    return epoch_loss/len(iterator),epoch_acc/len(iterator)

#### 开始训练

In [342]:
epochs=100

In [343]:
best_valid_loss=float('inf')#自动筛选最优模型并保存

In [None]:
for epoch in range(epochs):
    train_loss,train_acc=train(model,device,train_iterator,optimizer,criterion)
    valid_loss,valid_acc=evaluate(model,device,valid_iterator,criterion)
    if valid_loss<best_valid_loss:#当前模型好于历史最好模型
        best_valid_loss=valid_loss
        torch.save(model.state_dict(),model_path)#模型文件的更新
    print('Epoch:{0}|Train Loss:{1}|Train Acc:{2}|Val Loss:{3}|Val Acc:{4}'.format(epoch+1,train_loss,train_acc,valid_loss,valid_acc))

Epoch:1|Train Loss:1.41294204359|Train Acc:0.476185191761|Val Loss:1.45674347425|Val Acc:0.455498417722
Epoch:2|Train Loss:1.36020674082|Train Acc:0.497447620739|Val Loss:1.37036217391|Val Acc:0.491693037975
Epoch:3|Train Loss:1.36073239757|Train Acc:0.496027166193|Val Loss:1.42691243751|Val Acc:0.474485759494
Epoch:4|Train Loss:1.35324021615|Train Acc:0.500710227273|Val Loss:1.43550501292|Val Acc:0.464200949367
Epoch:5|Train Loss:1.35542793563|Train Acc:0.496493252841|Val Loss:1.39609891704|Val Acc:0.479034810127
Epoch:6|Train Loss:1.36374496745|Train Acc:0.499045632102|Val Loss:1.37437524524|Val Acc:0.494462025316
Epoch:7|Train Loss:1.34163775287|Train Acc:0.505038174716|Val Loss:1.4011049829|Val Acc:0.479628164557
Epoch:8|Train Loss:1.3294310988|Train Acc:0.511563387784|Val Loss:1.38756002203|Val Acc:0.487143987342
Epoch:9|Train Loss:1.35666103077|Train Acc:0.498668323864|Val Loss:1.39069735098|Val Acc:0.480617088608
Epoch:10|Train Loss:1.33876824311|Train Acc:0.508278586648|Val Los

In [None]:
model.load_state_dict(torch.load(model_path))#load最好结果到model

In [None]:
test_loss,test_acc=evaluate(model,device,test_iterator,criterion)
print('Test Loss:{0}|Test Acc:{1}'.format(test_loss,test_acc))

作业:

请大家实现ResNet50/101/152->只需要成功的跑起来即可