## 0. 基础篇

In [1]:
import torch
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim

In [2]:
batch_size = 64
MNIST_ROOT = './data/StartData/mnist'  # download/cache directory shared by both splits

# Convert images to tensors, then normalise with the given mean (0.1307) and std (0.3081).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# The datasets are named *_dataset rather than train/test: those two names are
# taken by the train()/test() functions defined further down, and sharing them
# makes this cell break the training loop when it is re-run out of order.
train_dataset = datasets.MNIST(root=MNIST_ROOT, train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST(root=MNIST_ROOT, train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

In [3]:
class Net(torch.nn.Module):
    """Small two-stage CNN classifier producing 10 raw class scores.

    Each stage is conv(5x5) -> 2x2 max-pool -> ReLU. For a 1x28x28 input the
    feature map shrinks 28 -> 24 -> 12 -> 8 -> 4, leaving 20 * 4 * 4 = 320
    features for the final linear layer.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)
        self.pooling = torch.nn.MaxPool2d(2)
        self.fc = torch.nn.Linear(320, 10)

    def forward(self, x):
        """Map a batch of images to a (batch, 10) tensor of unnormalised scores."""
        n_samples = x.shape[0]  # keep the batch dimension when flattening below
        stage1 = F.relu(self.pooling(self.conv1(x)))
        stage2 = F.relu(self.pooling(self.conv2(stage1)))
        # Collapse channels and spatial dims into one feature vector per sample.
        flat = stage2.view(n_samples, -1)
        # No activation on the last layer: the scores are meant for a cross-entropy loss.
        return self.fc(flat)

# Use the first CUDA device when one is available; otherwise stay on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = Net()
# Kept as the trailing expression so the notebook echoes the module summary.
model.to(device)

Net(
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (pooling): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): Linear(in_features=320, out_features=10, bias=True)
)

In [4]:
# SGD with momentum: momentum speeds up the descent and helps the optimiser
# push through local minima.
optimizer = optim.SGD(params=model.parameters(), lr=0.01, momentum=0.5)
# Cross-entropy on the raw class scores returned by the model.
criterion = torch.nn.CrossEntropyLoss()

In [5]:
def train(epoch, log_interval=300):
    """Run one optimisation pass over ``train_loader``.

    Uses the module-level ``model``, ``device``, ``criterion``, ``optimizer``
    and ``train_loader``.

    Args:
        epoch: Zero-based epoch index; it is printed as ``epoch + 1``.
        log_interval: Number of mini-batches per progress report. After every
            ``log_interval`` batches the mean loss of that window is printed
            and the accumulator is reset. Defaults to 300, the value that was
            previously hard-coded.

    Raises:
        ValueError: If ``log_interval`` is smaller than 1.
    """
    if log_interval < 1:
        raise ValueError('log_interval must be a positive integer')

    # Make sure the network is in training mode, even if an evaluation pass
    # switched it to eval mode beforehand.
    model.train()

    running_loss = 0.0
    for batch_idx, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (batch_idx + 1) % log_interval == 0:
            print('[%d,%5d] loss:%.3f' % (epoch + 1, batch_idx + 1, running_loss / log_interval))
            running_loss = 0.0

In [6]:
def test():
    correct=0
    total=0
    with torch.no_grad():
        for data in test_loader:
            images,labels=data
            images,labels=images.to(device),labels.to(device)
            outputs=model(images)
            _,predicted=torch.max(outputs.data,dim=1)
            total+=labels.size(0)
            correct+=(predicted==labels).sum().item()
    print('Accuracy on test set:%d %%'%(100*correct/total))

In [7]:
if __name__ == "__main__":
    # Ten rounds: one full training pass, then one evaluation on the test set.
    for epoch in range(0, 10):
        train(epoch)  # the epoch index is zero-based; train() prints it as epoch + 1
        test()

[1,  300] loss:0.654
[1,  600] loss:0.185
[1,  900] loss:0.133
Accuracy on test set:96 %
[2,  300] loss:0.106
[2,  600] loss:0.097
[2,  900] loss:0.086
Accuracy on test set:97 %
[3,  300] loss:0.077
[3,  600] loss:0.070
[3,  900] loss:0.072
Accuracy on test set:98 %
[4,  300] loss:0.059
[4,  600] loss:0.067
[4,  900] loss:0.059
Accuracy on test set:98 %
[5,  300] loss:0.052
[5,  600] loss:0.057
[5,  900] loss:0.056
Accuracy on test set:98 %
[6,  300] loss:0.048
[6,  600] loss:0.046
[6,  900] loss:0.051
Accuracy on test set:98 %
[7,  300] loss:0.041
[7,  600] loss:0.044
[7,  900] loss:0.048
Accuracy on test set:98 %
[8,  300] loss:0.041
[8,  600] loss:0.042
[8,  900] loss:0.041
Accuracy on test set:98 %
[9,  300] loss:0.040
[9,  600] loss:0.036
[9,  900] loss:0.042
Accuracy on test set:98 %
[10,  300] loss:0.035
[10,  600] loss:0.034
[10,  900] loss:0.040
Accuracy on test set:98 %


## 1. 高级篇
* GoogLeNet
* ResNet

### 1.1 GoogLeNet

In [1]:
import torch
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim

In [2]:
batch_size = 64
MNIST_ROOT = './data/StartData/mnist'  # download/cache directory shared by both splits

# Convert images to tensors, then normalise with the given mean (0.1307) and std (0.3081).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# The datasets are named *_dataset rather than train/test: those two names are
# taken by the train()/test() functions defined further down, and sharing them
# makes this cell break the training loop when it is re-run out of order.
train_dataset = datasets.MNIST(root=MNIST_ROOT, train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST(root=MNIST_ROOT, train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

In [3]:
class InceptionA(torch.nn.Module):
    """Inception-style block: four parallel branches joined along the channel axis.

    The branches emit 16 (1x1), 24 (5x5), 24 (two stacked 3x3) and 24
    (average-pool + 1x1) channels, so the block always outputs 88 channels.
    Every layer uses stride 1 with matching padding, so height and width are
    unchanged.
    """

    def __init__(self, in_channels):
        super().__init__()
        conv = torch.nn.Conv2d

        # Branch 1: a single 1x1 convolution.
        self.branch1x1 = conv(in_channels, 16, kernel_size=1)

        # Branch 2: 1x1 channel reduction followed by a padded 5x5 convolution.
        self.branch5x5_1 = conv(in_channels, 16, kernel_size=1)
        self.branch5x5_2 = conv(16, 24, kernel_size=5, padding=2)

        # Branch 3: 1x1 channel reduction followed by two padded 3x3 convolutions.
        self.branch3x3_1 = conv(in_channels, 16, kernel_size=1)
        self.branch3x3_2 = conv(16, 24, kernel_size=3, padding=1)
        self.branch3x3_3 = conv(24, 24, kernel_size=3, padding=1)

        # Branch 4: 1x1 convolution applied after average pooling (see forward).
        self.branch_pool = conv(in_channels, 24, kernel_size=1)

    def forward(self, x):
        """Return the channel-wise concatenation of the four branch outputs."""
        one_by_one = self.branch1x1(x)
        five_by_five = self.branch5x5_2(self.branch5x5_1(x))
        three_by_three = self.branch3x3_3(self.branch3x3_2(self.branch3x3_1(x)))
        pooled = self.branch_pool(F.avg_pool2d(x, kernel_size=3, stride=1, padding=1))
        return torch.cat((one_by_one, five_by_five, three_by_three, pooled), dim=1)
    
class Net(torch.nn.Module):
    """CNN classifier with an InceptionA block after each conv stage.

    Pipeline: conv(1->10, 5x5) -> pool -> ReLU -> InceptionA (88 channels)
    -> conv(88->20, 5x5) -> pool -> ReLU -> InceptionA (88 channels) -> linear.
    For a 1x28x28 input the last block yields 88 * 4 * 4 = 1408 features.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
        # 88 input channels: the output width of the first InceptionA block.
        self.conv2 = torch.nn.Conv2d(88, 20, kernel_size=5)

        self.incep1 = InceptionA(in_channels=10)
        self.incep2 = InceptionA(in_channels=20)

        self.mp = torch.nn.MaxPool2d(2)
        self.fc = torch.nn.Linear(1408, 10)

    def forward(self, x):
        """Map a batch of images to a (batch, 10) tensor of raw class scores."""
        n_samples = x.shape[0]
        features = self.incep1(F.relu(self.mp(self.conv1(x))))
        features = self.incep2(F.relu(self.mp(self.conv2(features))))
        # One flat feature vector per sample for the linear classifier.
        return self.fc(features.view(n_samples, -1))
    
# Use the first CUDA device when one is available; otherwise stay on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = Net()
# Kept as the trailing expression so the notebook echoes the module summary.
model.to(device)

In [5]:
# SGD with momentum: momentum speeds up the descent and helps the optimiser
# push through local minima.
optimizer = optim.SGD(params=model.parameters(), lr=0.01, momentum=0.5)
# Cross-entropy on the raw class scores returned by the model.
criterion = torch.nn.CrossEntropyLoss()

In [6]:
def train(epoch, log_interval=300):
    """Run one optimisation pass over ``train_loader``.

    Uses the module-level ``model``, ``device``, ``criterion``, ``optimizer``
    and ``train_loader``.

    Args:
        epoch: Zero-based epoch index; it is printed as ``epoch + 1``.
        log_interval: Number of mini-batches per progress report. After every
            ``log_interval`` batches the mean loss of that window is printed
            and the accumulator is reset. Defaults to 300, the value that was
            previously hard-coded.

    Raises:
        ValueError: If ``log_interval`` is smaller than 1.
    """
    if log_interval < 1:
        raise ValueError('log_interval must be a positive integer')

    # Make sure the network is in training mode, even if an evaluation pass
    # switched it to eval mode beforehand.
    model.train()

    running_loss = 0.0
    for batch_idx, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (batch_idx + 1) % log_interval == 0:
            print('[%d,%5d] loss:%.3f' % (epoch + 1, batch_idx + 1, running_loss / log_interval))
            running_loss = 0.0

def test():
    correct=0
    total=0
    with torch.no_grad():
        for data in test_loader:
            images,labels=data
            images,labels=images.to(device),labels.to(device)
            outputs=model(images)
            _,predicted=torch.max(outputs.data,dim=1)
            total+=labels.size(0)
            correct+=(predicted==labels).sum().item()
    print('Accuracy on test set:%d %%'%(100*correct/total))

In [7]:
if __name__ == "__main__":
    # Ten rounds: one full training pass, then one evaluation on the test set.
    for epoch in range(0, 10):
        train(epoch)  # the epoch index is zero-based; train() prints it as epoch + 1
        test()

[1,  300] loss:0.796
[1,  600] loss:0.205
[1,  900] loss:0.137
Accuracy on test set:96 %
[2,  300] loss:0.109
[2,  600] loss:0.100
[2,  900] loss:0.095
Accuracy on test set:97 %
[3,  300] loss:0.082
[3,  600] loss:0.078
[3,  900] loss:0.073
Accuracy on test set:97 %
[4,  300] loss:0.068
[4,  600] loss:0.061
[4,  900] loss:0.065
Accuracy on test set:98 %
[5,  300] loss:0.054
[5,  600] loss:0.056
[5,  900] loss:0.060
Accuracy on test set:98 %
[6,  300] loss:0.051
[6,  600] loss:0.048
[6,  900] loss:0.055
Accuracy on test set:98 %
[7,  300] loss:0.046
[7,  600] loss:0.046
[7,  900] loss:0.044
Accuracy on test set:98 %
[8,  300] loss:0.045
[8,  600] loss:0.041
[8,  900] loss:0.039
Accuracy on test set:98 %
[9,  300] loss:0.035
[9,  600] loss:0.037
[9,  900] loss:0.042
Accuracy on test set:98 %
[10,  300] loss:0.034
[10,  600] loss:0.036
[10,  900] loss:0.037
Accuracy on test set:98 %


### 1.2 ResNet

In [1]:
import torch
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim

In [2]:
batch_size = 64
MNIST_ROOT = './data/StartData/mnist'  # download/cache directory shared by both splits

# Convert images to tensors, then normalise with the given mean (0.1307) and std (0.3081).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# The datasets are named *_dataset rather than train/test: those two names are
# taken by the train()/test() functions defined further down, and sharing them
# makes this cell break the training loop when it is re-run out of order.
train_dataset = datasets.MNIST(root=MNIST_ROOT, train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST(root=MNIST_ROOT, train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

In [3]:
class ResidualBlock(torch.nn.Module):
    """Two padded 3x3 convolutions with an identity skip connection.

    Input and output have the same number of channels and the same spatial
    size, which is what allows the element-wise addition in ``forward``.
    """

    def __init__(self, channels):
        super().__init__()
        self.channels = channels
        # kernel 3 with padding 1 (stride 1) leaves height and width unchanged.
        shape_preserving = dict(kernel_size=3, padding=1)
        self.conv1 = torch.nn.Conv2d(channels, channels, **shape_preserving)
        self.conv2 = torch.nn.Conv2d(channels, channels, **shape_preserving)

    def forward(self, x):
        """Return relu(x + conv2(relu(conv1(x))))."""
        residual = self.conv2(F.relu(self.conv1(x)))
        # The activation is applied after the skip connection is added.
        return F.relu(x + residual)
    
class Net(torch.nn.Module):
    """CNN classifier with a ResidualBlock after each conv stage.

    Pipeline: conv(1->16, 5x5) -> ReLU -> pool -> ResidualBlock(16)
    -> conv(16->32, 5x5) -> ReLU -> pool -> ResidualBlock(32) -> linear.
    For a 1x28x28 input the last block yields 32 * 4 * 4 = 512 features.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(1, 16, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(16, 32, kernel_size=5)
        self.mp = torch.nn.MaxPool2d(2)

        self.rblock1 = ResidualBlock(16)
        self.rblock2 = ResidualBlock(32)
        self.fc = torch.nn.Linear(512, 10)

    def forward(self, x):
        """Map a batch of images to a (batch, 10) tensor of raw class scores."""
        n_samples = x.shape[0]
        features = self.rblock1(self.mp(F.relu(self.conv1(x))))
        features = self.rblock2(self.mp(F.relu(self.conv2(features))))
        # One flat feature vector per sample for the linear classifier.
        return self.fc(features.view(n_samples, -1))
    
# Use the first CUDA device when one is available; otherwise stay on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = Net()
# Kept as the trailing expression so the notebook echoes the module summary.
model.to(device)

Net(
  (conv1): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
  (mp): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (rblock1): ResidualBlock(
    (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (rblock2): ResidualBlock(
    (conv1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (fc): Linear(in_features=512, out_features=10, bias=True)
)

In [4]:
# SGD with momentum: momentum speeds up the descent and helps the optimiser
# push through local minima. The learning rate in this section is 0.1.
optimizer = optim.SGD(params=model.parameters(), lr=0.1, momentum=0.5)
# Cross-entropy on the raw class scores returned by the model.
criterion = torch.nn.CrossEntropyLoss()

In [5]:
def train(epoch, log_interval=300):
    """Run one optimisation pass over ``train_loader``.

    Uses the module-level ``model``, ``device``, ``criterion``, ``optimizer``
    and ``train_loader``.

    Args:
        epoch: Zero-based epoch index; it is printed as ``epoch + 1``.
        log_interval: Number of mini-batches per progress report. After every
            ``log_interval`` batches the mean loss of that window is printed
            and the accumulator is reset. Defaults to 300, the value that was
            previously hard-coded.

    Raises:
        ValueError: If ``log_interval`` is smaller than 1.
    """
    if log_interval < 1:
        raise ValueError('log_interval must be a positive integer')

    # Make sure the network is in training mode, even if an evaluation pass
    # switched it to eval mode beforehand.
    model.train()

    running_loss = 0.0
    for batch_idx, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (batch_idx + 1) % log_interval == 0:
            print('[%d,%5d] loss:%.3f' % (epoch + 1, batch_idx + 1, running_loss / log_interval))
            running_loss = 0.0

def test():
    correct=0
    total=0
    with torch.no_grad():
        for data in test_loader:
            images,labels=data
            images,labels=images.to(device),labels.to(device)
            outputs=model(images)
            _,predicted=torch.max(outputs.data,dim=1)
            total+=labels.size(0)
            correct+=(predicted==labels).sum().item()
    print('Accuracy on test set:%d %%'%(100*correct/total))

In [6]:
if __name__ == "__main__":
    # Ten rounds: one full training pass, then one evaluation on the test set.
    for epoch in range(0, 10):
        train(epoch)  # the epoch index is zero-based; train() prints it as epoch + 1
        test()

[1,  300] loss:0.230
[1,  600] loss:0.079
[1,  900] loss:0.064
Accuracy on test set:98 %
[2,  300] loss:0.042
[2,  600] loss:0.039
[2,  900] loss:0.038
Accuracy on test set:98 %
[3,  300] loss:0.028
[3,  600] loss:0.027
[3,  900] loss:0.030
Accuracy on test set:98 %
[4,  300] loss:0.018
[4,  600] loss:0.023
[4,  900] loss:0.024
Accuracy on test set:99 %
[5,  300] loss:0.016
[5,  600] loss:0.016
[5,  900] loss:0.014
Accuracy on test set:99 %
[6,  300] loss:0.011
[6,  600] loss:0.014
[6,  900] loss:0.014
Accuracy on test set:99 %
[7,  300] loss:0.008
[7,  600] loss:0.015
[7,  900] loss:0.013
Accuracy on test set:99 %
[8,  300] loss:0.006
[8,  600] loss:0.008
[8,  900] loss:0.010
Accuracy on test set:99 %
[9,  300] loss:0.006
[9,  600] loss:0.004
[9,  900] loss:0.012
Accuracy on test set:99 %
[10,  300] loss:0.005
[10,  600] loss:0.005
[10,  900] loss:0.008
Accuracy on test set:99 %
