In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
class Net(nn.Module):
    """Small convolutional classifier producing 10 log-probabilities per image.

    Layer stack: two 3x3 convolutions with stride 1 (1 -> 32 -> 64 channels),
    a 2x2 max-pool, dropout, then two fully connected layers (9216 -> 128 -> 10)
    with dropout in between.

    ``fc1`` takes 9216 = 64 * 12 * 12 flattened features, which is what the
    unpadded conv/conv/pool stack yields for a 28x28 single-channel input.
    """

    def __init__(self):
        super().__init__()
        # Conv2d arguments: in_channels, out_channels, kernel_size, stride.
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        # Applied to the 4-D feature map, so the 2-D (channel-wise) variant fits.
        self.dropout1 = nn.Dropout2d(0.25)
        # Applied to the 2-D (batch, features) output of fc1. Dropout2d is meant
        # for 3-D/4-D inputs and is deprecated for 2-D ones, so plain
        # element-wise Dropout is used here.
        self.dropout2 = nn.Dropout(0.5)
        # Linear arguments: in_features, out_features.
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return per-class log-probabilities (log-softmax over dim 1) for ``x``."""
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        # Keep the batch dimension and flatten everything else for the linear layers.
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output

In [16]:
def train(model,device,train_loader,optimizer,epoch,log_interval):
    """Run one pass over ``train_loader``, updating ``model`` after every batch.

    Args:
        model: module whose forward output is fed to ``F.nll_loss``.
        device: device that each batch is moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches; must support
            ``len()`` and expose a sized ``.dataset`` (both used for logging).
        optimizer: optimizer whose gradients are reset and stepped per batch.
        epoch: epoch number, used only in the progress message.
        log_interval: a progress line is printed every ``log_interval`` batches.
    """
    # Switch to training mode.
    model.train()
    for step, batch in enumerate(train_loader):
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        log_probs = model(inputs)
        # Negative log likelihood; nll_loss + log_softmax == cross_entropy.
        loss = F.nll_loss(log_probs, labels)
        # Backward pass: compute the gradients for the current batch.
        loss.backward()
        # Update the network parameters.
        optimizer.step()

        if step % log_interval != 0:
            continue
        samples_seen = step * len(inputs)
        samples_total = len(train_loader.dataset)
        percent_done = 100. * step / len(train_loader)
        print('Train Epoch:{}[{}/{}({:.0f}%)]\tLoss:{:.6f}'.format(
            epoch, samples_seen, samples_total, percent_done, loss.item()))

In [4]:
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data,target in test_loader:
            data, target = data.to(device),target.to(device)
            output = model(data)
            test_loss += F.nll_loss(out,target,reduction = 'sum').item()
            pred = out.argmax(dim = 1, keepdim = True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    
    print('\nTest set: Average loss{:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(test_loss,correct,len(test_loader.dataset),100.*correct/len(test_loader.dataset)))

In [5]:
from torchvision import datasets,transforms

In [6]:
# Convert each image to a tensor, then normalise with mean = 0.1307, std = 0.3081.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)

In [7]:
# dataset1 is the MNIST training split (download requested), dataset2 the test split;
# both are rooted at ../data and share the same preprocessing transform.
dataset1 = datasets.MNIST(root='../data', train=True, transform=transform, download=True)
dataset2 = datasets.MNIST(root='../data', train=False, transform=transform)

In [9]:
# Reshuffle the training data every epoch so the model does not see the same
# batches in the same order each time; evaluation order does not matter, so the
# test loader stays sequential.
train_loader = torch.utils.data.DataLoader(dataset1, batch_size=10, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset2, batch_size=10)

In [14]:
# Seed PyTorch's global RNG so runs are repeatable.
torch.manual_seed(0)
# Everything runs on the CPU.
device = torch.device('cpu')
# epoch_size: number of epochs to train for.
# log_interval: number of batches between progress messages.
epoch_size, log_interval = 20, 10

In [11]:
# Instantiate the network, move it to the chosen device, and let Adadelta
# optimise all of its parameters.
model = Net()
model = model.to(device)
optimizer = optim.Adadelta(params=model.parameters(), lr=1)

In [13]:
from torch.optim.lr_scheduler import StepLR
# Learning-rate decay: each call to scheduler.step() sets lr = lr * 0.7.
scheduler = StepLR(optimizer=optimizer, gamma=0.7, step_size=1)

In [None]:
# One iteration per epoch: train, evaluate on the test set, then decay the learning rate.
for epoch in range(1, epoch_size + 1):
    train(
        model=model,
        device=device,
        train_loader=train_loader,
        optimizer=optimizer,
        epoch=epoch,
        log_interval=log_interval,
    )
    test(model=model, device=device, test_loader=test_loader)
    scheduler.step()



