In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# 神经网络

In [None]:
import torch 
import numpy as np
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torch.nn as nn

In [None]:
class Net(torch.nn.Module):
    """LeNet-style CNN: two conv/pool stages followed by three linear layers.

    With a single-channel 32x32 input the spatial size goes
    32 -> 28 (conv1) -> 14 (pool) -> 10 (conv2) -> 5 (pool), which yields the
    16 * 5 * 5 features expected by ``fc1``. The network outputs 10 scores
    per sample.
    """

    def __init__(self):
        super().__init__()
        # Convolutions: 1 input channel -> 6 channels -> 16 channels, 5x5 kernels.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Fully connected head: 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the 10 output scores for a batch shaped (batch, 1, height, width)."""
        # Each stage is conv -> ReLU -> 2x2 max-pool.
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), (2, 2))
        # Flatten everything except the batch dimension.
        flat = x.view(-1, self.num_flat_features(x))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the number of elements per sample, i.e. the product of all dims but the first."""
        per_sample_dims = x.size()[1:]  # drop the batch dimension, keep (c, h, w)
        count = 1
        for dim in per_sample_dims:
            count *= dim
        return count

# Build the network and print its layer summary.
net = Net()
print(net)

In [None]:
# Collect the learnable tensors (a weight and a bias for each layer).
params = list(net.parameters())
# Report how many parameter tensors exist rather than dumping every value.
print(len(params))
print(params[0].size())  # shape of the first parameter tensor (conv1's weight)

In [None]:
# NOTE(review): `input` shadows the Python builtin; the name is kept because later cells reference it.
input = torch.randn(1, 1, 32, 32)  # (batch size, channels, height, width)
out = net(input)
print(out)

In [None]:
net.zero_grad() # clear the gradient buffers of every parameter
out.backward(torch.rand(1, 10)) # backpropagate using a random upstream gradient

## 损失函数

In [None]:
# Forward pass, then score the output against a dummy target with mean-squared error.
output = net(input)
target = torch.randn(10)
target = target.view(1, -1)  # reshape to (1, 10) so it lines up with `output`
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)


现在，如果从 loss 出发，沿着它的 `.grad_fn` 属性反向追踪，就会看到如下所示的计算图。

```
input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
      -> view -> linear -> relu -> linear -> relu -> linear
      -> MSELoss
      -> loss
```

所以，当我们调用 `loss.backward()` 时，整张计算图都会对 loss 求导，图中所有 `requires_grad=True` 的张量都会把梯度累积到各自的 `.grad` 属性中。

In [None]:
# Step backwards through the autograd graph, starting from the loss node.
print(loss.grad_fn)  # MSELoss
print(loss.grad_fn.next_functions[0][0])  # Linear
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # ReLU

## 反向传播

In [None]:
net.zero_grad()  # clear existing gradients first; otherwise new gradients are added onto them
 
print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)

loss.backward()  # backpropagate the loss to populate the .grad of each parameter

print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

## 更新权重

In [None]:
import torch.optim as optim

# Plain SGD over all of the network's parameters.
optimizer = optim.SGD(net.parameters(), lr = 0.01)

# One update step: clear gradients, forward, compute loss, backward, apply the update.
optimizer.zero_grad()
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()

# 训练一个分类器
1. 使用torchvision加载和归一化CIFAR10训练集和测试集
2. 定义一个卷积神经网络
3. 定义损失函数
4. 在训练集上训练网络
5. 在测试集上测试网络

## 1.读取和归一化CIFAR10数据集

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms

In [None]:
# Preprocessing applied to every image: convert to a tensor, then normalize each channel.
transform = transforms.Compose(                 # takes a list of transforms
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # (means, stds): image = (image - mean) / std
    ]
)

# Training split, downloaded into ./data when missing.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)

# Shuffled mini-batches of 4 images, loaded by 2 worker processes.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

# Test split with the same preprocessing.
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)

# Test batches are not shuffled.
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

# Class names, indexed by the integer label.
classes = ('plane', 'car', 'bird', 'cat','deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [None]:
import matplotlib.pyplot as plt

import numpy as np

def imshow(img):
    """Draw a normalized image tensor with matplotlib.

    Args:
        img: Image tensor laid out as (channels, height, width) whose values
            were normalized with mean 0.5 and std 0.5.
    """
    # Undo the normalization: image = image * std + mean.
    restored = img / 2 + 0.5
    # Reorder the axes to (height, width, channels) before plotting.
    channels_last = np.transpose(restored.numpy(), (1, 2, 0))
    plt.imshow(channels_last)
    
# Fetch one random batch; every step of the loader iterator yields a whole batch.
dataiter = iter(trainloader)
# Use the builtin next(): DataLoader iterators in current PyTorch have no .next() method.
images, labels = next(dataiter)  # images: (batch, c, h, w), labels: (batch,)
print(images.size())

# Show the images
imshow(torchvision.utils.make_grid(images))  # make_grid tiles the batch into one image

# Iterate over the real batch length instead of a hard-coded 4.
print(''.join('%10s' % classes[labels[j]] for j in range(len(labels))))

## 定义一个卷积神经网络
从之前的神经网络一节复制神经网络代码，并修改为输入3通道图像

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """LeNet-style CNN for 3-channel 32x32 images that outputs 10 class scores.

    Spatial size: 32 -> 28 (conv1) -> 14 (pool) -> 10 (conv2) -> 5 (pool),
    giving the 16 * 5 * 5 features consumed by ``fc1``.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  # shared by both conv stages
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the 10 raw class scores for each image in the batch."""
        # Each stage is conv -> ReLU -> 2x2 max-pool.
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        # Flatten to (batch, 400) for the fully connected layers.
        flat = x.view(-1, 16 * 5 * 5)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)
    
# Build the 3-channel network (this rebinds `net`) and print its layers.
net = Net()
print(net)

## 定义损失函数和优化器

In [None]:
import torch.optim as optim

# Cross-entropy loss for classification, optimized with SGD plus momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = 0.001, momentum=0.9)

## 训练网络

In [None]:
# Train for two full passes over the training set.
for epoch in range(2):
    running_loss = 0.0

    # Each item yielded by the loader is an (inputs, labels) batch.
    for i, (inputs, labels) in enumerate(trainloader):
        # Reset gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update.
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the loss and report its mean every 2000 mini-batches.
        running_loss += loss.item()
        if (i + 1) % 2000 == 0:
            print('[%d %5d] loss:%0.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finish Training')

## 测试

In [None]:
# This section evaluates the model, so draw the sample batch from the test
# loader rather than from the data the network was trained on.
dataiter = iter(testloader)
# Use the builtin next(): DataLoader iterators in current PyTorch have no .next() method.
images, labels = next(dataiter)

imshow(torchvision.utils.make_grid(images))
# Ground-truth labels, one per image in the batch (no hard-coded batch size).
print('GT: ', ' '.join('%6s' % classes[labels[j]] for j in range(len(labels))))

In [None]:
outputs = net(images)  # run the batch through the network to get class scores
print(outputs)
print(outputs.data)

In [None]:
_, predicted = torch.max(outputs, 1)  # torch.max over dim 1 returns (values, indices); the indices are the predicted classes
print(predicted)
print('Predicted: ', ' '.join('%5s' % classes[predicted[j]] for j in range(4)))

In [None]:
# Measure accuracy over the whole test set.
correct = 0.0
total = 0

# Gradients are not needed during evaluation.
with torch.no_grad():
    for images, labels in testloader:
        outputs = net(images)
        # dim=1 takes the maximum over each sample's class scores (dim=0 would go across the batch).
        _, predicted = torch.max(outputs.data, 1)
        hits = (predicted == labels).sum().item()
        correct += hits
        total += labels.size(0)

print("Acc of the net on the 10000 test imgs : %d %%" % (100 * correct / total))

在识别哪一个类的时候好，哪一个不好呢？

In [None]:
# Per-class tallies: correct predictions and samples seen.
class_correct = [0.] * 10
class_total = [0.] * 10
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = predicted == labels
        # Pair each label with its own hit flag so the tally follows the real
        # batch length. A hard-coded range(4) over a squeezed tensor breaks on
        # a batch of one or on a shorter final batch.
        for label, hit in zip(labels.tolist(), c.tolist()):
            class_correct[label] += hit
            class_total[label] += 1


for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

# MNIST数据集手写数字识别

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, models, transforms

## 设置超参数

In [2]:
Batch_size = 512  # samples per mini-batch for both loaders
Epochs = 20  # number of train/test rounds to run
Device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # use the GPU when available, otherwise the CPU

In [8]:
# Shared preprocessing: convert to a tensor, then normalize with the given mean and std.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307, ), (0.3081, )),
])

# Training split; downloaded into ./data when it is not there yet.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=True, download=True, transform=mnist_transform),
    batch_size=Batch_size,
    shuffle=True,
)


# Test split, read from the same ./data directory.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=False, transform=mnist_transform),
    batch_size=Batch_size,
    shuffle=True,
)

## 网络计算
卷积层输出尺寸的计算公式：

$$\text{output} = \left\lfloor \frac{n - f + 2p}{s} \right\rfloor + 1$$

其中：
- n 是输入的大小
- f 是卷积核大小
- p 是padding大小
- s 是步长

In [9]:
class MNIST_NET(nn.Module):
    """Small CNN for 1x28x28 images that returns log-probabilities over 10 classes.

    Spatial size: 28 -> 24 (conv1, 5x5) -> 12 (2x2 max-pool) -> 10 (conv2, 3x3),
    which gives the 20 * 10 * 10 features fed to ``fc1``.
    """

    def __init__(self):
        super().__init__()
        # Conv2d arguments: in_channels, out_channels, kernel_size.
        self.conv1 = nn.Conv2d(1, 10, 5)
        self.conv2 = nn.Conv2d(10, 20, 3)
        self.fc1 = nn.Linear(20 * 10 * 10, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        """Map a (batch, 1, 28, 28) tensor to (batch, 10) log-probabilities."""
        batch = x.size(0)  # first dimension is the batch size, e.g. 512 for a full batch
        feats = F.relu(self.conv1(x))
        feats = F.max_pool2d(feats, 2, 2)
        feats = F.relu(self.conv2(feats))
        # Flatten each sample's feature maps into a single vector.
        feats = feats.view(batch, -1)
        hidden = F.relu(self.fc1(feats))
        logits = self.fc2(hidden)
        # Normalize across the class dimension of each sample.
        return F.log_softmax(logits, dim=1)

In [12]:
model = MNIST_NET().to(Device)  # build the network and move it to the selected device
optimizer = optim.Adam(model.parameters())  # Adam with its default arguments

In [14]:
def train(model, device, train_loader, optimizer, epoch):
    """Run one training epoch and print progress every 30 batches.

    Args:
        model: Network whose output is passed to ``F.nll_loss`` (so it should
            produce log-probabilities).
        device: Device the batches are moved to before the forward pass.
        train_loader: Iterable of ``(data, target)`` batches that also exposes
            ``.dataset`` and ``len()``.
        optimizer: Optimizer holding the model's parameters.
        epoch: Epoch number; used only in the progress message.
    """
    model.train()  # switch to training mode (affects BatchNorm and dropout layers)
    total = len(train_loader.dataset)
    seen = 0  # samples processed so far in this epoch
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()  # reset gradients from the previous step
        output = model(data)  # forward pass
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        # Count the batch that just finished, so the progress line reports the
        # work actually done (the last batch may be smaller than the others).
        seen += len(data)
        if (batch_idx + 1) % 30 == 0:
            print("Train Epoch: {} [{} / {} ({:.0f}%)]\tLoss:{:.6f}".format(
                epoch, seen, total,
                100. * (batch_idx + 1) / len(train_loader), loss.item()))

In [15]:
def test(model , device, test_loader):
    model.eval()
    test_loss = 0.
    correct = 0. 
    with torch.no_grad():  # 不进行梯度下降
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # 将同一batch的损失相加
            pred = output.max(1, keepdim=True)[1]  # 返回最大值的位置
            correct += pred.eq(target.view_as(pred)).sum().item()
            
    test_loss /= len(test_loader.dataset)
    print("\nTest set:Avg loss:{:.4f}, Acc:{}/{} ({:.0f}%)\n".format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [None]:
# For each epoch (numbered from 1), train once over the training data, then evaluate on the test data.
for epoch in range(1, Epochs + 1):
    train(model, Device, train_loader, optimizer, epoch)
    test(model, Device, test_loader)