# 目录
* 创建tensor
* 查看tensor的基本属性
* Squeeze and unsqueeze
* Reshape
* Variable
* 计算导数
* 制作数据集
* 载入数据 dataloader
* 处理batch内的数据 collate_fn
* 构造网络基本操作
    * 各种层
    * 训练需要的操作
    * 初始化
    * 模型保存

In [3]:
#载入Pytorch
import torch
import numpy as np

## 创建tensor

In [5]:
# Build a tensor from a NumPy array in two different ways.
numpy_tensor = np.random.randn(10, 20)
# torch.Tensor(...) converts the float64 array into the default float32 tensor type.
pytorch_tensor1 = torch.Tensor(numpy_tensor)
# torch.from_numpy(...) keeps the array's float64 dtype.
pytorch_tensor2 = torch.from_numpy(numpy_tensor)
# Or create a random tensor directly.
tensor = torch.randn(10, 20)

# Put a tensor on the first GPU when one is present. An unconditional
# .cuda(0) raises on CPU-only machines, so the tensor stays on the CPU there.
gpu_tensor = torch.randn(10, 20)
if torch.cuda.is_available():
    gpu_tensor = gpu_tensor.cuda(0)
# Bring the tensor back to host memory.
cpu_tensor = gpu_tensor.cpu()

## 查看tensor的基本属性

In [None]:
# The size of a tensor can be read in either of these two ways
print(pytorch_tensor1.shape)
print(pytorch_tensor1.size())
# Data type of the tensor
print(pytorch_tensor1.type())
# Number of dimensions of the tensor
print(pytorch_tensor1.dim())
# Total number of elements in the tensor
print(pytorch_tensor1.numel())

In [6]:
# Draw a random 4x3 matrix and show it.
x = torch.randn(4, 3)
print(x)
# Reduce over dim 1: one (max value, index of the max) pair per row.
max_value, max_idx = x.max(dim=1)
# Reduce over dim 1 again, this time summing the entries of each row.
sum_x = x.sum(dim=1)
print(sum_x)


-0.1318 -0.1992  0.3353
-1.0342  1.1241  1.2663
 0.5975 -0.9291 -0.3803
-0.5924  0.5201 -0.8944
[torch.FloatTensor of size 4x3]


 0.0043
 1.3562
-0.7119
-0.9667
[torch.FloatTensor of size 4]



## Squeeze and unsqueeze

In [7]:
# Add or remove dimensions
print(x.shape)
x = x.unsqueeze(0) # insert a new dimension in first position
print(x.shape)
x = x.unsqueeze(1) # insert a new dimension in second position
print(x.shape)

torch.Size([4, 3])
torch.Size([1, 4, 3])
torch.Size([1, 1, 4, 3])


In [8]:
x = x.squeeze(0) # drop the first dimension
print(x.shape)
x = x.squeeze() # drop every dimension of size one
print(x.shape)

torch.Size([1, 4, 3])
torch.Size([4, 3])


## Reshape

In [9]:
x = torch.randn(3, 4, 5)
print(x.shape)

# Swap dimensions with permute and transpose
x = x.permute(1, 0, 2) # permute rearranges the dimensions of the tensor
print(x.shape)

x = x.transpose(0, 2)  # transpose swaps two dimensions of the tensor
print(x.shape)

torch.Size([3, 4, 5])
torch.Size([4, 3, 5])
torch.Size([5, 3, 4])


In [10]:
# Reshape a tensor with view
x = torch.randn(3, 4, 5)
print(x.shape)

x = x.view(-1, 5) # -1 stands for "whatever size fits"; 5 makes the second dimension 5
print(x.shape)

x = x.view(3, 20) # reshape again, to size (3, 20)
print(x.shape)

torch.Size([3, 4, 5])
torch.Size([12, 5])
torch.Size([3, 20])


## Variable

In [11]:
# 通过下面这种方式导入 Variable
from torch.autograd import Variable

In [39]:
x_tensor = torch.randn(10, 5)
y_tensor = torch.randn(5, 10)

# Wrap the tensors in Variables
x = Variable(x_tensor, requires_grad=True) # a Variable does not require gradients by default, so ask for them explicitly
y = Variable(y_tensor, requires_grad=True)

## 计算导数

In [40]:
#z = torch.sum(x + y)
z = torch.sum(torch.mm(x,y))
# Print the value held by z; grad_fn shows that z was produced by a Sum operation and records how gradients are propagated back
print(z.data)
print(z.grad_fn)


-8.0746
[torch.FloatTensor of size 1]

<SumBackward0 object at 0x7fc7381c3690>


In [41]:
# Compute the gradients of z with respect to x and y.
z.backward()
# print(...) with one argument behaves the same on Python 2 and Python 3;
# the bare `print expr` statement is a SyntaxError on Python 3.
# Since z = sum(x @ y), every row of y.grad equals one column sum of x and
# every row of x.grad equals the row sums of y; the first two prints show
# the inputs those gradients can be checked against.
print(torch.sum(x.data, dim=0))
print(y.data)
print(x.grad)
print(y.grad)


 0.0447
 0.9534
-2.3593
 2.4125
 0.5277
[torch.FloatTensor of size 5]


-1.4006 -0.3198  0.6183 -0.7934  0.7652 -0.8130  0.0484  0.4200 -1.4913  0.1775
 0.0587  0.1683  0.7999 -0.5564  0.3898 -0.4051 -1.2345  1.2448 -0.7991  1.8380
-0.5075 -1.4926  0.7119  0.3476  2.4557  0.1621  1.9239  0.1158 -0.5363 -0.1629
-0.0153 -0.0329 -0.3753 -1.8673  0.5805 -0.6526 -1.3901  0.7534  1.0425  1.5788
-1.8903  1.8642  1.1450 -2.0341 -1.6079 -0.5391 -1.6208  1.5804  0.4074  0.1336
[torch.FloatTensor of size 5x10]

Variable containing:
-2.7886  1.5045  3.0177 -0.3784 -2.5616
-2.7886  1.5045  3.0177 -0.3784 -2.5616
-2.7886  1.5045  3.0177 -0.3784 -2.5616
-2.7886  1.5045  3.0177 -0.3784 -2.5616
-2.7886  1.5045  3.0177 -0.3784 -2.5616
-2.7886  1.5045  3.0177 -0.3784 -2.5616
-2.7886  1.5045  3.0177 -0.3784 -2.5616
-2.7886  1.5045  3.0177 -0.3784 -2.5616
-2.7886  1.5045  3.0177 -0.3784 -2.5616
-2.7886  1.5045  3.0177 -0.3784 -2.5616
[torch.FloatTensor of size 10x5]

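Variable containing:
 0.0447  0.0447  0.0447  0.0447  0.0447  0.0447  0.0447  0.0447  0.0447  0.0447
 0.9534  0.9534  0.9534  0.9534  0.9534  0.9534  0.9534  0.9534  0.9534  0.9534
-2.3593 -2.3593 -2.3593 -2.3593 -2.3593 -2.3593 -2.3593 -2.3593 -2.3593 -2.3593
 2.4125  2.4125  2.4125  2.4125  2.4125  2.4125  2.4125  2.4125  2.4125  2.4125
 0.5277  0.5277  0.5277  0.5277  0.5277  0.5277  0.5277  0.5277  0.5277  0.5277
[torch.FloatTensor of size 5x10]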

## 计算导数例子

In [46]:
# Scalar output: y = x ** 3, so dy/dx = 3 * x ** 2, which is 12 at x = 2.
x = Variable(torch.FloatTensor([2]), requires_grad=True)
y = x ** 3
# print(...) instead of the Python 2 print statement, matching the other cells.
print(y.grad_fn)

y.backward()
print(x.grad)

<PowBackward0 object at 0x7fc7391afb90>
Variable containing:
 12
[torch.FloatTensor of size 1]



In [48]:
# Vector-valued output
m = Variable(torch.FloatTensor([[2, 2]]), requires_grad=True) # build a 1 x 2 matrix
n = Variable(torch.zeros(1, 2)) # build a zero matrix of the same size
print(m)
print(n)
# Compute the new entries of n from the values in m
n[0, 0] = m[0, 0] ** 2
n[0, 1] = m[0, 1] ** 3
print(n)
n.backward(torch.ones_like(n)) # take the weights (w0, w1) to be (1, 1)
print(m.grad)

Variable containing:
 2  2
[torch.FloatTensor of size 1x2]

Variable containing:
 0  0
[torch.FloatTensor of size 1x2]

Variable containing:
 4  8
[torch.FloatTensor of size 1x2]

Variable containing:
  4  12
[torch.FloatTensor of size 1x2]



## 多次自动求导

In [50]:
x = Variable(torch.FloatTensor([3]), requires_grad=True)
y = x * 2 + x ** 2 + 3
print(y)
y.backward(retain_graph=True) # retain_graph=True keeps the computation graph
print(x.grad)
y.backward() # differentiate once more, this time without keeping the graph
print(x.grad)
# The gradient was computed twice, and the two results are added together in x.grad

Variable containing:
 18
[torch.FloatTensor of size 1]

Variable containing:
 8
[torch.FloatTensor of size 1]

Variable containing:
 16
[torch.FloatTensor of size 1]



**小练习**

定义

$$
x = 
\left[
\begin{matrix}
x_0 \\
x_1
\end{matrix}
\right] = 
\left[
\begin{matrix}
2 \\
3
\end{matrix}
\right]
$$

$$
k = (k_0,\ k_1) = (x_0^2 + 3 x_1,\ 2 x_0 + x_1^2)
$$

我们希望求得

$$
j = \left[
\begin{matrix}
\frac{\partial k_0}{\partial x_0} & \frac{\partial k_0}{\partial x_1} \\
\frac{\partial k_1}{\partial x_0} & \frac{\partial k_1}{\partial x_1}
\end{matrix}
\right]
$$

参考答案：

$$
\left[
\begin{matrix}
4 & 3 \\
2 & 6 \\
\end{matrix}
\right]
$$

In [53]:
# Exercise solution: assemble the 2x2 Jacobian of k with respect to x,
# one row per backward pass.
x = Variable(torch.FloatTensor([2, 3]), requires_grad=True)
k = Variable(torch.zeros(2))

# k = (x0^2 + 3*x1, x1^2 + 2*x0), filled in entry by entry.
k[0] = x[0] ** 2 + 3 * x[1]
k[1] = x[1] ** 2 + 2 * x[0]
print(k)

j = torch.zeros(2, 2)

# Seeding backward() with the i-th unit vector leaves row i of the Jacobian
# in x.grad.
for row_idx, unit_vec in enumerate(([1, 0], [0, 1])):
    if row_idx > 0:
        x.grad.data.zero_() # clear the gradient left by the previous pass
    # Only the first pass has to keep the graph alive for the one after it.
    k.backward(torch.FloatTensor(unit_vec), retain_graph=(row_idx == 0))
    j[row_idx] = x.grad.data

print(j)

Variable containing:
 13
 13
[torch.FloatTensor of size 2]


 4  3
 2  6
[torch.FloatTensor of size 2x2]



## 制作数据集

In [None]:
from torch.utils.data import Dataset
# Define a subclass of Dataset named custom_dataset
class custom_dataset(Dataset):
    """Dataset backed by a text file of whitespace-separated ``image label`` lines.

    Args:
        txt_path: path of the annotation file. The first two
            whitespace-separated fields of every non-blank line are taken
            as the image name and its label.
        transform: optional callable applied to the image entry in
            ``__getitem__``.

    Raises:
        IndexError: if a non-blank line has fewer than two fields.
    """

    def __init__(self, txt_path, transform=None):
        self.transform = transform  # optional preprocessing hook
        img_list = []
        label_list = []
        with open(txt_path, 'r') as f:
            for line in f:
                fields = line.split()  # split each line only once
                if not fields:
                    # Blank lines (for example a trailing empty line) hold
                    # no sample; skip them instead of failing on fields[0].
                    continue
                name, label = fields[0], fields[1]
                img_list.append(name)
                label_list.append(label)

        self.img_list = img_list  # all image names, in file order
        self.label_list = label_list  # all labels, kept as strings

    def __getitem__(self, idx):
        """Return the ``(img, label)`` pair stored at position ``idx``."""
        img = self.img_list[idx]
        label = self.label_list[idx]
        if self.transform is not None:
            img = self.transform(img)
        return img, label

    def __len__(self):
        """Return the number of samples."""
        return len(self.label_list)
txt_dataset = custom_dataset('./example_data/train.txt') # read the txt file
data, label = txt_dataset[0] # fetch the first (data, label) pair

## 载入数据 dataloader

In [None]:
from torch.utils.data import DataLoader
# NOTE(review): folder_set is not defined in the visible cells — presumably a Dataset created earlier; confirm
train_data1 = DataLoader(folder_set, batch_size=2, shuffle=True) # group 2 samples into one batch
# usage
for im, label in train_data1: # walk through the iterator
    print(label)

## 处理batch内的数据

In [None]:
def collate_fn(batch):
    """Collate ``(img, label)`` samples, padding the labels to one length.

    Samples are ordered by label length, longest first, and every label is
    right-padded with ``'0'`` characters up to the length of the longest one.

    Args:
        batch: list of ``(img, label)`` pairs whose labels are strings.
            The list itself is left unmodified.

    Returns:
        A tuple ``(img, pad_label, lens)``: the images as a tuple in sorted
        order, the padded labels as a list, and the true (unpadded) label
        lengths as a list. An empty batch gives ``((), [], [])``.
    """
    if not batch:
        # Nothing to unpack or pad.
        return (), [], []
    # sorted() instead of list.sort() so the caller's list keeps its order.
    ordered = sorted(batch, key=lambda sample: len(sample[1]), reverse=True)
    img, label = zip(*ordered)  # split the pairs into images and labels
    max_len = len(label[0])  # the longest label comes first after sorting
    lens = [len(text) for text in label]
    pad_label = [text + '0' * (max_len - len(text)) for text in label]
    return img, pad_label, lens
train_data3 = DataLoader(txt_dataset, 8, True, collate_fn=collate_fn) # batch size set to 8

## 构造网络基本操作

In [None]:
# Data loading
from torch.utils.data import DataLoader
# Build a data iterator with PyTorch's built-in DataLoader
# train_set: a user-defined Dataset (NOTE(review): not defined in the visible cells — supply your own)
train_data = DataLoader(train_set, batch_size=64, shuffle=True)
# Look at the data of one batch
a, a_label = next(iter(train_data))

## 简单网络

In [None]:
from torch import nn
# Define a 4-layer neural network (fully connected layers) with Sequential
net = nn.Sequential(
    nn.Linear(784, 400),
    nn.ReLU(),
    nn.Linear(400, 200),
    nn.ReLU(),
    nn.Linear(200, 100),
    nn.ReLU(),
    nn.Linear(100, 10)
)

## 各种层

In [None]:
from torch import nn
# Convolution layer
# NOTE(review): in_channel, out_channel, kernel, stride and padding are placeholders that this cell does not define
conv = nn.Conv2d(in_channel, out_channel, kernel, stride, padding)
# Image size after the convolution: (N - Kernel + 2 * padding) / stride + 1
# Parameters a convolution needs: weights: Kernel * Kernel * in_channel * out_channel  Bias: out_channel
# Receptive field computation: pseudo-code kept in the string below, where RF
# starts as the receptive-field size on the feature map being computed and is
# updated layer by layer from the top layer down.
'''RF = 1 #待计算的feature map上的感受野大小
　　for layer in （top layer To down layer）:
　　　　RF = ((RF -1)* stride) + fsize
'''

# Batch normalisation layer
# No trailing comma after the call: `bn = nn.BatchNorm2d(...),` would bind a
# 1-tuple holding the layer rather than the layer itself.
# NOTE(review): out_channel is a placeholder that this cell does not define.
bn = nn.BatchNorm2d(out_channel, eps=1e-3)

# ReLU activation layer (the positional True is the inplace argument)
nn.ReLU(True)

# Pooling layer
# No trailing comma after the call: `maxpool = nn.MaxPool2d(...),` would bind
# a 1-tuple holding the layer rather than the layer itself.
maxpool = nn.MaxPool2d(3, stride=1, padding=1)
# Image size after pooling: (N - Kernel + 2*padding)/stride + 1
# Pooling needs no parameters

# Fully connected layer
fc = nn.Linear(400, 200)

## 训练需要的操作

In [None]:
# Loss function
criterion = nn.CrossEntropyLoss()
# Optimizer: stochastic gradient descent with a learning rate of 0.1
optimizer = torch.optim.SGD(net.parameters(), 1e-1)

# Running totals over the batches; they have to exist before the first `+=`.
train_loss = 0
train_acc = 0
for im, label in train_data:
    im = Variable(im)
    label = Variable(label)
    # Forward pass
    out = net(im)
    loss = criterion(out, label)
    # Backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # Record the loss. .item() reads the Python number out of the loss
    # tensor; indexing it as loss.data[0] fails on 0-dim tensors in
    # PyTorch >= 0.5.
    train_loss += loss.item()
    # Classification accuracy of this batch
    _, pred = out.max(1)
    num_correct = (pred == label).sum().item()
    # float() keeps this a true division under Python 2 as well.
    acc = float(num_correct) / im.shape[0]
    train_acc += acc

## 初始化

In [None]:
import numpy as np
import torch
from torch import nn
# Define a Sequential model
net1 = nn.Sequential(
    nn.Linear(30, 40),
    nn.ReLU(),
    nn.Linear(40, 50),
    nn.ReLU(),
    nn.Linear(50, 10)
)
# Option 1: overwrite the weight of one layer from a NumPy array.
# NumPy draws float64 samples; .float() converts them to float32 so the weight
# keeps the same dtype as the layer's bias and as float32 inputs (a float64
# weight makes the forward pass fail with a dtype mismatch).
net1[0].weight.data = torch.from_numpy(np.random.uniform(3, 5, size=(40, 30))).float()

# Option 2: re-initialise every linear layer in a loop.
for layer in net1:
    if isinstance(layer, nn.Linear):  # only handle the linear layers
        param_shape = tuple(layer.weight.shape)
        # Normal distribution with mean 0 and standard deviation 0.5
        # (the second argument of np.random.normal is the std, not the variance).
        layer.weight.data = torch.from_numpy(np.random.normal(0, 0.5, size=param_shape)).float()

# Option 3: use the initialisers of torch.nn.init. The names ending in an
# underscore are the in-place versions; the names without it are deprecated.
from torch.nn import init
init.xavier_uniform_(net1[0].weight)

fc = nn.Linear(400, 200)
#fc = fc.cuda()

# Initialise from a mean / std; alternatives kept for reference:
#fc.weight.data.normal_(0,0.005)
#nn.init.normal_(fc.weight.data,0,0.005)
#nn.init.xavier_normal_(fc.weight.data,1)
nn.init.kaiming_normal_(fc.weight.data)

# print(...) instead of the Python 2 print statement, matching the other cells.
print(fc.weight.data)
print(fc.weight.data.mean())
print(fc.weight.data.std())

In [None]:
# Initialising the layers of a network defined as a class
class sim_net(nn.Module):
    """Three Linear + ReLU stages mapping 30 -> 40 -> 50 -> 10 features."""

    def __init__(self):
        super(sim_net, self).__init__()
        self.l1 = nn.Sequential(
            nn.Linear(30, 40),
            nn.ReLU()
        )

        self.l1[0].weight.data = torch.randn(40, 30)  # initialise one layer directly

        self.l2 = nn.Sequential(
            nn.Linear(40, 50),
            nn.ReLU()
        )

        self.l3 = nn.Sequential(
            nn.Linear(50, 10),
            nn.ReLU()
        )

    def forward(self, x):
        """Pass ``x`` through l1, l2 and l3 in order and return the result."""
        x = self.l1(x)
        x = self.l2(x)
        x = self.l3(x)
        return x


# The loop below needs a model instance, and the cell never created one.
net2 = sim_net()
# modules() also yields the Linear layers nested inside the Sequential blocks.
for layer in net2.modules():
    if isinstance(layer, nn.Linear):
        param_shape = tuple(layer.weight.shape)
        # NumPy draws float64 samples; convert to float32 so the weight keeps
        # the same dtype as the bias and as float32 inputs.
        layer.weight.data = torch.from_numpy(np.random.normal(0, 0.5, size=param_shape)).float()

## 模型保存

In [None]:
# Save and load only the model parameters (recommended)
# NOTE(review): model_object is a placeholder for an existing model instance — not defined in the visible cells
torch.save(model_object.state_dict(), 'params.pkl')
model_object.load_state_dict(torch.load('params.pkl'))

# Save and load the whole model
# NOTE(review): loading a whole model presumably goes through pickle, so only load files from a trusted source — confirm against the torch.load documentation
torch.save(model_object, 'model.pkl')
model = torch.load('model.pkl')