# 基础数据操作相关API

In [1]:
import torch
import numpy as np
import pandas as pd

In [2]:
# Build a tensor from nested Python lists with the torch.Tensor constructor,
# then inspect its dtype and its type string (float32 / torch.FloatTensor in the output below).
a = torch.Tensor([[1,2,3],[4,5,6]])
print(a)
print(a.dtype)
print(a.type())

tensor([[1., 2., 3.],
        [4., 5., 6.]])
torch.float32
torch.FloatTensor


In [3]:
# Passing sizes instead of data creates a 2x3 tensor; the values printed below are arbitrary
# (presumably uninitialized memory - TODO confirm against the torch.Tensor docs).
a = torch.Tensor(2, 3)
print(a)
print(a.type())

tensor([[7.5554e+28, 1.7777e+31, 5.2392e-11],
        [5.3379e+31, 4.2964e+24, 7.5555e+31]])
torch.FloatTensor


In [4]:
# 2x3 tensor filled with ones
a = torch.ones(2, 3)
print(a)
print(a.type())

tensor([[1., 1., 1.],
        [1., 1., 1.]])
torch.FloatTensor


In [5]:
a = torch.eye(2, 3) # typically used to build an identity matrix; here 2x3 with ones on the main diagonal
print(a)

tensor([[1., 0., 0.],
        [0., 1., 0.]])


In [6]:
# 2x3 tensor filled with zeros
a = torch.zeros(2, 3)
print(a)

tensor([[0., 0., 0.],
        [0., 0., 0.]])


In [7]:
b = torch.ones_like(a) # all-ones tensor with the same shape as a (a comes from the previous cell)
print(b)

tensor([[1., 1., 1.],
        [1., 1., 1.]])


In [116]:
# Random sampling
# Fix the torch random seed
torch.manual_seed(666)
a = torch.rand(2, 2) # sample from the uniform distribution on [0, 1)
print(a)
a = torch.normal(mean=0, std=torch.rand(5)) # draw 1 value from each of 5 normal distributions (same mean)
print(a)
a = torch.normal(mean=torch.rand(5), std=torch.rand(5)) # draw 1 value from each of 5 normal distributions
print(a)
a = np.random.normal(loc=[0, 100], scale=[1, 2], size=[10, 2]) 
# numpy can do this too: sample from 2 distributions at once, one distribution per column.
# NOTE(review): the original note says torch does not support this - TODO confirm for the torch version in use
print(a)

tensor([[0.3119, 0.2701],
        [0.1118, 0.1012]])
tensor([0.1790, 0.0072, 0.0977, 0.1582, 0.0467])
tensor([0.3437, 0.4959, 0.9157, 1.3216, 0.5133])
[[-4.84831321e-02  9.98318002e+01]
 [ 7.32938291e-02  1.00794624e+02]
 [-6.44526809e-01  9.88820567e+01]
 [-1.09776913e-01  9.82615132e+01]
 [ 1.37930820e-01  9.76756827e+01]
 [-1.82387258e+00  1.00031312e+02]
 [ 2.63948948e-01  1.00460321e+02]
 [-1.55794891e+00  1.01989798e+02]
 [ 4.91901035e-01  9.90212363e+01]
 [-1.36130821e+00  9.88244449e+01]]


In [9]:
a = torch.Tensor(5, 5).uniform_(-1, 1) # fill a 5x5 tensor in place with uniform samples between -1 and 1
a

tensor([[ 0.4072,  0.1162, -0.8812,  0.9023,  0.1591],
        [-0.8144, -0.9872, -0.1468, -0.1269, -0.0321],
        [ 0.6604,  0.6064, -0.9005, -0.0965,  0.2162],
        [ 0.7332, -0.3575, -0.7809, -0.7300,  0.0055],
        [ 0.1569,  0.8255,  0.7798, -0.2301, -0.4436]])

In [10]:
# Sequences
# arange(start, end, step): the output below runs from 1 to 9, so the end value 10 is excluded
a = torch.arange(1, 10, 1)
print(a)
# linspace(start, end, steps): 3 evenly spaced values from 2 to 10, both ends included
a = torch.linspace(2, 10, 3)
print(a)

tensor([1, 2, 3, 4, 5, 6, 7, 8, 9])
tensor([ 2.,  6., 10.])


In [11]:
a = torch.randperm(10) # random permutation of the indices 0..9 (shuffled indices)
print(a)
# numpy equivalent
a = np.random.permutation(10)
print(a)

tensor([5, 2, 1, 3, 8, 9, 4, 0, 6, 7])
[0 6 7 3 9 5 1 8 4 2]


In [12]:
# Tensor attributes
# torch.device('cuda:0') would select a CUDA GPU; the author's Apple M1 Mac has no CUDA, so 'cpu' is used
a = torch.tensor([1,2], dtype=torch.float32, device=torch.device('cpu'))
print(a)
# Sparse tensor in COO format: column k of `indices` is the (row, col) position of values[k]
indices = torch.tensor([[0, 1, 1], [2, 0, 2]])
values = torch.tensor([3, 4, 5], dtype=torch.float32)
x = torch.sparse_coo_tensor(indices, values, [2, 4], dtype=torch.int32, device=torch.device('cpu'))
print(x.to_dense())

tensor([1., 2.])
tensor([[0, 0, 3, 0],
        [4, 0, 5, 0]], dtype=torch.int32)


In [13]:
# Algebraic operations
# Addition / subtraction: add, sub
a = torch.tensor([2,3])
b = torch.tensor([4,5])
c = a + b
print(c)
c = torch.add(a, b)
print(c)
c = a.add(b)
print(c)
a.add_(b) # in-place: a = a + b overwrites a; a trailing underscore means the same for the other ops
print(a)

# Multiplication: mul is the Hadamard product (element-wise); division (div) works analogously
c = a * b
print(c)
c = torch.mul(a, b)
print(c)
c = a.mul(b)
print(c)
a.mul_(b)
print(a)

# Matrix multiplication
a = torch.Tensor([[1], [4]])
b = torch.ones(1, 2) * 10
print(torch.matmul(a, b))
print(a @ b)
print(a.matmul(b))

# Higher-dimensional inputs are supported as well: the matrix product is taken over the last 2 dimensions
# and the leading dimensions must be compatible (they are identical in this example)
a = torch.ones(1, 2, 3, 4)
b = torch.ones(1, 2, 4, 2)
print((a @ b).shape)
print(a.matmul(b).shape)
print(torch.matmul(a, b).shape)

# Powers
a = torch.Tensor([[3, 4]])
print(torch.pow(a, 2))
print(a.pow(2))
print(a**2)
a.pow_(2)
print(a)
print(torch.exp(a)) # e^a
print(a.exp())
a.exp_()
print(a)

# Square root
print(a.sqrt())
a.sqrt_()
print(a)

# Logarithms
a = torch.tensor([1, 10, 4], dtype=torch.float32)
print(a.log10())
print(torch.log10(a))
a.log_() # log uses base e (natural logarithm)
print(a)

tensor([6, 8])
tensor([6, 8])
tensor([6, 8])
tensor([6, 8])
tensor([24, 40])
tensor([24, 40])
tensor([24, 40])
tensor([24, 40])
tensor([[10., 10.],
        [40., 40.]])
tensor([[10., 10.],
        [40., 40.]])
tensor([[10., 10.],
        [40., 40.]])
torch.Size([1, 2, 3, 2])
torch.Size([1, 2, 3, 2])
torch.Size([1, 2, 3, 2])
tensor([[ 9., 16.]])
tensor([[ 9., 16.]])
tensor([[ 9., 16.]])
tensor([[ 9., 16.]])
tensor([[8.1031e+03, 8.8861e+06]])
tensor([[8.1031e+03, 8.8861e+06]])
tensor([[8.1031e+03, 8.8861e+06]])
tensor([[  90.0171, 2980.9580]])
tensor([[  90.0171, 2980.9580]])
tensor([0.0000, 1.0000, 0.6021])
tensor([0.0000, 1.0000, 0.6021])
tensor([0.0000, 2.3026, 1.3863])


In [14]:
"""
in-place操作：就地操作，也称为原位操作，不允许使用临时变量，如加法add_等带下划线的操作
广播机制：满足一定条件，参与运算的2个变量的shape可自动对齐
1.每个张量至少有一个维度
2.满足右对齐：如a->shape(2, 1, 3), b->shape(3,),a+b时,b会被扩展到(1, 1, 3),再逐个比较shape的每个维度，需满足：值相同或其中一个为1
"""
# Broadcasting in torch: shapes (2, 1, 3) and (3,) combine into (2, 1, 3)
c = torch.rand(2, 1, 3) + torch.rand(3,)
print(c)
print(c.shape)

# numpy follows the same broadcasting mechanism: (2, 4, 1, 3) + (4, 2, 3) -> (2, 4, 2, 3)
c = np.random.rand(2, 4, 1, 3) + np.random.rand(4, 2, 3)
print(c)
print(c.shape)

tensor([[[1.0602, 1.5524, 1.3197]],

        [[0.7598, 1.4492, 0.9827]]])
torch.Size([2, 1, 3])
[[[[1.47118582 1.33780221 0.54004127]
   [1.04589429 1.16738278 0.41124551]]

  [[0.73005656 1.35408101 1.39176608]
   [0.58443805 0.61709771 1.35733496]]

  [[0.77535224 0.70587079 0.30410549]
   [1.18699594 0.88603694 0.74607413]]

  [[1.31243008 0.6470214  1.07103127]
   [1.89980652 0.57026855 1.7894615 ]]]


 [[[0.83008059 1.20883149 0.85903534]
   [0.40478905 1.03841206 0.73023958]]

  [[1.44618569 1.09474359 1.42894961]
   [1.30056718 0.3577603  1.39451849]]

  [[1.28513222 0.70842977 1.22537784]
   [1.69677592 0.88859592 1.66734649]]

  [[1.1561804  0.74955137 0.18614949]
   [1.74355684 0.67279852 0.90457972]]]]
(2, 4, 2, 3)


In [15]:
# Rounding | remainder
a = torch.rand(2, 2)
a *= 10
print(a)
print(torch.floor(a)) # round down (e.g. 1.4637 -> 1.)
print(torch.ceil(a)) # round up (e.g. 1.4637 -> 2.)
print(torch.round(a)) # round to the nearest integer
print(torch.trunc(a)) # truncate: keep only the integer part
print(torch.frac(a)) # keep only the fractional part
print(a % 2) # remainder

tensor([[1.4637, 4.0186],
        [2.8766, 6.7941]])
tensor([[1., 4.],
        [2., 6.]])
tensor([[2., 5.],
        [3., 7.]])
tensor([[1., 4.],
        [3., 7.]])
tensor([[1., 4.],
        [2., 6.]])
tensor([[0.4637, 0.0186],
        [0.8766, 0.7941]])
tensor([[1.4637, 0.0186],
        [0.8766, 0.7941]])


In [299]:
# Tensor comparison operations
a = torch.tensor([[2, 1, 3], [5, 1, 2]])
b = torch.tensor([[2, 1, 3], [5, 2, 2]])
print(a == b)
print(torch.eq(a, b)) # element-wise comparison of a and b: True where equal, False otherwise; the result is a tensor too
print(torch.ge(a, b)) # a >= b
print(torch.gt(a, b)) # a > b
print(torch.le(a, b)) # a <= b
print(torch.lt(a, b)) # a < b
print(torch.ne(a, b)) # a != b

a = torch.tensor([[2, 1, 3], [5, 1, 2]])
b = torch.tensor([[2, 1, 3], [5, 1, 2], [4, 1, 4]])
print(torch.equal(a, b)) # returns a single True/False: True only if the shape and every element match

tensor([[ True,  True,  True],
        [ True, False,  True]])
tensor([[ True,  True,  True],
        [ True, False,  True]])
tensor([[ True,  True,  True],
        [ True, False,  True]])
tensor([[False, False, False],
        [False, False, False]])
tensor([[True, True, True],
        [True, True, True]])
tensor([[False, False, False],
        [False,  True, False]])
tensor([[False, False, False],
        [False,  True, False]])
False


In [26]:
# Sorting | k largest | k smallest | k-th smallest
a = torch.rand(3, 5)
# The second argument of sort is dim: 0 sorts along the first dimension (each column ends up ordered),
# 1 sorts along the second dimension (each row ends up ordered);
# -1 means the last dimension, -2 the one before it, and so on. The default is -1.
sorts, indices = torch.sort(a)
print(sorts)
print(indices)

tensor([[0.0436, 0.1971, 0.2023, 0.9025, 0.9869],
        [0.3196, 0.4349, 0.4473, 0.5565, 0.5772],
        [0.2355, 0.2576, 0.4258, 0.5158, 0.7528]])
tensor([[1, 0, 2, 3, 4],
        [2, 1, 3, 4, 0],
        [4, 3, 2, 0, 1]])


In [34]:
a = torch.tensor([[2, 4, 3, 1, 5], [2, 6, 5, 1, 4]])
# Find the k largest elements (k=2 per row here) together with their indices
torch.topk(a, 2, dim=1)

torch.return_types.topk(
values=tensor([[5, 4],
        [6, 5]]),
indices=tensor([[4, 1],
        [1, 2]]))

In [35]:
# Find the k-th smallest element along a dimension (3rd smallest per row here; a comes from the previous cell)
torch.kthvalue(a, 3, dim=1)

torch.return_types.kthvalue(
values=tensor([3, 4]),
indices=tensor([2, 4]))

In [41]:
# Checking for nan, inf and finiteness
a = torch.rand(2, 3)
print(a)
print(a / 0)
print(torch.isinf(a / 0)) # test for inf
print(torch.isfinite(a / 0)) # test for finite values
print(torch.isfinite(a))
print(torch.isnan(a)) # test for nan
a = torch.tensor([1, np.nan, 2])
print(torch.isnan(a))

tensor([[0.8472, 0.2549, 0.7015],
        [0.2809, 0.3289, 0.2951]])
tensor([[inf, inf, inf],
        [inf, inf, inf]])
tensor([[True, True, True],
        [True, True, True]])
tensor([[False, False, False],
        [False, False, False]])
tensor([[True, True, True],
        [True, True, True]])
tensor([[False, False, False],
        [False, False, False]])
tensor([False,  True, False])


In [58]:
# Trigonometric functions in torch
a = torch.tensor([1, -2, 0])
print(torch.sin(a))
print(torch.cos(a))
# ... (further functions not shown)

tensor([ 0.8415, -0.9093,  0.0000])
tensor([ 0.5403, -0.4161,  1.0000])


In [59]:
# Other math functions in torch (a comes from the previous cell)
print(torch.abs(a))
print(torch.reciprocal(a)) # reciprocal 1/a
print(torch.sigmoid(a))
print(torch.relu(a))
print(torch.sign(a)) # sign function
# ... (further functions not shown)

tensor([1, 2, 0])
tensor([ 1.0000, -0.5000,     inf])
tensor([0.7311, 0.1192, 0.5000])
tensor([1, 0, 0])
tensor([ 1, -1,  0])


In [112]:
# Statistics functions in torch: torch.mean, torch.max, torch.std, ...
a = torch.tensor([[1, -2, 0], [4, 6, 8]], dtype=torch.float32)
print(torch.mean(a))
print(torch.mode(a))
# ... (further functions not shown)

# Histogram; arguments: (input, bins, min, max)
# The second argument, bins, is the number of equal-width intervals (each is closed on the left and
# open on the right, except the last one, which is closed on both sides)
# min and max give the range to count over; when min == max the minimum and maximum of the input
# are used instead; elements below min or above max are ignored
print(torch.histc(a, 5, 0, 0))

# Frequency of each value
a = torch.randint(0, 10, (10,))
print(a)
# bincount only supports 1-D non-negative integers (e.g. for clustering / classification results)
print(torch.bincount(a))

tensor(2.8333)
torch.return_types.mode(
values=tensor([-2.,  4.]),
indices=tensor([1, 0]))
tensor([1., 2., 0., 1., 2.])
tensor([0, 6, 6, 8, 6, 5, 2, 2, 1, 1])
tensor([1, 2, 2, 0, 0, 1, 3, 0, 1])


In [113]:
# torch.distributions 定义分布函数
# torch中还定义了衡量分布差异性的指标，如KL散度等
# kl divergence|transforms|constraint

In [302]:
# Norms
a = torch.Tensor([1, 2, 3])
b = torch.Tensor([4, 5, 6])
# p=2 distance between a and b (5.1962 in the output below)
print(torch.dist(a, b, p=2))

# Nuclear norm (used for low-rank constraints)
# torch.manual_seed(666)
a = torch.rand(5, 2)
print(torch.norm(a, p='nuc'))

tensor(5.1962)
tensor(2.4581)


In [140]:
# Matrix decompositions: LU (lower-triangular x upper-triangular), QR (orthogonal x upper-triangular),
# EVD (the author relates it to PCA), SVD (the author relates it to LDA)
# See the author's iPad notes for details
# a is the 5x2 matrix from the previous cell; the result has the fields U, S and V
torch.svd(a)

torch.return_types.svd(
U=tensor([[-0.4143,  0.3777],
        [-0.6097, -0.4521],
        [-0.1738, -0.3588],
        [-0.4661, -0.2697],
        [-0.4573,  0.6719]]),
S=tensor([1.1700, 0.3964]),
V=tensor([[-0.6803, -0.7329],
        [-0.7329,  0.6803]]))

In [150]:
# Tensor clamping
torch.manual_seed(666)
a = torch.rand(2, 2) * 10
print(a)
print(a.clamp(2, 6)) # keep values inside the given range; values outside are replaced by the nearest endpoint

tensor([[3.1190, 2.7013],
        [1.1178, 1.0119]])
tensor([[3.1190, 2.7013],
        [2.0000, 2.0000]])


In [182]:
# Tensor indexing: selecting | recombining data
a = torch.rand(4, 4)
b = torch.rand(4, 4)
print(a)
print(b)
# The condition is evaluated per position: where it holds take the element of a, otherwise the element of b
out = torch.where(a > 0.5, a, b)
print(out)

print('index_select------------------------------------------------------')
a = torch.rand(4, 4)
print(a)
# Recombine the slices along the given dimension in the order given by index
print(torch.index_select(a, dim=0, index=torch.tensor([0, 3, 2])))
# Indexing with a list gives the same result
print(a[[0, 3, 2]])

# gather
print('gather------------------------------------------------------')
a = torch.arange(1, 17).view(4, 4)
print(a)
# The output has the same shape as index (see the printed result for the meaning); it also recombines the source data
print(torch.gather(a, dim=0, index=torch.tensor([[0, 1, 1, 1], [2, 1, 0, 0]])))

# masked_select: three equivalent ways to pick the elements greater than 8
print(a)
mask = torch.gt(a, 8)
print(torch.masked_select(a, mask))
print(torch.masked_select(a, a > 8))
print(a[a > 8])

print('take------------------------------------------------------')
# take: similar to index_select, but a is treated as a 1-D vector before the indices are looked up
print(torch.take(a, index=torch.tensor([0, 15, 13, 10])))

print('nonzero------------------------------------------------------')
# nonzero: returns the indices of the non-zero elements
a = torch.tensor([[0, 1, 2, 0], [2, 3, 0, 1]])
out = torch.nonzero(a)
print(out)

tensor([[0.8841, 0.6998, 0.2183, 0.8387],
        [0.9829, 0.9044, 0.8159, 0.5987],
        [0.0748, 0.8345, 0.4716, 0.1894],
        [0.2303, 0.3802, 0.6539, 0.5621]])
tensor([[0.5167, 0.1744, 0.7737, 0.6038],
        [0.6025, 0.1154, 0.3679, 0.4395],
        [0.9794, 0.8654, 0.3408, 0.7495],
        [0.3279, 0.9853, 0.9525, 0.2259]])
tensor([[0.8841, 0.6998, 0.7737, 0.8387],
        [0.9829, 0.9044, 0.8159, 0.5987],
        [0.9794, 0.8345, 0.3408, 0.7495],
        [0.3279, 0.9853, 0.6539, 0.5621]])
index_select------------------------------------------------------
tensor([[0.2334, 0.8152, 0.9477, 0.7594],
        [0.3135, 0.3180, 0.3245, 0.0351],
        [0.1740, 0.9530, 0.3065, 0.8933],
        [0.4779, 0.8937, 0.7975, 0.7827]])
tensor([[0.2334, 0.8152, 0.9477, 0.7594],
        [0.4779, 0.8937, 0.7975, 0.7827],
        [0.1740, 0.9530, 0.3065, 0.8933]])
tensor([[0.2334, 0.8152, 0.9477, 0.7594],
        [0.4779, 0.8937, 0.7975, 0.7827],
        [0.1740, 0.9530, 0.3065, 0.8933]])
gat

In [193]:
# Combining and concatenating tensors: torch.cat, torch.stack
a = torch.zeros(2, 4)
b = torch.ones(2, 4)
# cat joins along an existing dimension: two (2, 4) tensors become (4, 4)
out = torch.cat([a, b], dim=0)
print(out)

# stack adds a new dimension and stacks the data along it: three (2, 3) tensors become (3, 2, 3)
a = torch.arange(1, 7).view(2, 3)
b = torch.arange(7, 13).view(2, 3)
c = torch.arange(7, 13).view(2, 3)
out = torch.stack([a, b, c], dim=0)
print(a.shape)
print(b.shape)
print(out.shape)

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])
torch.Size([2, 3])
torch.Size([2, 3])
torch.Size([3, 2, 3])


In [205]:
# Slicing tensors: chunk, split
a = torch.rand(5, 4)
# chunk: the 2nd argument is the number of pieces to cut into
print(torch.chunk(a, 2, dim=0))
print(torch.chunk(a, 2, dim=1))

# split: the 2nd argument is the size of each piece, e.g. 3 means 3 per piece;
# a list such as [1, 2, ...] gives the size of every piece individually (irregular split)
out = torch.split(a, 2, dim=0)
print(out)
print('------------------------------------------------------')
out = torch.split(a, [1, 2, 2], dim=0)
print(out)

(tensor([[0.1516, 0.6787, 0.7833, 0.8108],
        [0.2168, 0.5138, 0.7992, 0.7491],
        [0.0815, 0.8425, 0.7910, 0.3669]]), tensor([[0.5206, 0.2379, 0.7905, 0.4837],
        [0.6620, 0.6338, 0.4499, 0.7848]]))
(tensor([[0.1516, 0.6787],
        [0.2168, 0.5138],
        [0.0815, 0.8425],
        [0.5206, 0.2379],
        [0.6620, 0.6338]]), tensor([[0.7833, 0.8108],
        [0.7992, 0.7491],
        [0.7910, 0.3669],
        [0.7905, 0.4837],
        [0.4499, 0.7848]]))
(tensor([[0.1516, 0.6787, 0.7833, 0.8108],
        [0.2168, 0.5138, 0.7992, 0.7491]]), tensor([[0.0815, 0.8425, 0.7910, 0.3669],
        [0.5206, 0.2379, 0.7905, 0.4837]]), tensor([[0.6620, 0.6338, 0.4499, 0.7848]]))
------------------------------------------------------
(tensor([[0.1516, 0.6787, 0.7833, 0.8108]]), tensor([[0.2168, 0.5138, 0.7992, 0.7491],
        [0.0815, 0.8425, 0.7910, 0.3669]]), tensor([[0.5206, 0.2379, 0.7905, 0.4837],
        [0.6620, 0.6338, 0.4499, 0.7848]]))


In [248]:
# Tensor reshaping operations
a = torch.arange(16)

# -1 lets torch infer that dimension: 16 elements become (4, 4)
print(torch.reshape(a, (4, -1)))

print('------------------------------------------------------')
b = a.view(4, 4)
# Two ways to transpose a 2-D tensor
print(b.T)
print(torch.t(b))

print('------------------------------------------------------')
a = torch.rand(3, 4)
# Swap the two given dimensions
print(a.shape)
print(torch.transpose(a, 0, 1).shape)

print('------------------------------------------------------')
c = a.view(3, 1, 4)
# Remove the dimensions whose size is 1
out = torch.squeeze(c)
print(c.shape)
print(out.shape)
# Add a dimension of size 1
out = torch.unsqueeze(out, -1)
print(out.shape)

print('------------------------------------------------------')
print(a)
# Split the tensor along dim; returns a tuple
out = torch.unbind(a, dim=1)
print(out)

print('------------------------------------------------------')
# Flip along the given dimensions
print(a)
print(torch.flip(a, dims=[1, 0]))
print(torch.rot90(a, 2)) # rotate a by 90 degrees counter-clockwise twice; 2nd argument > 0 is counter-clockwise, < 0 clockwise
a = torch.rand(3, 5, 6)
# The 2 dimensions that span the rotation plane can be chosen as well
print(torch.rot90(a, 1, dims=[0, 2]).shape)

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15]])
------------------------------------------------------
tensor([[ 0,  4,  8, 12],
        [ 1,  5,  9, 13],
        [ 2,  6, 10, 14],
        [ 3,  7, 11, 15]])
tensor([[ 0,  4,  8, 12],
        [ 1,  5,  9, 13],
        [ 2,  6, 10, 14],
        [ 3,  7, 11, 15]])
------------------------------------------------------
torch.Size([3, 4])
torch.Size([4, 3])
------------------------------------------------------
torch.Size([3, 1, 4])
torch.Size([3, 4])
torch.Size([3, 4, 1])
------------------------------------------------------
tensor([[0.1129, 0.9829, 0.1387, 0.9355],
        [0.8607, 0.3921, 0.0525, 0.0413],
        [0.9672, 0.5938, 0.1712, 0.8531]])
(tensor([0.1129, 0.8607, 0.9672]), tensor([0.9829, 0.3921, 0.5938]), tensor([0.1387, 0.0525, 0.1712]), tensor([0.9355, 0.0413, 0.8531]))
------------------------------------------------------
tensor([[0.1129, 0.9829, 0.1387, 0.9355],
   

In [250]:
# Filling a tensor: a (2, 3) tensor in which every element is 3.14
torch.full((2, 3), 3.14)

tensor([[3.1400, 3.1400, 3.1400],
        [3.1400, 3.1400, 3.1400]])

In [251]:
# 频谱操作: torch.fft torch.stft

In [258]:
# tensor <-> numpy conversion
a = torch.Tensor([1, 2, 3])
# tensor -> numpy array
k = a.numpy()
print(k)
# numpy array -> tensor
print(torch.from_numpy(k))

[1. 2. 3.]
tensor([1., 2., 3.])


# 深度学习相关

模型保存与加载  
torch.save(state, dir) 保存/序列化  
torch.load(dir) 加载模型

并行化  
torch.get_num_threads()  
torch.set_num_threads(int) 设置用于并行化cpu操作的OpenMP线程数

GPU  
a = torch.tensor([1, 2, 3])  
a.to('cuda')

Variable已经合并到Tensor(小写tensor)

梯度计算  
torch.autograd.backward|torch.autograd.grad

In [292]:
# Gradient computation
x = torch.ones(2, 2, requires_grad=True)
# Hook: post-processes the computed gradient (here it doubles the gradient of x)
x.register_hook(lambda grad: grad * 2)
y = x + 2
z = y * y * 3

# The argument is a weight matrix; it is multiplied with the gradient matrix of the corresponding tensor
w = torch.Tensor([[1, 0], [0, 1]])
z.backward(w, retain_graph=True)
print(x.grad) # .grad of y and z would be None
print(y.grad_fn) # grad_fn of x would be None
print(z.grad_fn)

# Gradients are accumulated by default (36 -> 72 in the output below)
torch.autograd.backward(z, grad_tensors=w, retain_graph=True)
print(x.grad) # .grad of y and z would be None
print(y.grad_fn) # grad_fn of x would be None
print(z.grad_fn)

# autograd.grad returns the gradients, so they can be printed directly
# 1. the hook does not take effect (18 instead of 36 for x in the output below)
# 2. the gradients are not accumulated
print(torch.autograd.grad(z, [x, y, z], grad_outputs=w))

tensor([[36.,  0.],
        [ 0., 36.]])
<AddBackward0 object at 0x000001BA29BC4280>
<MulBackward0 object at 0x000001BA29BC4550>
tensor([[72.,  0.],
        [ 0., 72.]])
<AddBackward0 object at 0x000001BA29BC47F0>
<MulBackward0 object at 0x000001BA29BC4550>
(tensor([[18.,  0.],
        [ 0., 18.]]), tensor([[18.,  0.],
        [ 0., 18.]]), tensor([[1., 0.],
        [0., 1.]]))


In [294]:
"""
torch.autograd.Function
每个原始自动求导运算是2个在Tensor上运行的函数=forward（前向传播）+backward（反向梯度求导）
"""
class line(torch.autograd.Function):
    """Custom autograd function for the element-wise line ``w * x + b``.

    A ``torch.autograd.Function`` pairs a ``forward`` computation with a
    hand-written ``backward`` that returns one gradient per forward input.
    Use it through ``line.apply(w, x, b)``.
    """

    @staticmethod
    def forward(ctx, w, x, b):
        """Compute ``w * x + b``.

        Args:
            ctx: context object used to hand tensors over to ``backward``.
            w: weight tensor.
            x: input tensor.
            b: bias tensor.

        Returns:
            The tensor ``w * x + b``.
        """
        # Only w and x appear in the gradient formulas below; the derivative
        # with respect to b is 1, so b is not kept alive for the backward pass.
        ctx.save_for_backward(w, x)
        return w * x + b

    @staticmethod
    def backward(ctx, grad_out):
        """Apply the chain rule to the incoming gradient.

        Args:
            ctx: context object holding the tensors saved in ``forward``.
            grad_out: gradient flowing in from the next stage (chain rule).

        Returns:
            A tuple ``(grad_w, grad_x, grad_b)`` in the order of the
            ``forward`` inputs.
        """
        w, x = ctx.saved_tensors
        grad_w = grad_out * x  # d(w*x + b)/dw = x
        grad_x = grad_out * w  # d(w*x + b)/dx = w
        grad_b = grad_out      # d(w*x + b)/db = 1
        return grad_w, grad_x, grad_b
    
# Exercise the custom `line` function with random 2x2 inputs that all track gradients
w = torch.rand(2, 2, requires_grad=True)
x = torch.rand(2, 2, requires_grad=True)
b = torch.rand(2, 2, requires_grad=True)

# Custom autograd functions are invoked through .apply
out = line.apply(w, x, b)
# Backpropagate an all-ones upstream gradient
out.backward(torch.ones(2, 2))

print(w, x, b)
# In the output below w.grad equals x, x.grad equals w and b.grad is all ones
print(w.grad, x.grad, b.grad)

tensor([[0.7603, 0.3122],
        [0.1599, 0.2643]], requires_grad=True) tensor([[0.9128, 0.6314],
        [0.2291, 0.8903]], requires_grad=True) tensor([[0.4087, 0.6699],
        [0.2634, 0.4022]], requires_grad=True)
tensor([[0.9128, 0.6314],
        [0.2291, 0.8903]]) tensor([[0.7603, 0.3122],
        [0.1599, 0.2643]]) tensor([[1., 1.],
        [1., 1.]])


torch.nn库：专门为神经网络设计的模块化接口，构建于autograd之上，可以用来定义和运行神经网络

可视化工具：visdom  tensorboardX  torchvision