## 有关pytorch的Tensor用法

### 1. 导入部分

In [1]:
import torch
import numpy as np

# Select the compute device once: the first CUDA GPU when PyTorch can see one, the CPU otherwise.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

### 2. 创建tensor

In [2]:
# torch.empty only allocates the 5x3 tensor; the values are not initialized,
# so whatever is printed differs from run to run.
x = torch.empty((5, 3))
print(x)

tensor([[1.0194e-38, 8.4490e-39, 1.0469e-38],
        [9.3674e-39, 9.9184e-39, 8.7245e-39],
        [9.2755e-39, 8.9082e-39, 9.9184e-39],
        [8.4490e-39, 9.6429e-39, 1.0653e-38],
        [1.0469e-38, 4.2246e-39, 1.0378e-38]])


In [3]:
# A 5x3 tensor of samples drawn uniformly from [0, 1).
x = torch.rand((5, 3))
print(x)

tensor([[0.2564, 0.0780, 0.7804],
        [0.9603, 0.7824, 0.3108],
        [0.4476, 0.4636, 0.6215],
        [0.8823, 0.4449, 0.8039],
        [0.8484, 0.4238, 0.2953]])


Tensor的数据类型有以下几种：
1. torch.FloatTensor：32bit float
2. torch.DoubleTensor：64bit float
3. torch.HalfTensor：16bit float
4. torch.ByteTensor：8bit unsigned integer
5. torch.CharTensor：8bit signed integer
6. torch.ShortTensor：16bit int
7. torch.IntTensor：32bit int
8. torch.LongTensor：64bit int

In [4]:
# A 5x3 tensor of zeros stored as 64-bit integers (torch.long is an alias of torch.int64).
x = torch.zeros((5, 3), dtype=torch.int64)
print(x)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])


In [5]:
# Build a tensor straight from Python data; the result is a float tensor.
x = torch.tensor([5.5, 3.0])
print(x)

tensor([5.5000, 3.0000])


randn_like会按标准正态分布（即randn）生成一个新tensor，其形状与传入的第一个参数相同

In [6]:
# new_ones builds a tensor of ones from an existing tensor; by default the
# result has the same torch.dtype and torch.device as that tensor. Here the
# dtype is overridden to float64.
x = x.new_ones((5, 3), dtype=torch.float64)
print(x)

# randn_like samples a tensor with the shape of x; the dtype is set to float32.
x = torch.randn_like(x, dtype=torch.float32)
print(x)
# size() and shape both report the same torch.Size.
print(x.size())
print(x.shape)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)
tensor([[ 0.8663,  0.8838, -1.9079],
        [ 1.6581,  0.3732, -1.1469],
        [ 1.0164, -0.7236,  0.7262],
        [-0.6785, -1.1484, -0.1730],
        [ 0.6678, -1.2591,  0.6869]])
torch.Size([5, 3])
torch.Size([5, 3])


torch.Tensor(里面放数据即列表（不推荐），或者规格shape)
torch.tensor(里面只能放数据即列表)
可以由numpy直接导入成tensor

In [7]:
# torch.from_numpy turns a NumPy array into a tensor and keeps the array's
# dtype (the recorded output shows float64 for np.ones and int32 for the
# integer vector). First a 2x3 matrix, then a plain 1-D vector.
for source_array in (np.ones((2, 3)), np.array([2, 3])):
    print(source_array)
    a = torch.from_numpy(source_array)
    print('a =', a, '\n', 'a_TYPE =', a.type(), 'a_SHAPE = ', a.shape)

[[1. 1. 1.]
 [1. 1. 1.]]
a = tensor([[1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64) 
 a_TYPE = torch.DoubleTensor a_SHAPE =  torch.Size([2, 3])
[2 3]
a = tensor([2, 3], dtype=torch.int32) 
 a_TYPE = torch.IntTensor a_SHAPE =  torch.Size([2])


还有很多函数可以创建Tensor，去翻翻官方API就知道了，下表给了一些常用的作参考。

函数|功能
--|:--:
Tensor(*sizes)	|基础构造函数
tensor(data,)	|类似np.array的构造函数
ones(*sizes)	|全1Tensor
zeros(*sizes)	|全0Tensor
eye(*sizes)	|对角线为1，其他为0
arange(s,e,step)	|从s到e，步长为step
linspace(s,e,steps)	|从s到e，均匀切分成steps份
rand/randn(*sizes)	|[0,1)均匀分布/标准正态分布
normal(mean,std)/uniform(from,to)	|正态分布/均匀分布
randperm(m)	|随机排列

### 3. tensor运算

In [8]:
# Element-wise sum of x and y, written into a preallocated tensor via out=.
y = torch.rand((5, 3))
result = torch.empty((5, 3))
torch.add(x, y, out=result)
print(result)


tensor([[ 1.3494,  1.5158, -1.6823],
        [ 1.7963,  0.8630, -0.7432],
        [ 1.9406,  0.1391,  0.7442],
        [-0.5537, -0.9416, -0.1431],
        [ 1.0719, -0.9538,  1.0994]])


另一种方法是inplace（原地）操作，即直接覆盖原tensor的值。这类方法的名字以下划线_结尾，类似的还有x.copy_(y)、x.t_()

In [9]:
# In-place addition: adds x to y and stores the result in y itself
# (the trailing underscore marks the in-place variant of add).
y.add_(x)
print(y)

tensor([[ 1.3494,  1.5158, -1.6823],
        [ 1.7963,  0.8630, -0.7432],
        [ 1.9406,  0.1391,  0.7442],
        [-0.5537, -0.9416, -0.1431],
        [ 1.0719, -0.9538,  1.0994]])


### 4. tensor索引

下面代码的意义是：取x的第0行（x[0, :]）记为y，然后对y原地加1。由于索引得到的y与x共享内存，x也随之改变

In [10]:
print(x)
# Indexing row 0 gives a tensor that shares memory with x, so the in-place
# update below is visible through x as well.
y = x[0, :]
y.add_(1)
print(y)
print(x[0, :])  # the source tensor has changed too

tensor([[ 0.8663,  0.8838, -1.9079],
        [ 1.6581,  0.3732, -1.1469],
        [ 1.0164, -0.7236,  0.7262],
        [-0.6785, -1.1484, -0.1730],
        [ 0.6678, -1.2591,  0.6869]])
tensor([ 1.8663,  1.8838, -0.9079])
tensor([ 1.8663,  1.8838, -0.9079])


In [11]:
a = torch.randn((1, 2, 3, 4))
print(a)
# Direct integer indexing: every additional index removes one leading dimension.
print(a[0])           # shape (2, 3, 4)
print(a[0, 1])        # shape (3, 4)
print(a[0, 1, 1])     # shape (4,)
print(a[0, 1, 1, 1])  # a single element

tensor([[[[ 1.0146, -0.5719,  0.6156, -0.8009],
          [ 0.2834, -0.5362,  0.8377, -0.1875],
          [ 0.1118, -0.8197, -0.6307, -0.4659]],

         [[ 0.0371, -0.9249,  0.1584,  0.1074],
          [ 1.6898, -0.5705, -1.2253,  0.4992],
          [ 0.5658, -0.7758, -0.6617,  0.2723]]]])
tensor([[[ 1.0146, -0.5719,  0.6156, -0.8009],
         [ 0.2834, -0.5362,  0.8377, -0.1875],
         [ 0.1118, -0.8197, -0.6307, -0.4659]],

        [[ 0.0371, -0.9249,  0.1584,  0.1074],
         [ 1.6898, -0.5705, -1.2253,  0.4992],
         [ 0.5658, -0.7758, -0.6617,  0.2723]]])
tensor([[ 0.0371, -0.9249,  0.1584,  0.1074],
        [ 1.6898, -0.5705, -1.2253,  0.4992],
        [ 0.5658, -0.7758, -0.6617,  0.2723]])
tensor([ 1.6898, -0.5705, -1.2253,  0.4992])
tensor(-0.5705)


与python类似，设某数组（list类型）为x
1. x[:3]代表取x从0到3不包含3一共3个数，x[1:3]同理
2. x[3:]表示3往后的所有元素包括3
3. x[-2:]-2为反索引，一个数组，正着数索引是0、1、2、3……反着数是-1（最后一位）、-2（倒数第二位）、-3、-4……（无0），所以本例子的意思是拿出从-2位到最后的数值，即-2和-1
4. x[start:stop:step]是切片的完整写法，例如x[0:4:2]表示从0到4（不包含4），步长为2；三个数都可以省略，start默认为0，stop默认为末尾，step默认为1

In [12]:
# Fix the first three indices, then take every third element of the last
# dimension (positions 0 and 3 of its 4 entries).
print(a[0,1,2,::3])

tensor([0.5658, 0.2723])


除了常用的索引选择数据之外，PyTorch还提供了一些高级的选择函数:

函数|功能
--|:--:
index_select(input, dim, index)	|在指定维度dim上选取，比如选取某些行、某些列
masked_select(input, mask)	|例子见下文（等价于a[a>0]），使用BoolTensor进行选取
nonzero(input)	|非0元素的下标
gather(input, dim, index)	|根据index，在dim维度上选取数据，输出的size与index一样

index_select，下列代码的意义为，选择a(1, 2, 3, 4)索引为2的那个维度(即3)，然后在这3里面拿索引为0和2的那俩

In [13]:
print(a)
# Along dimension 2 (the one of length 3) keep the entries at index 0 and 2.
print(torch.index_select(a, 2, torch.tensor([0, 2])))
# torch.arange(2) is tensor([0, 1]), so this keeps index 0 and 1 instead.
print(torch.index_select(a, 2, torch.arange(2)))

tensor([[[[ 1.0146, -0.5719,  0.6156, -0.8009],
          [ 0.2834, -0.5362,  0.8377, -0.1875],
          [ 0.1118, -0.8197, -0.6307, -0.4659]],

         [[ 0.0371, -0.9249,  0.1584,  0.1074],
          [ 1.6898, -0.5705, -1.2253,  0.4992],
          [ 0.5658, -0.7758, -0.6617,  0.2723]]]])
tensor([[[[ 1.0146, -0.5719,  0.6156, -0.8009],
          [ 0.1118, -0.8197, -0.6307, -0.4659]],

         [[ 0.0371, -0.9249,  0.1584,  0.1074],
          [ 0.5658, -0.7758, -0.6617,  0.2723]]]])
tensor([[[[ 1.0146, -0.5719,  0.6156, -0.8009],
          [ 0.2834, -0.5362,  0.8377, -0.1875]],

         [[ 0.0371, -0.9249,  0.1584,  0.1074],
          [ 1.6898, -0.5705, -1.2253,  0.4992]]]])


...代表省略任意多的维度，视具体情况

In [14]:
print(a)
# Explicit form: index 0 on the first dimension, full slices on the two middle
# dimensions, index 2 on the last dimension.
print(a[0, :, :, 2])
# Same selection with "..." standing in for the middle dimensions; the
# recorded output of the two prints is identical.
print(a[0, ..., 2])

tensor([[[[ 1.0146, -0.5719,  0.6156, -0.8009],
          [ 0.2834, -0.5362,  0.8377, -0.1875],
          [ 0.1118, -0.8197, -0.6307, -0.4659]],

         [[ 0.0371, -0.9249,  0.1584,  0.1074],
          [ 1.6898, -0.5705, -1.2253,  0.4992],
          [ 0.5658, -0.7758, -0.6617,  0.2723]]]])
tensor([[ 0.6156,  0.8377, -0.6307],
        [ 0.1584, -1.2253, -0.6617]])
tensor([[ 0.6156,  0.8377, -0.6307],
        [ 0.1584, -1.2253, -0.6617]])


用掩码（mask）筛选出满足条件的元素，结果会被展平成一维tensor

In [15]:
x = torch.randn((3, 4))
print(x)
# Boolean mask that is True where the entry is greater than or equal to 0.
mask = torch.ge(x, 0)
print(mask)
# masked_select collects the entries picked by the mask into a 1-D tensor.
result = x.masked_select(mask)
print(result)

tensor([[-1.2406,  2.0205, -0.2652,  1.1667],
        [-0.4649, -2.3626,  0.9680, -0.1199],
        [-1.4365,  0.8207, -2.7220, -0.3868]])
tensor([[False,  True, False,  True],
        [False, False,  True, False],
        [False,  True, False, False]])
tensor([2.0205, 1.1667, 0.9680, 0.8207])


In [16]:
x = torch.randn((5, 3))
print(x)
# view reinterprets the same 15 elements under a different shape.
y = x.view(15)
# A -1 entry is inferred from the other sizes: 15 elements / 5 columns = 3 rows.
z = x.view(-1, 5)
print(x.shape, y.shape, z.shape)
print(y)
print(z)

tensor([[ 1.4163, -0.1488, -1.3667],
        [ 0.3497, -0.3696, -1.4712],
        [-1.0615,  1.7108, -0.2360],
        [-1.9929, -0.7716,  0.1674],
        [-0.3834, -1.0039,  0.7088]])
torch.Size([5, 3]) torch.Size([15]) torch.Size([3, 5])
tensor([ 1.4163, -0.1488, -1.3667,  0.3497, -0.3696, -1.4712, -1.0615,  1.7108,
        -0.2360, -1.9929, -0.7716,  0.1674, -0.3834, -1.0039,  0.7088])
tensor([[ 1.4163, -0.1488, -1.3667,  0.3497, -0.3696],
        [-1.4712, -1.0615,  1.7108, -0.2360, -1.9929],
        [-0.7716,  0.1674, -0.3834, -1.0039,  0.7088]])


注意张量之间的关联性（接上一段代码）：view返回的新tensor与原tensor共享内存，改变其中一个，另一个也会随之改变

In [17]:
x = torch.randn((5, 3))
y = x.view(15)
print(x)
print(y)
# In-place update of x; y is a view of the same data, so it changes as well.
x.add_(1)
print(x)
print(y)  # increased by 1 too

tensor([[-0.1285,  0.1721, -0.9357],
        [ 0.3765, -0.4782, -1.2447],
        [-2.9380,  0.4827,  0.4531],
        [ 0.4134,  0.6425, -0.7259],
        [-0.0984,  0.6953, -0.3944]])
tensor([-0.1285,  0.1721, -0.9357,  0.3765, -0.4782, -1.2447, -2.9380,  0.4827,
         0.4531,  0.4134,  0.6425, -0.7259, -0.0984,  0.6953, -0.3944])
tensor([[ 0.8715,  1.1721,  0.0643],
        [ 1.3765,  0.5218, -0.2447],
        [-1.9380,  1.4827,  1.4531],
        [ 1.4134,  1.6425,  0.2741],
        [ 0.9016,  1.6953,  0.6056]])
tensor([ 0.8715,  1.1721,  0.0643,  1.3765,  0.5218, -0.2447, -1.9380,  1.4827,
         1.4531,  1.4134,  1.6425,  0.2741,  0.9016,  1.6953,  0.6056])


如果想完全克隆新的要用clone函数

In [18]:
x = torch.randn((5, 3))
# clone() copies the data before reshaping, so x_cp does not follow later changes to x.
x_cp = x.clone().view(15)
print(x)
print(x_cp)
# In-place update of x only.
x.sub_(1)
print(x)
print(x_cp)  # unchanged

tensor([[-0.9657, -0.5606, -0.5309],
        [-1.3546, -0.0455,  0.5800],
        [ 0.2351,  2.4119,  1.2232],
        [-0.9601, -0.6223, -0.7039],
        [-0.2179,  1.0267, -0.2830]])
tensor([-0.9657, -0.5606, -0.5309, -1.3546, -0.0455,  0.5800,  0.2351,  2.4119,
         1.2232, -0.9601, -0.6223, -0.7039, -0.2179,  1.0267, -0.2830])
tensor([[-1.9657, -1.5606, -1.5309],
        [-2.3546, -1.0455, -0.4200],
        [-0.7649,  1.4119,  0.2232],
        [-1.9601, -1.6223, -1.7039],
        [-1.2179,  0.0267, -1.2830]])
tensor([-0.9657, -0.5606, -0.5309, -1.3546, -0.0455,  0.5800,  0.2351,  2.4119,
         1.2232, -0.9601, -0.6223, -0.7039, -0.2179,  1.0267, -0.2830])


另外，PyTorch还支持一些线性代数函数，这里提一下，免得用起来的时候自己造轮子，具体用法参考官方文档。如下表所示：

函数|功能
--|--
trace	|对角线元素之和(矩阵的迹)
diag	|对角线元素
triu/tril	|矩阵的上三角/下三角，可指定偏移量
mm/bmm	|矩阵乘法，batch的矩阵乘法
addmm/addbmm/addmv/addr/baddbmm..	|矩阵运算
t	|转置
dot/cross	|内积/外积
inverse	|求逆矩阵
svd	|奇异值分解

### 5. 广播
两个形状不同的Tensor按元素运算时，可能会触发广播（broadcasting）机制

In [19]:
# torch.arange(start, end, step) counts from start up to, but not including, end.
x = torch.arange(1, 3).reshape(1, 2)  # one row, two columns
print(x)
y = torch.arange(1, 4).reshape(3, 1)  # three rows, one column
print(y)
# The (1, 2) and (3, 1) operands are broadcast to (3, 2) before being added.
print(x + y)

tensor([[1, 2]])
tensor([[1],
        [2],
        [3]])
tensor([[2, 3],
        [3, 4],
        [4, 5]])


用y[:] = y + x把结果写回原来的y，而不是让y指向新开辟的内存，有助于节省内存

In [20]:
# Case 1: plain assignment. y + x produces a new tensor and the name y is
# rebound to it, so id(y) differs from the id recorded beforehand.
x = torch.tensor([1, 2])
y = torch.tensor([3, 4])
id_before = id(y)
y = y + x
print(id(y) == id_before)  # False

# Case 2: slice assignment. The sum is written into the existing tensor, so y
# is still the same object afterwards.
x = torch.tensor([1, 2])
y = torch.tensor([3, 4])
id_before = id(y)
y[:] = y + x
# Alternatives that also keep y as the same object:
# y += x
# torch.add(x, y, out=y)
# y.add_(x)
print(id(y) == id_before)  # True

False
True


### 6. Tensor和NumPy

In [21]:
# Tensor -> NumPy: the array returned by a.numpy() follows in-place changes to
# the tensor and vice versa (both printed values move together in the output).
a = torch.ones(5)
b = a.numpy()
print(a, b)
a += 1  # in-place change on the tensor side
print(a, b)
b += 1  # in-place change on the NumPy side
print(a, b)

# NumPy -> Tensor: torch.from_numpy behaves the same way in the other direction.
a = np.ones(5)
b = torch.from_numpy(a)
print(a, b)
a += 1  # in-place change on the NumPy side
print(a, b)
b += 1  # in-place change on the tensor side
print(a, b)

tensor([1., 1., 1., 1., 1.]) [1. 1. 1. 1. 1.]
tensor([2., 2., 2., 2., 2.]) [2. 2. 2. 2. 2.]
tensor([3., 3., 3., 3., 3.]) [3. 3. 3. 3. 3.]
[1. 1. 1. 1. 1.] tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
[2. 2. 2. 2. 2.] tensor([2., 2., 2., 2., 2.], dtype=torch.float64)
[3. 3. 3. 3. 3.] tensor([3., 3., 3., 3., 3.], dtype=torch.float64)


### 7. GPU

In [22]:
# The body below runs only when PyTorch reports an available CUDA device.
# It uses x as left by an earlier cell.
if torch.cuda.is_available():
    device = torch.device("cuda")  # GPU
    y = torch.ones_like(x, device=device)  # create the tensor directly on the GPU
    x = x.to(device)  # equivalent to .to("cuda")
    z = x + y
    print(z)
    print(z.to("cpu", torch.double))  # to() can change the dtype in the same call

tensor([2, 3], device='cuda:0')
tensor([2., 3.], dtype=torch.float64)


## 自动求梯度

### 1. 基础操作

加了requires_grad=True那它就被“跟踪”了
注意
1. 梯度只能对标量求；如果要对张量调用backward，需要传入一个与之同形的权重张量，先将两者按元素相乘再求和得到一个标量，然后对这个标量求梯度（基本很少这么干）
2. 每次反向传播要先调用zero_的inplace方法将梯度清零，不然会累加

In [26]:
# A tensor created with requires_grad=True is tracked by autograd.
x = torch.ones(2, 2, requires_grad=True)
print(x)
print(x.grad_fn)  # None: x was created directly, not produced by an operation

# Every operation on a tracked tensor records a grad_fn on its result.
y = x + 2
print(y)
print(y.grad_fn)

z = y * y * 3
# mean() averages all elements of z, which gives a scalar.
out = z.mean()
print(z)
print(out)
print(x)
out.backward()  # equivalent to out.backward(torch.tensor(1.))
print(x.grad)   # d(out)/dx = 6 * (x + 2) / 4 = 4.5 for every element

# Backpropagate a second time: gradients are accumulated into x.grad,
# so the new gradient (all ones) is added to the 4.5 already stored.
out2 = x.sum()
out2.backward()
print(x.grad)

# Clear the stored gradient in place before the next backward pass so that
# only the gradient of out3 remains. zero_() is called on x.grad directly;
# going through the .data attribute is not needed for this.
out3 = x.sum()
x.grad.zero_()
out3.backward()
print(x.grad)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
None
tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x000002A2A932F7C8>
tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>)
tensor(27., grad_fn=<MeanBackward0>)
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])
tensor([[5.5000, 5.5000],
        [5.5000, 5.5000]])
tensor([[1., 1.],
        [1., 1.]])


假设y由自变量x计算而来，w是和y同形的张量，则y.backward(w)的含义是：先计算l = torch.sum(y * w)，则l是个标量，然后求l对自变量x的导数。

In [27]:
x = torch.tensor([1.0, 2.0, 3.0, 4.0], requires_grad=True)
y = 2 * x
z = y.view(2, 2)
print(z)
# z is not a scalar, so backward needs a weight tensor with the same shape as
# z; the weighted sum of z is the scalar that actually gets differentiated.
v = torch.tensor([[1.0, 0.1], [0.01, 0.001]], dtype=torch.float32)
z.backward(gradient=v)
print(x.grad)  # 2 * v, flattened to the shape of x

tensor([[2., 4.],
        [6., 8.]], grad_fn=<ViewBackward>)
tensor([2.0000, 0.2000, 0.0200, 0.0020])


默认是不跟踪的，如果想要开始跟踪这个变量，则应该使用inplace方法requires_grad_()（带个下划线）来原地修改requires_grad属性

In [24]:
a = torch.randn((2, 2))  # requires_grad is False when it is not given
a = (a * 3) / (a - 1)
print(a.requires_grad)  # False
# requires_grad_ switches the flag in place; operations on a are tracked from here on.
a.requires_grad_(True)
print(a.requires_grad)  # True
b = (a * a).sum()
print(b.grad_fn)

False
True
<SumBackward0 object at 0x000002A2A930BC88>


### 2. 中断梯度追踪

In [29]:
x = torch.tensor(1.0, requires_grad=True)
y1 = x.pow(2)
# Anything computed inside no_grad() is not recorded by autograd.
with torch.no_grad():
    y2 = x.pow(3)
y3 = y1 + y2

# Mathematically y3 = x^2 + x^3, but y2 was computed without tracking, so it
# is treated like a constant during backpropagation. Only x^2 contributes:
# d(y3)/dx = 2x = 2 at x = 1.

print(x.requires_grad)
print(y1, y1.requires_grad)  # True
print(y2, y2.requires_grad)  # False
print(y3, y3.requires_grad)  # True

y3.backward()
print(x.grad)

True
tensor(1., grad_fn=<PowBackward0>) True
tensor(1.) False
tensor(2., grad_fn=<AddBackward0>) True
tensor(2.)


此外，如果我们想要修改tensor的数值，但是又不希望被autograd记录（即不会影响反向传播），那么我们可以对tensor.data进行操作。

In [30]:
x = torch.ones(1, requires_grad=True)

print(x.data)  # still a tensor
print(x.data.requires_grad)  # False: .data is outside the computation graph

y = 2 * x
# Scale the stored value through .data; the change is not recorded in the
# graph, so it does not affect the gradient computed below.
x.data.mul_(100)

y.backward()
print(x)  # the value changed through .data is visible in x
print(x.grad)  # still 2, the derivative of y = 2 * x

tensor([1.])
False
tensor([100.], requires_grad=True)
tensor([2.])
