### 数据类型
* FloatTensor，对应torch.float，torch.float32，对应 Tensor.float()
* DoubleTensor，对应torch.double，torch.float64， 对应 Tensor.double()
* HalfTensor，对应torch.half，torch.float16， 对应 Tensor.half()
* IntTensor，对应torch.int，torch.int32， 对应 Tensor.int()
* LongTensor，对应torch.long，torch.int64， 对应 Tensor.long()
* ShortTensor，对应torch.short，torch.int16， 对应 Tensor.short()
* ByteTensor，对应torch.uint8， 对应 Tensor.byte()
* CharTensor，对应torch.int8， 对应 Tensor.char()
* BoolTensor，对应torch.bool，对应 Tensor.bool()

In [248]:
a = torch.ones(3, 3)
print(a.dtype)

torch.float32


In [249]:
a.half().dtype, a.half()  # 再次强调：half（即float16）可以起到加速作用，但需要GPU硬件支持，否则只会起到负面作用

(torch.float16,
 tensor([[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]], dtype=torch.float16))

In [250]:
a.float().dtype, a.float()

(torch.float32,
 tensor([[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]]))

In [251]:
a.double().dtype, a.double()

(torch.float64,
 tensor([[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]], dtype=torch.float64))

In [252]:
a.int().dtype, a.int()

(torch.int32,
 tensor([[1, 1, 1],
         [1, 1, 1],
         [1, 1, 1]], dtype=torch.int32))

In [253]:
a.long().dtype, a.long()

(torch.int64,
 tensor([[1, 1, 1],
         [1, 1, 1],
         [1, 1, 1]]))

In [254]:
a.short().dtype, a.short()

(torch.int16,
 tensor([[1, 1, 1],
         [1, 1, 1],
         [1, 1, 1]], dtype=torch.int16))

In [255]:
a.bool().dtype, a.bool()

(torch.bool,
 tensor([[True, True, True],
         [True, True, True],
         [True, True, True]]))

In [256]:
a.byte().dtype, a.byte()

(torch.uint8,
 tensor([[1, 1, 1],
         [1, 1, 1],
         [1, 1, 1]], dtype=torch.uint8))

In [257]:
a.char().dtype, a.char()

(torch.int8,
 tensor([[1, 1, 1],
         [1, 1, 1],
         [1, 1, 1]], dtype=torch.int8))

#### 转换为python类型
* Tensor.item()，要求tensor只包含一个元素（不限维度，例如形状为()或(1,)），返回对应的python标量
* Tensor.tolist()，没有要求

In [258]:
a = torch.randn(3, 3)
print(a)
print(a.tolist())
print(type(a.tolist()))

tensor([[-0.6213, -0.8936, -1.6769],
        [ 0.5573, -0.2916, -0.0440],
        [ 1.0579,  0.1702, -0.9787]])
[[-0.6212985515594482, -0.8936327695846558, -1.6768516302108765], [0.5573107600212097, -0.2916072905063629, -0.04396476224064827], [1.0579458475112915, 0.17024379968643188, -0.9786844849586487]]
<class 'list'>


In [259]:
a = torch.randn(1)
a, a.item()

(tensor([0.3348]), 0.3347892761230469)

#### 转换为numpy类型
* Tensor.numpy()，要求tensor在CPU上且不需要梯度（否则先调用 .cpu() / .detach()），返回的数组与tensor共享内存

In [260]:
a = torch.randn(3, 3)
a.numpy()

array([[ 0.8425302 , -1.3351866 , -0.5375705 ],
       [-1.5410998 , -0.01299132, -0.9623573 ],
       [ 0.07405878, -1.0653986 , -0.9559853 ]], dtype=float32)

----

### 设备间复制的几种操作

In [261]:
a = torch.zeros(3, 3)
a

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

In [262]:
a.cpu(), a.cpu().device

(tensor([[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]),
 device(type='cpu'))

In [263]:
a.cuda(2), a.cuda(2).device

(tensor([[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]], device='cuda:2'),
 device(type='cuda', index=2))

In [264]:
a.cuda("cuda:2"), a.cuda("cuda:2").device

(tensor([[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]], device='cuda:2'),
 device(type='cuda', index=2))

In [265]:
a.to("cuda:2"), a.to("cuda:2").device

(tensor([[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]], device='cuda:2'),
 device(type='cuda', index=2))

---

### 反向计算梯度backward

### `requires_grad`参数

In [266]:
x = torch.tensor([1., 2., 3.])
y = x * 2 + x
loss = y.sum()
loss.backward()
x.grad

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [267]:
x = torch.tensor([1., 2., 3.], requires_grad=True)
y = x * 2 + x
loss = y.sum()
loss.backward()
x.grad

tensor([3., 3., 3.])

### 选择部分计算，并求梯度

In [268]:
x = torch.tensor([1., 2., 3.], requires_grad=True)
y = x[1] * 3
y.backward()
x.grad

tensor([0., 3., 0.])

### 实现ReLU的效果（只有大于0的元素参与计算并获得梯度）

In [269]:
x = torch.tensor([-1., -2., 3.], requires_grad=True)
y = x[x > 0].sum()
y.backward()
x.grad

tensor([0., 0., 1.])

---

### 脱离计算图 `detach` / `detach_`：把当前变量脱离计算图，避免其计算梯度（`detach` 返回新的tensor，`detach_` 为原地操作）

In [270]:
x = torch.tensor([1., 2., 3.], requires_grad=True)
y = x * 2 + x
loss = y.sum()
loss.backward()
x.grad

tensor([3., 3., 3.])

In [271]:
x = torch.tensor([1., 2., 3.], requires_grad=True)
dx = x.detach()
y = dx * 2 + x  # dx 不参与求导
loss = y.sum()
loss.backward()
print("x.grad is", x.grad, ", x.requires_grad =", x.requires_grad)

x.grad is tensor([1., 1., 1.]) , x.requires_grad = True


In [272]:
x = torch.tensor([1., 2., 3.], requires_grad=True)
y = x * 2 + x
loss = y.sum()
x.detach_()   # inplace，此时x被修改，此时x的requires_grad=False
loss.backward()
print("x.grad is", x.grad, ", x.requires_grad =", x.requires_grad)

x.grad is None , x.requires_grad = False


---

### 基本函数/属性

In [273]:
a = torch.zeros(2, 2, 2, device="cpu")   # 创建全0 FloatTensor，具有2x2x2维度
print("a.is_cuda =", a.is_cuda)          # 判断tensor是否在cuda上，也就是gpu上
print("a.device =", a.device)            # 获取tensor所在的设备 
print("a.grad =", a.grad)                # 获取tensor的梯度，没有梯度则为None
print("a.ndim =", a.ndim)                # 获取tensor的维度数
print("a.dim() =", a.dim())              # 同ndim一样，获取维度数
print("a.ndimension() =", a.ndimension())# 同ndim一样，获取维度数
print("a.numel() =", a.numel())          # 获取tensor的元素数，其实是每个维度相乘
print("a.nelement() =", a.nelement())    # 获取tensor的元素数，同numel()
print("a.element_size() =", a.element_size())   # 获取元素的大小，Byte为单位，指的是单位元素大小，因为是float，所以是4字节

a.is_cuda = False
a.device = cpu
a.grad = None
a.ndim = 3
a.dim() = 3
a.ndimension() = 3
a.numel() = 8
a.nelement() = 8
a.element_size() = 4


---

### 张量的操作（补充）

转置（`Tensor.T` 会将所有维度逆序，在新版本中对非2维tensor使用已被弃用；`Tensor.t()` 只支持不超过2维的tensor）

In [274]:
a = torch.arange(9).view(3, 3)
a

tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])

In [275]:
a.T

tensor([[0, 3, 6],
        [1, 4, 7],
        [2, 5, 8]])

In [276]:
a.t()

tensor([[0, 3, 6],
        [1, 4, 7],
        [2, 5, 8]])

In [277]:
print(torch.zeros(1, 2, 3).T.shape)
print(torch.zeros(3, 1, 2).T.shape)

torch.Size([3, 2, 1])
torch.Size([2, 1, 3])


##### Tensor扩展，重复，复制
* Tensor.expand(*sizes) → Tensor    扩展为指定大小，只能扩展大小为1的维度（或在最前面新增维度），返回的是视图，不会真正复制数据
* Tensor.expand_as(other) → Tensor  扩展为目标tensor大小

In [278]:
a = torch.tensor([1, 2, 3])
a

tensor([1, 2, 3])

In [279]:
a.expand(5, 3)

tensor([[1, 2, 3],
        [1, 2, 3],
        [1, 2, 3],
        [1, 2, 3],
        [1, 2, 3]])

In [280]:
a = torch.tensor([1, 2, 3]).view(3, 1)
a

tensor([[1],
        [2],
        [3]])

In [281]:
a.expand(3, 5)

tensor([[1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2],
        [3, 3, 3, 3, 3]])

In [282]:
a = torch.tensor([1, 2, 3])
b = torch.randn(5, 3)
a.expand_as(b)

tensor([[1, 2, 3],
        [1, 2, 3],
        [1, 2, 3],
        [1, 2, 3],
        [1, 2, 3]])

In [283]:
a = torch.tensor([1, 2, 3]).view(3, 1)
b = torch.randn(3, 5)
a.expand_as(b)

tensor([[1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2],
        [3, 3, 3, 3, 3]])

----

### 逻辑判断
* Tensor.all()  所有元素都为True，结果为True
* Tensor.any()  任意一个元素为True，结果为True

In [284]:
a = torch.randn(1, 3)
a

tensor([[-0.7552, -1.0490,  2.0919]])

In [285]:
boola = a > 0
boola

tensor([[False, False,  True]])

In [286]:
boola.all()

tensor(False)

In [287]:
boola.any()

tensor(True)