In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

### 张量的创建

#### 直接创建
torch.tensor()
功能：从data创建tensor
参数：
data: 数据，可以是list, numpy

dtype: 数据类型，默认与data一致

device: 所在设备, cuda/cpu

requires_grad: 是否需要梯度

pin_memory: 是否存于锁页内存



In [2]:
import torch
import numpy as np

arr = np.ones((3,4))
print(arr.dtype)
t = torch.tensor(arr)
print(t)

float64
tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]], dtype=torch.float64)


#### 从numpy创建tensor
torch.from_numpy(ndarray)

功能：从numpy创建tensor

从torch.from_numpy()创建的tensor与原ndarray**共享内存**，当修改其中的一个数据，另一个也修改。

In [3]:
arr = np.array([[1,2,3], [4,5,6],[7,8,9]])
t = torch.from_numpy(arr)
arr
type(arr)
t 

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

numpy.ndarray

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]], dtype=torch.int32)

In [4]:
arr[0,0]=100
arr
t 

array([[100,   2,   3],
       [  4,   5,   6],
       [  7,   8,   9]])

tensor([[100,   2,   3],
        [  4,   5,   6],
        [  7,   8,   9]], dtype=torch.int32)

In [5]:
t[-1,-1] = 200
t
arr 

tensor([[100,   2,   3],
        [  4,   5,   6],
        [  7,   8, 200]], dtype=torch.int32)

array([[100,   2,   3],
       [  4,   5,   6],
       [  7,   8, 200]])

#### 依据数值创建
torch.zeros()

功能：依size创建全0张量

参数：

size: 张量的形状，如(3,3), (3, 224, 224)

out: 输出的张量

dtype: 数据类型

layout: 内存中的布局形式，有strided, sparse_coo等

device: cpu/cuda

requires_grad 是否需要梯度

In [6]:
out_t = torch.tensor([1])
# out_t = out_t * 2
t = torch.zeros((3,3), out=out_t)
t 
print(id(t), id(out_t))
id(t) == id(out_t)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])

3071260454656 3071260454656


True

**torch.zeros_like()**

功能：依input形状创建全0张量

input：创建与input同形状的全0张量

dtype: 数据类型

layout: 内存中布局形式

device:

requires_grad:

**torch.ones()**

**torch.ones_like()**

**torch.full()**

**torch.full_like()**

参数：

size: 张量的形状，如(3,3)

fill_value: 张量的值

In [7]:
t = torch.full((5,5), 55)
t 

tensor([[55, 55, 55, 55, 55],
        [55, 55, 55, 55, 55],
        [55, 55, 55, 55, 55],
        [55, 55, 55, 55, 55],
        [55, 55, 55, 55, 55]])

**torch.arange()**

功能： 创建等差的**1维张量**

数值区间为\[start, end\), step 默认为1


In [8]:
t = torch.arange(2,20,2)
t 

tensor([ 2,  4,  6,  8, 10, 12, 14, 16, 18])

**torch.linspace()

功能：创建均分数列

数值区间为\[start,end\], steps为数列长度

In [9]:
t = torch.linspace(2,20,12)
t 

tensor([ 2.0000,  3.6364,  5.2727,  6.9091,  8.5455, 10.1818, 11.8182, 13.4545,
        15.0909, 16.7273, 18.3636, 20.0000])

**torch.logspace()**

功能：创建对数均分的**1维张量**

数值区间\[start,end\], 数列长度为steps,底为base，默认为10

**torch.eye()**

功能：创建单位对角矩阵**(2维方阵)**


#### 依概率分布创建张量

torch.normal()

生成正态分布

mean: 均值

std: 标准差

四种模式：
mean 标量 std 标量

mean 标量 std 张量

mean 张量 std 标量

mean 张量 std 张量

In [10]:
# mean: 张量 std: 张量
mean = torch.arange(1,5, dtype = torch.float)
std = torch.arange(1,5, dtype = torch.float)
# out_t = torch.tensor([0.,1.,2.,3.])
t_normal = torch.normal(mean, std) 
print("mean:{}\t std:{}".format(mean, std))
print(t_normal)

mean:tensor([1., 2., 3., 4.])	 std:tensor([1., 2., 3., 4.])
tensor([0.0807, 3.8159, 0.9115, 3.1639])


In [11]:
# mean 标量 std 标量, 此时要加一个参数size

t_normal = torch.normal(0., 1., size=(4,))
t_normal

tensor([ 0.3537,  0.5416, -0.2911, -2.1719])

In [12]:
# mean 张量， std 标量

mean = torch.arange(1,5, dtype = torch.float)
std = 1
t_normal = torch.normal(mean, std)
t_normal

tensor([1.6843, 1.9532, 2.4244, 3.5009])

**torch.randn()**

**torch.randn_like()**

功能: 生成标准正态分布

参数： 
size: 张量的形状


**torch.rand()**

**torch.rand_like()**

功能：在区间\[0,1\)上，生成**均匀分布**

**torch.randint()**

**torch.randint_like()**

功能：在区间\[low,high\)上，生成**整数均匀分布**

size： 张量的形状

**torch.randperm(n)**

功能：生成从0到n-1的随机数列

n: 张量的长度

**torch.bernoulli()**

功能：以input维概率，生成伯努利分布（0-1分布，两点分布）



### 张量的操作

#### 拼接与切分

**torch.cat()**

将张量按维度dim进行拼接

tensors: 张量序列

dim: 要拼接的维度

**torch.stack()**

在**在新创建的维度dim**上进行拼接

tensors: 张量序列

dim: 要拼接的维度


In [13]:
t0 = torch.zeros((2,3))
t1 = torch.ones((2,3))

t_0 = torch.cat([t0,t1, t0], dim=0)
t_1 = torch.cat([t0,t1, t0], dim=1)
t_0
t_0.shape
t_1
t_1.shape

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [1., 1., 1.],
        [1., 1., 1.],
        [0., 0., 0.],
        [0., 0., 0.]])

torch.Size([6, 3])

tensor([[0., 0., 0., 1., 1., 1., 0., 0., 0.],
        [0., 0., 0., 1., 1., 1., 0., 0., 0.]])

torch.Size([2, 9])

In [14]:
t = torch.zeros((2,3))

t_stack = torch.stack([t,t], dim=0)
t_stack
t_stack.shape

tensor([[[0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.]]])

torch.Size([2, 2, 3])

**torch.chunk()**

将张量按维度dim进行平均切分

返回张量列表

若不能整除，最后一份张量的大小小于其他张量


In [15]:
t = torch.ones((2,7))
list_of_tensors = torch.chunk(t, dim=1, chunks = 3)

for idx, t in enumerate(list_of_tensors):
    print("No.{} tensor: shape is {}".format(idx+1, t.shape))

No.1 tensor: shape is torch.Size([2, 3])
No.2 tensor: shape is torch.Size([2, 3])
No.3 tensor: shape is torch.Size([2, 1])


**torch.split()**

将张量按维度dim进行切分

返回张量列表

tensor： 要切分的张量

split_size_or_sections: 为int时，表示每一份的长度；为list时，按list元素切分



In [16]:
a = torch.ones((2,5))
list_of_tensors = torch.split(a, dim=1, split_size_or_sections = [0,2,3])

for idx, t in enumerate(list_of_tensors):
    print("No.{} tensor: shape is {}".format(idx+1, t.shape))

No.1 tensor: shape is torch.Size([2, 0])
No.2 tensor: shape is torch.Size([2, 2])
No.3 tensor: shape is torch.Size([2, 3])


#### 张量索引

**torch.index_select()**

在维度dim上，按index索引数据

返回按index索引数据拼接的张量


In [17]:
t = torch.randint(0,9, size=(3,3))
idx = torch.tensor([0,2], dtype=torch.long)    # 数据类型只能时torch.long, 不能为torch.float
t_select = torch.index_select(t, dim=0, index=idx)
print("t:{}\nt_select:{}".format(t, t_select))

t:tensor([[2, 6, 5],
        [7, 3, 6],
        [1, 3, 1]])
t_select:tensor([[2, 6, 5],
        [1, 3, 1]])


**torch.masked_select()**

按mask中的True进行索引

返回一维张量。因为不能确定True的个数，因此也不能确定返回的形状

input: 要索引的张量

mask：与mask同形状的布尔类型张量

In [18]:
t = torch.randint(0,9, size=(3,3))
mask = t.ge(5)  #ge, gt, le, lt
t_select = torch.masked_select(t, mask)
print("t:{}\nt_select:{}".format(t, t_select))

t:tensor([[4, 0, 3],
        [8, 5, 3],
        [1, 4, 8]])
t_select:tensor([8, 5, 8])


#### 张量变换

**torch.reshape()**

变换张量形状

当张量在内存中是连续时，新张良与input共享数据内存



In [19]:
t = torch.randperm(8)
t_reshaped = torch.reshape(t, (2,-1))
t[0]=1024
t
t_reshaped

tensor([1024,    7,    1,    0,    4,    3,    6,    2])

tensor([[1024,    7,    1,    0],
        [   4,    3,    6,    2]])

**torch.transpose()**

交换张量的两个维度

input: 要交换的张量

dim0： 要交换的维度

dim1： 要交换的维度

**torch.t()**

2维张量转置，对矩阵而言，等价于

torch.t(input, 0, 1)

In [20]:
t = torch.rand(2,3,4)
t_transposed = torch.transpose(t, dim0=1, dim1=2)
t.shape
t_transposed.shape

torch.Size([2, 3, 4])

torch.Size([2, 4, 3])

**torch.squeeze()**

压缩张量中长度为1的维度（轴）

dim:若为None, 移除所有长度为1的轴；若指定维度，当且仅当该轴长度为1时，可以被移除。

**torch.unsqueeze()**
依据dim扩展维度


In [21]:
t = torch.rand((1,2,3,1))
t_sq = torch.squeeze(t)
t_0 = torch.squeeze(t, dim=0)
t_1 = torch.squeeze(t, dim=1)
t.shape
t_sq.shape
t_0.shape
t_1.shape

torch.Size([1, 2, 3, 1])

torch.Size([2, 3])

torch.Size([2, 3, 1])

torch.Size([1, 2, 3, 1])

### 张量的数学运算

#### 加法

**torch.add(input, other, alpha, out=None)**

input: 第一个张量

alpha: 乘项因子

other: 第二个张量

**torch.addcdiv(input, value = 1, tensor1, tensor2, out=None) **

$ out_i = input_i + value \times \cfrac{tensor1_i}{tensor2_i} $ 

**torch.addcmul(input, value = 1, tensor1, tensor2, out=None) **

$ out_i = input_i + value \times tensor1_i \times tensor2_i $ 

In [22]:
t_0 = torch.randn((3,3))
t_1 = torch.ones_like(t_0)
t_add = torch.add(t_0, t_1)
t_add2 = torch.add(t_0,t_1,alpha=10)

t_0
t_1
t_add
t_add2

tensor([[-0.1648,  0.5483,  0.6527],
        [-1.0764,  0.8431,  0.1505],
        [ 1.4356, -1.3248, -1.1615]])

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

tensor([[ 0.8352,  1.5483,  1.6527],
        [-0.0764,  1.8431,  1.1505],
        [ 2.4356, -0.3248, -0.1615]])

tensor([[ 9.8352, 10.5483, 10.6527],
        [ 8.9236, 10.8431, 10.1505],
        [11.4356,  8.6752,  8.8385]])

### 线性回归

求解步骤：

1. 确定模型Model

$ y = wx + b$

2. 选择损失函数

MSE

$ \cfrac{1}{m}\sum_{i=1}^m（y_i - \hat y_i)^2 $

3. 求解梯度并更新$w, b$

$ w = w - LR \times w.grad $

$ b = b = LR \times w.grad $

In [23]:
import torch
import matplotlib.pyplot as plt

torch.manual_seed(10)

lr = 0.1

# create training data
x = torch.rand(20, 1) * 10    # x data (tensor)
y = 2 * x + (5 + torch.randn(20,1)) # y data (tensor)

w = torch.randn((1), requires_grad = True)
b = torch.zeros((1), requires_grad = True)

for iteration in range(1000):
    # forward propagation
    wx = torch.mul(w, x)
    y_pred = torch.add(wx, b)
    
    # calculate loss
    loss = (0.5 * (y-y_pred)** 2).mean()
    
    # backward propagation
    loss.backward()

    # update para
    b.data.sub_(lr * b.grad)
    w.data.sub_(lr * w.grad)
    
    # drawing
#     if iteration % 20 == 0:
#             plt.scatter(x.data.numpy(), y.data.numpy())
#             plt.plot(x.data.numpy(), y_pred.data.numpy())
#             plt.text(2,20, 'Loss=%.4f' % loss.data.numpy())
#             plt.xlim(1.5, 10)
#             plt.ylim(8, 28)
#             plt.title('Iteration:{}\n w:{} b: {}'.format(iteration, w.data.numpy(), b.data.numpy()))
#             plt.pause(0.5)
            
    if loss.data.numpy() < 1:
        break

print('Iteration:{}\n w:{} b: {} loss: {}'.format(iteration, w.data.numpy(), b.data.numpy(), loss.data.numpy()))

<torch._C.Generator at 0x2cb138ec430>

tensor([0.3637])

tensor([4.1880])

tensor([0.0147])

tensor([1.3747])

tensor([0.5249])

tensor([3.5258])

tensor([0.6680])

tensor([3.1058])

tensor([0.6593])

tensor([1.4534])

tensor([1.4023])

tensor([4.1478])

tensor([1.3507])

tensor([1.6936])

tensor([1.8505])

tensor([2.3982])

tensor([2.4668])

tensor([3.6116])

tensor([2.4750])

tensor([0.9037])

tensor([3.3536])

tensor([3.4051])

tensor([3.6483])

tensor([2.1973])

tensor([3.9894])

tensor([1.1939])

Iteration:12
 w:[1.193853] b: [3.9894037] loss: 0.6023467183113098


### Computation Graph

In [27]:
import torch

w = torch.tensor([1.], requires_grad = True)
x = torch.tensor([2.], requires_grad = True)

a = torch.add(w, x)
b = torch.add(w, 1)
y = torch.mul(a,b)

y.backward()    # call torch.autograd.backward()

# check is_leaf
print("is_leaf: \n", w.is_leaf, x.is_leaf, a.is_leaf, b.is_leaf, y.is_leaf)

# check grad
print("gradient: \n", w.grad, x.grad, a.grad, b.grad, y.grad)

is_leaf: 
 True True False False False
gradient: 
 tensor([5.]) tensor([2.]) None None None


  print("gradient: \n", w.grad, x.grad, a.grad, b.grad, y.grad)


grad_fn: 记录创建该张量时所用的方法（函数）

y.grad_fn = \<MulBackward0\>

a.grad_fn = \<AddBackward0\>

b.grad_fn = \<AddBackward0\>


In [25]:
# check grad_fn
print("grad_fn: \n", w.grad_fn, x.grad_fn, a.grad_fn, b.grad_fn, y.grad_fn)

# leaf node grad_fn is None

grad_fn: 
 None None <AddBackward0 object at 0x000002CB156489A0> <AddBackward0 object at 0x000002CB15648910> <MulBackward0 object at 0x000002CB15648340>


### 动态图 vs 静态图

根据计算图搭建方式，可以分为**静态图和动态图**。

动态图：运算与搭建**同时**进行

静态图： **先**搭建图，**后**计算

### autograd 自动求导系统 torch.autograd

torch.autograd.backward(tensors, grad_tensors = None, retain_graph = None, create_graph = False)

自动求取梯度

tensors: 用于求导的张量，如loss

retain_graph： 保存计算图

create_graph：创建导数计算图，用于高阶求导

grad_tensors: 多梯度权重


In [38]:
import torch

w = torch.tensor([1.], requires_grad = True)
x = torch.tensor([2.], requires_grad = True)

a = torch.add(w, x)
b = torch.add(w, 1)
y0 = torch.mul(a,b)    # y0 = (w+x)*(w+1)
y1 = torch.add(a,b)    # y1 = (w+x)+ (w+1)
loss = torch.cat([y0,y1], dim=0)    # dy1/dw = 2

grad_tensors = torch.tensor([1.,2.])

loss.backward(gradient = grad_tensors)    # call torch.autograd.backward()

print(w.grad)    # 5*1+2*2

print(a.requires_grad, b.requires_grad, y.requires_grad)    # a,b,y default requires_grad is True

tensor([9.])
True True True


**torch.autograd.grad()**

torch.autograd.grad(outputs, inputs, grad_outputs = None, retain_graph=None, create_graph=False)

求取梯度

outputs: 用于求导的张量，如loss

inputs: 需要梯度的张量

retain_graph： 保存计算图

create_graph：创建导数计算图，用于高阶求导

grad_outputs: 多梯度权重


In [32]:
x = torch.tensor([3.], requires_grad = True)
y = torch.pow(x,2)    # y = x**2

grad_1 = torch.autograd.grad(y, x, create_graph=True) # grad_1 = dy/dx = 2*x = 2*3 =6
print(grad_1)

grad_2 = torch.autograd.grad(grad_1[0], x)  # grad_2 = d(2x)/dx = 2
print(grad_2)

(tensor([6.], grad_fn=<MulBackward0>),)
(tensor([2.]),)


**autograd tips**

- 梯度不清零

In [37]:
w = torch.tensor([1.], requires_grad = True)
b = torch.tensor([2.], requires_grad = True)

for i in range(2):
    a = torch.add(w, x)
    b = torch.add(w, 1)
    y = torch.mul(a, b)
    
    y.backward()
    print(w.grad)
    w.grad.zero_()    #grad clear to 0 manually
    

tensor([6.])


tensor([0.])

tensor([6.])


tensor([0.])