In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

### What is PYTORCH?

It's a Python-based scientific computing package targeted at two sets of audiences:
- A replacement for Numpy to use the power of GPUs;
- a deep learning research platform that provides maximum flexibility and speed.


#### Getting Started

##### Tensors
Tensors are similar to Numpy's **ndarrays**, with the addition being that Tensors can also be used on a GPU to accelerate computing.


In [2]:
from __future__ import print_function
import torch
import torchvision

print(f'Torch version: {torch.__version__}')
print(f'Torchvision version: {torchvision.__version__}')
print(torch)

 # Construct a 5x3 matrix, uninitialized
x = torch.empty(5,3)
print(x)

# Construct a randomily initialized matrix
x = torch.rand(5,3)
print(x)

# Construct a zeros matrix
x = torch.zeros(5,3, dtype=torch.long)
print(x)

# Construct a tensor through data
x = torch.tensor([5.5, 3])
print(x)

Torch version: 1.8.1+cpu
Torchvision version: 0.9.1+cpu
<module 'torch' from 'C:\\work2\\anaconda3\\envs\\py38\\lib\\site-packages\\torch\\__init__.py'>
tensor([[1.0286e-38, 6.2450e-39, 1.0653e-38],
        [8.4490e-39, 1.0194e-38, 8.9082e-39],
        [8.4490e-39, 1.0102e-38, 1.0469e-38],
        [9.6429e-39, 8.4490e-39, 6.2449e-39],
        [8.4490e-39, 1.0194e-38, 9.9184e-39]])
tensor([[0.3631, 0.7807, 0.3014],
        [0.2742, 0.6922, 0.5276],
        [0.0675, 0.8883, 0.2585],
        [0.7685, 0.0581, 0.9728],
        [0.4284, 0.9582, 0.2101]])
tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])
tensor([5.5000, 3.0000])


In [3]:
# construct a tensor base on an existing tensor
# new_* methods take in sizes
x = x.new_ones(5,3,dtype=torch.double) 
print(x, x.dtype)

#override dtype!
x = torch.randn_like(x, dtype=torch.float)
print(x, x.dtype, x.size())

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64) torch.float64
tensor([[-0.7963, -0.7738,  1.8228],
        [ 0.3489, -0.5416, -0.3281],
        [-0.0874, -0.7715, -0.2776],
        [-0.3673, -1.6102, -0.6255],
        [-1.6857, -1.6342, -0.4254]]) torch.float32 torch.Size([5, 3])


In [4]:
# construct a matrix filled zeros and of dtype long
x = torch.zeros(5,3, dtype=torch.long)
print(x)


tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])


In [5]:
print(x.dtype)

torch.int64


In [6]:
# Construct a tensor directly from data
x = torch.tensor([5.5, 3])
print(x)

tensor([5.5000, 3.0000])


In [7]:
# to create a tensor based on an existing tensor
x = x.new_ones(5,3, dtype=torch.double)
print(x)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)


In [8]:
x = torch.randn_like(x, dtype=torch.float)    # override dtype!
print(x)

tensor([[ 1.4459,  0.2783,  0.1014],
        [-1.4645, -1.1640,  0.4906],
        [ 0.3508,  0.8871,  0.5020],
        [-1.4988,  0.5469,  1.6601],
        [-0.0190,  0.2720, -0.3801]])


In [9]:
# get the size
print(x.size())

torch.Size([5, 3])


torch.Size is a tuple.

##### Operations

In [10]:
y = torch.zeros(5,3)
print(y)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])


In [11]:
# syntax 1
print(x+y)


tensor([[ 1.4459,  0.2783,  0.1014],
        [-1.4645, -1.1640,  0.4906],
        [ 0.3508,  0.8871,  0.5020],
        [-1.4988,  0.5469,  1.6601],
        [-0.0190,  0.2720, -0.3801]])


In [12]:
# syntax 2
print(torch.add(x,y))

tensor([[ 1.4459,  0.2783,  0.1014],
        [-1.4645, -1.1640,  0.4906],
        [ 0.3508,  0.8871,  0.5020],
        [-1.4988,  0.5469,  1.6601],
        [-0.0190,  0.2720, -0.3801]])


In [13]:
# providing an output tensor as argument
result = torch.empty(5,3)
torch.add(x,y, out=result)
print(result)

tensor([[ 1.4459,  0.2783,  0.1014],
        [-1.4645, -1.1640,  0.4906],
        [ 0.3508,  0.8871,  0.5020],
        [-1.4988,  0.5469,  1.6601],
        [-0.0190,  0.2720, -0.3801]])

tensor([[ 1.4459,  0.2783,  0.1014],
        [-1.4645, -1.1640,  0.4906],
        [ 0.3508,  0.8871,  0.5020],
        [-1.4988,  0.5469,  1.6601],
        [-0.0190,  0.2720, -0.3801]])


In [14]:
y=torch.empty(5,3)
print(y)

tensor([[ 1.4459,  0.2783,  0.1014],
        [-1.4645, -1.1640,  0.4906],
        [ 0.3508,  0.8871,  0.5020],
        [-1.4988,  0.5469,  1.6601],
        [-0.0190,  0.2720, -0.3801]])


In [15]:
# in-place
# add x to y
y.add_(x)
print(y)

tensor([[ 2.8919,  0.5565,  0.2027],
        [-2.9289, -2.3280,  0.9811],
        [ 0.7017,  1.7742,  1.0039],
        [-2.9977,  1.0937,  3.3203],
        [-0.0381,  0.5440, -0.7603]])

tensor([[ 2.8919,  0.5565,  0.2027],
        [-2.9289, -2.3280,  0.9811],
        [ 0.7017,  1.7742,  1.0039],
        [-2.9977,  1.0937,  3.3203],
        [-0.0381,  0.5440, -0.7603]])


Any operation that mutates a tensor in-place is post-fixed with an _.
For example, x.copy_(y), x.t_() will change x.

In [16]:
print(x)

tensor([[ 1.4459,  0.2783,  0.1014],
        [-1.4645, -1.1640,  0.4906],
        [ 0.3508,  0.8871,  0.5020],
        [-1.4988,  0.5469,  1.6601],
        [-0.0190,  0.2720, -0.3801]])


In [17]:
print(x[:,1])

tensor([ 0.2783, -1.1640,  0.8871,  0.5469,  0.2720])


In [18]:
# torch.view to resize/reshape tensor
x = torch.randn(4,4)
y=x.view(16)
z=x.view(-1,8)   # the size -1 is inferred from other dimensions
print(x.size(), y.size(), z.size())

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


If you have a one element tensor, use .item() to get the value as a Python number

In [19]:
x = torch.randn(1)
print(x)

tensor([1.0014])


In [20]:
print(x.item())

1.0013766288757324


docs at https://pytorch.org/docs/stable/torch.html

#### Numpy Bridge

Converting a Torch Tensor to a Numpy array and vice versa is a breeze.

The Torch Tensor and Numpy array will share their underlying memory locations (if the Torch Tensor is on CPU), and changing one will change the other.

##### Converting a Torch Tensor to a Numpy Array


In [21]:
a = torch.ones(5)
print(a)

tensor([1., 1., 1., 1., 1.])


In [22]:
b = a.numpy()
print(b)
print(type(b))

[1. 1. 1. 1. 1.]
<class 'numpy.ndarray'>


In [23]:
# tensor and numpy array change in value
a.add_(1)    # change the tensor will change the numpy automatically
print(a)
print(b)

tensor([2., 2., 2., 2., 2.])

tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]


##### Converting Numpy Array to Torch Tensor

In [24]:
import numpy as np

a = np.ones(5)
b = torch.from_numpy(a)
np.add(a,1, out=a)    # change the numpy will change the tensor automatically
print(a)
print(b)

array([2., 2., 2., 2., 2.])

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


In [25]:
np.add(a,-3, out=a)
print(a)
print(b)

array([-1., -1., -1., -1., -1.])

[-1. -1. -1. -1. -1.]
tensor([-1., -1., -1., -1., -1.], dtype=torch.float64)


**All the Tensors on the CPU except a CharTensor support converting to NumPy and back.**

#### CUDA Tensors

In [26]:
torch.cuda.is_available()

False

Tensors can be moved onto any device using the **.to()** method.

let us run this cell only if CUDA is available
We will use ``torch.device`` objects to move tensors in and out of GPU
if torch.cuda.is_available():
    device = torch.device("cuda")          # a CUDA device object
    y = torch.ones_like(x, device=device)  # directly create a tensor on GPU
    x = x.to(device)                       # or just use strings ``.to("cuda")``
    z = x + y
    print(z)
    print(z.to("cpu", torch.double))       # ``.to`` can also change dtype together!
 
Out:
tensor([1.3417], device='cuda:0')
tensor([1.3417], dtype=torch.float64)

#### 张量的拼接与切分

##### **torch.cat()**
- 将张量按维度dim进行拼接
torch.cat(tensors, dim=0, out=None)

##### **torch.stack()**
- 在**新创建的维度dim上**进行拼接
torch.cat(tensors, dim=0, out=None)


In [34]:
t = torch.ones((2,3))
t
t1 = torch.cat([t,t], dim=0)
t2 = torch.cat([t,t], dim=1)
t1, t1.shape
t2, t2.shape

tensor([[1., 1., 1.],
        [1., 1., 1.]])

(tensor([[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]]),
 torch.Size([4, 3]))

(tensor([[1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.]]),
 torch.Size([2, 6]))

In [32]:
t = torch.ones((2,3))
t1 = torch.stack([t,t], dim=2)
t1,t1.shape

(tensor([[[1., 1.],
          [1., 1.],
          [1., 1.]],
 
         [[1., 1.],
          [1., 1.],
          [1., 1.]]]),
 torch.Size([2, 3, 2]))

In [33]:
t2 = torch.stack([t,t],dim=0)
t2,t2.shape

(tensor([[[1., 1., 1.],
          [1., 1., 1.]],
 
         [[1., 1., 1.],
          [1., 1., 1.]]]),
 torch.Size([2, 2, 3]))

In [35]:
t3 = torch.stack([t,t],dim=1)
t3,t3.shape

(tensor([[[1., 1., 1.],
          [1., 1., 1.]],
 
         [[1., 1., 1.],
          [1., 1., 1.]]]),
 torch.Size([2, 2, 3]))

##### **torch.chunk()**
- 将张量按维度进行**平均**切分,最后一块张量小于其他张量

torch.chunk(input, chunks, dim=0)

In [45]:
a = torch.randn((2,7))
a 
list_of_tensors = torch.chunk(a, chunks=3, dim=1)

for idx, c in enumerate(list_of_tensors):
    print(f'#{idx+1} tensor {c} shape is {c.shape}')

tensor([[-1.5811,  1.3463,  1.7464,  1.2265,  0.9617,  1.2656,  0.7929],
        [ 0.8065,  2.0841,  0.4143, -0.6469,  0.0567,  0.0787,  1.2527]])

#1 tensor tensor([[-1.5811,  1.3463,  1.7464],
        [ 0.8065,  2.0841,  0.4143]]) shape is torch.Size([2, 3])
#2 tensor tensor([[ 1.2265,  0.9617,  1.2656],
        [-0.6469,  0.0567,  0.0787]]) shape is torch.Size([2, 3])
#3 tensor tensor([[0.7929],
        [1.2527]]) shape is torch.Size([2, 1])


##### **torch.split()**
torch.split(tensor, split_size_or_sections, dim)


In [49]:
t = torch.randn((2,5))
list_of_tensors = torch.split(t, [2,1,2], dim=1)    # split_size_or_sections=2
for idx, c in enumerate(list_of_tensors):
    print(f'#{idx+1} tensor {c} size is {c.shape}')
    

#1 tensor tensor([[-1.3168,  0.4270],
        [ 0.0867, -0.3351]]) size is torch.Size([2, 2])
#2 tensor tensor([[ 0.7941],
        [-1.1873]]) size is torch.Size([2, 1])
#3 tensor tensor([[-0.9725,  1.2493],
        [-0.4420, -0.6818]]) size is torch.Size([2, 2])


#### 张量的索引

##### **torch.index_select(input, dim, index, out=None)**

In [59]:
t = torch.randint(0,9, size=(3,3))
idx = torch.tensor([0,2], dtype=torch.long)    #dtype=torch.long
t_select=torch.index_select(t,dim=1, index=idx,)
print('t:\n{}\n\nt_select:\n{}'.format(t, t_select))

t:
tensor([[7, 0, 3],
        [0, 3, 7],
        [3, 6, 8]])

t_select:
tensor([[7, 3],
        [0, 7],
        [3, 8]])


##### **torch.masked_select(input, mask)**
按mask中的True进行索引

返回一维的张量

mask： 与input同形状的张量

In [60]:
t = torch.randint(0,9, size=(3,3))
mask =t.ge(5)
t_mask = torch.masked_select(t,mask)
print('t:\n{}\nmask:{}\nt_mask:{}'.format(t, mask, t_mask))

t:
tensor([[7, 5, 5],
        [2, 5, 5],
        [8, 3, 8]])
mask:tensor([[ True,  True,  True],
        [False,  True,  True],
        [ True, False,  True]])
t_mask:tensor([7, 5, 5, 5, 5, 8, 8])


#### 张量的变换

##### torch.reshape(tensor, shape)

当张量在内存中是连续的时，新张量与input共享内存

In [65]:
t = torch.randperm(8) 
t_reshape = torch.reshape(t, (2,4))
t_reshape2 = torch.reshape(t, (4,-1))
print('t:\n{}\nt_reshape:\n{}\nt_reshape2:{}'.format(t, t_reshape, t_reshape2))

t[0]=1024
print('t:\n{}\nt_reshape:{}'.format(t, t_reshape))
print('t.data memory address: {}'.format(id(t.data)))
print('t_reshape.data memory address: {}'.format(id(t_reshape.data)))

t:
tensor([6, 3, 2, 0, 4, 1, 5, 7])
t_reshape:
tensor([[6, 3, 2, 0],
        [4, 1, 5, 7]])
t_reshape2:tensor([[6, 3],
        [2, 0],
        [4, 1],
        [5, 7]])
t:
tensor([1024,    3,    2,    0,    4,    1,    5,    7])
t_reshape:tensor([[1024,    3,    2,    0],
        [   4,    1,    5,    7]])
t.data memory address: 2040928906816
t_reshape.data memory address: 2040923481472


##### torch.transpose(input, dim0, dim1)

dim0: 要交换的维度
dim1: 要交换的维度

##### torch.t()

2维张量转置，对矩阵而言，等价于torch.transpose(input, 0, 1)

In [67]:
t = torch.rand((2,3,4))
t_transpose = torch.transpose(t, dim0=1, dim1=2)
print('t:\n{}\nt.shape:{}\nt_transpose:\n{}\nt_transpose.shape:{}'.format(t,t.shape, t_transpose,t_transpose.shape))

t:
tensor([[[0.2090, 0.8900, 0.2987, 0.7865],
         [0.1981, 0.9301, 0.0066, 0.7446],
         [0.6694, 0.2211, 0.0878, 0.0815]],

        [[0.5490, 0.8670, 0.8417, 0.0608],
         [0.2354, 0.4237, 0.7761, 0.1088],
         [0.8502, 0.1760, 0.9430, 0.5000]]])
t.shape:torch.Size([2, 3, 4])
t_transpose:
tensor([[[0.2090, 0.1981, 0.6694],
         [0.8900, 0.9301, 0.2211],
         [0.2987, 0.0066, 0.0878],
         [0.7865, 0.7446, 0.0815]],

        [[0.5490, 0.2354, 0.8502],
         [0.8670, 0.4237, 0.1760],
         [0.8417, 0.7761, 0.9430],
         [0.0608, 0.1088, 0.5000]]])
t_transpose.shape:torch.Size([2, 4, 3])


##### **torch.squeeze(input, dim=None, out=None)**

压缩长度为1的维度（轴）。
dim 为None，移除所有长度为1的维度；若指定维度，当且仅当该轴长度为1时，可以被移除。

##### **torch.usqueeze(input, dim, out=None)**
依据dim扩展维度

In [72]:
t = torch.rand((1,2,3,1))
t
t_sq=torch.squeeze(t)
t0=torch.squeeze(t, dim = 0)
t1=torch.squeeze(t, dim = 1)
print("t_sq:\n{}\nt_sq shape:\n{}".format(t_sq, t_sq.shape))
print("t0:\n{}\nt0_size:{}".format(t0,t0.shape))
print("t1:\n{}\nt1_size:{}".format(t1,t1.shape))

tensor([[[[0.8396],
          [0.9472],
          [0.7615]],

         [[0.2825],
          [0.3931],
          [0.5276]]]])

t_sq:
tensor([[0.8396, 0.9472, 0.7615],
        [0.2825, 0.3931, 0.5276]])
t_sq shape:
torch.Size([2, 3])
t0:
tensor([[[0.8396],
         [0.9472],
         [0.7615]],

        [[0.2825],
         [0.3931],
         [0.5276]]])
t0_size:torch.Size([2, 3, 1])
t1:
tensor([[[[0.8396],
          [0.9472],
          [0.7615]],

         [[0.2825],
          [0.3931],
          [0.5276]]]])
t1_size:torch.Size([1, 2, 3, 1])


#### 张量的数学运算

![image.png](attachment:image.png)

##### torch.add(input,alpha=1, other, out=None)

逐元素计算input + alpha $\times$ other

##### torch.addcmul(input, value=1, tensor1, tensor2, out=None)

$out_i = input_i + value \times tensor1_i \times tensor2_i$

##### torch.addcdiv(input, value=1, tensor1, tensor2, out=None)

$out_i = input_i + value \times \frac{tensor1_i}{tensor2_i}$

In [76]:
t0 = torch.randn((3,3))
t1 = torch.ones_like(t0)
t_add = torch.add(t0, 10, t1)
print('t0:\n{}\nt1:\n{}\nt_10_add: {}'.format(t0, t1, t_add))

t0:
tensor([[-1.0329, -1.0514, -1.7611],
        [ 1.1533,  0.1652, -1.3494],
        [ 0.8032, -0.7031,  0.6719]])
t1:
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
t_10_add: tensor([[ 8.9671,  8.9486,  8.2389],
        [11.1533, 10.1652,  8.6506],
        [10.8032,  9.2969, 10.6719]])
