## Linear algebra
### scalars

In [2]:
import torch

# A scalar is modelled here as a tensor holding a single element.
x, y = torch.tensor([3.0]), torch.tensor([2.0])

# Sum, product, quotient and power, all applied elementwise.
x + y, x * y, x / y, x ** y



(tensor([5.]), tensor([6.]), tensor([1.5000]), tensor([9.]))

In [3]:
# Convert the tensor to a plain Python list before printing it.
print(x.tolist())

[3.0]


### Vectors

In [4]:
# A vector is a 1-D tensor; this one holds the integers 0, 1, 2, 3.
x = torch.arange(0, 4)
x



tensor([0, 1, 2, 3])

In [5]:
# Indexing a vector with an integer reads the element at that position.
x[3]

tensor(3)

### Length, dimensionality and shape

In [6]:
# shape reports the size of the tensor along each of its axes.
x.shape

torch.Size([4])

In [7]:
# Scalar-times-vector (a * x) and the combined form a * x + y.
# The tensors are built with the torch.tensor factory rather than the legacy
# torch.Tensor constructor; float literals keep the dtype float32 so the
# results are the same as before.
a = 2
x = torch.tensor([1.0, 2.0, 3.0])
y = torch.tensor([10.0, 20.0, 30.0])
a * x, a * x + y

(tensor([2., 4., 6.]), tensor([12., 24., 36.]))

In [8]:
# First a flat range for reference, then 20 consecutive integers laid out
# as a matrix with 2 rows and 10 columns.
print(torch.arange(10))
A = torch.arange(20).reshape(2, 10)
print(A)


tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
tensor([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]])


In [9]:
# Transpose: .T swaps the rows and columns of A.
print(A.T)

tensor([[ 0, 10],
        [ 1, 11],
        [ 2, 12],
        [ 3, 13],
        [ 4, 14],
        [ 5, 15],
        [ 6, 16],
        [ 7, 17],
        [ 8, 18],
        [ 9, 19]])


In [10]:
# A symmetric matrix: the entry at (i, j) equals the entry at (j, i).
B = torch.tensor([
    [1, 2, 3],
    [2, 0, 4],
    [3, 4, 5],
])
B

tensor([[1, 2, 3],
        [2, 0, 4],
        [3, 4, 5]])

In [11]:
# Elementwise comparison of B with its transpose.
B == B.T

tensor([[True, True, True],
        [True, True, True],
        [True, True, True]])

### tensors

In [12]:
# A rank-3 tensor: 24 consecutive integers arranged as 2 blocks of 3 x 4.
X = torch.arange(24).reshape(2, 3, 4)
# Pass the label and the value as separate arguments so print() shows them
# as text rather than as the repr of a tuple.
print('X.shape = ', X.shape)
print('X = ', X)

('X.shape = ', torch.Size([2, 3, 4]))
X =  tensor([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],

        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]])


### Basic Properties of Tensor Arithmetic

In [13]:
# Build a 5 x 4 float matrix, then take a copy of it with clone() so that
# B is a separate tensor holding the same values.
A = torch.arange(20, dtype=torch.float32).reshape((5, 4))
B = torch.clone(A)
A, A + B

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [12., 13., 14., 15.],
         [16., 17., 18., 19.]]),
 tensor([[ 0.,  2.,  4.,  6.],
         [ 8., 10., 12., 14.],
         [16., 18., 20., 22.],
         [24., 26., 28., 30.],
         [32., 34., 36., 38.]]))


$$
\mathbf{A}\odot\mathbf{B} =
\begin{bmatrix}
    a_{11}  b_{11} & a_{12}  b_{12} & \dots  & a_{1n}  b_{1n} \\
    a_{21}  b_{21} & a_{22}  b_{22} & \dots  & a_{2n}  b_{2n} \\
    \vdots & \vdots & \ddots & \vdots \\
    a_{m1}  b_{m1} & a_{m2}  b_{m2} & \dots  & a_{mn}  b_{mn}
\end{bmatrix}
$$



In [14]:
# The elementwise multiplication of two matrices is called their Hadamard product (math notation ⊙):
# A ⊙ B = [a_ij * b_ij]
A * B

tensor([[  0.,   1.,   4.,   9.],
        [ 16.,  25.,  36.,  49.],
        [ 64.,  81., 100., 121.],
        [144., 169., 196., 225.],
        [256., 289., 324., 361.]])

In [15]:
# Adding a scalar to a tensor, or multiplying by one, acts on every element,
# so the result has the same shape as the tensor operand.
a, X = 2, torch.arange(24).reshape((2, 3, 4))
a + X, (a * X).shape

(tensor([[[ 2,  3,  4,  5],
          [ 6,  7,  8,  9],
          [10, 11, 12, 13]],
 
         [[14, 15, 16, 17],
          [18, 19, 20, 21],
          [22, 23, 24, 25]]]),
 torch.Size([2, 3, 4]))

### Reduction

In [16]:
# sum() adds up all of the elements in the tensor.
x = torch.arange(4, dtype=torch.float32)
x, torch.sum(x)

(tensor([0., 1., 2., 3.]), tensor(6.))

In [17]:
# Show A, its shape, and the sum taken over all of its elements.
A, A.shape, A.sum()

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [12., 13., 14., 15.],
         [16., 17., 18., 19.]]),
 torch.Size([5, 4]),
 tensor(190.))

In [18]:
# Summing along axis 0 collapses that axis, so it no longer appears in the
# shape of the result.
A_sum_axis0 = A.sum(dim=0)
A, A_sum_axis0, A_sum_axis0.shape

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [12., 13., 14., 15.],
         [16., 17., 18., 19.]]),
 tensor([40., 45., 50., 55.]),
 torch.Size([4]))

In [19]:
# Summing along axis 1 collapses that axis instead.
A_sum_axis1 = A.sum(dim=1)
A, A_sum_axis1, A_sum_axis1.shape

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [12., 13., 14., 15.],
         [16., 17., 18., 19.]]),
 tensor([ 6., 22., 38., 54., 70.]),
 torch.Size([5]))

In [20]:
# Reducing over both axes at once is the same as A.sum().
A.sum(dim=[0, 1])

tensor(190.)

In [21]:
# numel() counts the elements of the tensor, so dividing the total sum by it
# is shown next to mean() for comparison.
A.mean(), A.sum() / A.numel()

(tensor(9.5000), tensor(9.5000))

### Non-Reduction sum

In [22]:
# Keep the reduced axis as a length-1 axis so the result stays 2-D.
sum_A = A.sum(dim=1, keepdim=True)
sum_A, sum_A.shape

(tensor([[ 6.],
         [22.],
         [38.],
         [54.],
         [70.]]),
 torch.Size([5, 1]))

In [23]:
# sum_A was computed with keepdims=True, so it broadcasts against A and each
# row of A is divided by that row's own sum.
A / sum_A

tensor([[0.0000, 0.1667, 0.3333, 0.5000],
        [0.1818, 0.2273, 0.2727, 0.3182],
        [0.2105, 0.2368, 0.2632, 0.2895],
        [0.2222, 0.2407, 0.2593, 0.2778],
        [0.2286, 0.2429, 0.2571, 0.2714]])

In [24]:
# Cumulative sum of the elements of A along axis 0; the shape is unchanged.
A, A.cumsum(dim=0)


(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [12., 13., 14., 15.],
         [16., 17., 18., 19.]]),
 tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  6.,  8., 10.],
         [12., 15., 18., 21.],
         [24., 28., 32., 36.],
         [40., 45., 50., 55.]]))

### Dot Products
给定两个向量 $\mathbf{x}, \mathbf{y} \in \mathbb{R}^d$，它们的点积运算可以表示为 $\mathbf{x}^\top \mathbf{y}$（或 $\langle \mathbf{x}, \mathbf{y} \rangle$），即对应位置的元素相乘再相加：$\mathbf{x}^\top \mathbf{y} = \sum_{i=1}^{d} x_i y_i$。


In [25]:
# Dot product of x with a vector of four ones.
y = torch.ones(4, dtype=torch.float32)
x, y, x.dot(y)

(tensor([0., 1., 2., 3.]), tensor([1., 1., 1., 1.]), tensor(6.))

In [26]:
# Equivalent operation: multiply elementwise first, then sum.
(x * y).sum()


tensor(6.)

### Matrix-Vector Products
矩阵 A：$\mathbf{A} \in \mathbb{R}^{m \times n}$

向量 x：$\mathbf{x} \in \mathbb{R}^n$

用行向量表示矩阵 A（其中 $\mathbf{a}^\top_{i} \in \mathbb{R}^n$ 是 A 的第 $i$ 行）：

$\mathbf{A}=
\begin{bmatrix}
\mathbf{a}^\top_{1} \\
\mathbf{a}^\top_{2} \\
\vdots \\
\mathbf{a}^\top_m \\
\end{bmatrix},$

$
\mathbf{A}\mathbf{x}
= \begin{bmatrix}
\mathbf{a}^\top_{1} \\
\mathbf{a}^\top_{2} \\
\vdots \\
\mathbf{a}^\top_m \\
\end{bmatrix}\mathbf{x}
= \begin{bmatrix}
 \mathbf{a}^\top_{1} \mathbf{x}  \\
 \mathbf{a}^\top_{2} \mathbf{x} \\
\vdots\\
 \mathbf{a}^\top_{m} \mathbf{x}\\
\end{bmatrix}.
$

In [27]:
# torch.mv computes the matrix-vector product of A and x.
A, x , torch.mv(A, x)


(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [12., 13., 14., 15.],
         [16., 17., 18., 19.]]),
 tensor([0., 1., 2., 3.]),
 tensor([ 14.,  38.,  62.,  86., 110.]))

### matrix-matrix multiplication
$\mathbf{A} \in \mathbb{R}^{n \times k},\mathbf{B} \in \mathbb{R}^{k \times m}$

In [32]:
# Multiply A by a 4 x 3 matrix of ones using torch.mm.
B = torch.ones((4, 3))
A, B, torch.mm(A, B)

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [12., 13., 14., 15.],
         [16., 17., 18., 19.]]),
 tensor([[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]]),
 tensor([[ 6.,  6.,  6.],
         [22., 22., 22.],
         [38., 38., 38.],
         [54., 54., 54.],
         [70., 70., 70.]]))

### Norm
L1范数：$\|\mathbf{x}\|_1 = \sum_{i=1}^n \left|x_i \right|$

L2范数：$\|\mathbf{x}\|_2 = \sqrt{\sum_{i=1}^n x_i^2}$

$L_p$范数：$\|\mathbf{x}\|_p = \left(\sum_{i=1}^n \left|x_i \right|^p \right)^{1/p}$

In [33]:
# L2 norm of a vector. torch.norm is deprecated, so the maintained
# torch.linalg.norm is used; for a 1-D input its default is the 2-norm.
u = torch.tensor([3.0, -4.0])
torch.linalg.norm(u)


tensor(5.)

In [34]:
# L1 norm: the sum of the absolute values of the elements.
u.abs().sum()


tensor(7.)

In [35]:
# Matrix norm. torch.norm is deprecated, so torch.linalg.norm is used; for a
# 2-D input its default is the Frobenius norm (square root of the sum of
# squared entries), which is 6 for a 4 x 9 matrix of ones.
torch.linalg.norm(torch.ones((4, 9)))

tensor(6.)

### Exercise
1. Prove that the transpose of a matrix  A ’s transpose is  A :  (A⊤)⊤=A .

2. Given two matrices  A  and  B , show that the sum of transposes is equal to the transpose of a sum:  A⊤+B⊤=(A+B)⊤ .

3. Given any square matrix  A , is  A+A⊤  always symmetric? Why?

4. We defined the tensor X of shape (2, 3, 4) in this section. What is the output of len(X)?

5. For a tensor X of arbitrary shape, does len(X) always correspond to the length of a certain axis of X? What is that axis?

6. Run A / A.sum(axis=1) and see what happens. Can you analyze the reason?

7. When traveling between two points in Manhattan, what is the distance that you need to cover in terms of the coordinates, i.e., in terms of avenues and streets? Can you travel diagonally?

8. Consider a tensor with shape (2, 3, 4). What are the shapes of the summation outputs along axis 0, 1, and 2?

In [36]:
# Exercise 1: check on an example that transposing twice returns the
# original matrix.
A = torch.arange(12).reshape((4, 3))
A, A.T, A.T.T, A.T.T == A

(tensor([[ 0,  1,  2],
         [ 3,  4,  5],
         [ 6,  7,  8],
         [ 9, 10, 11]]),
 tensor([[ 0,  3,  6,  9],
         [ 1,  4,  7, 10],
         [ 2,  5,  8, 11]]),
 tensor([[ 0,  1,  2],
         [ 3,  4,  5],
         [ 6,  7,  8],
         [ 9, 10, 11]]),
 tensor([[True, True, True],
         [True, True, True],
         [True, True, True],
         [True, True, True]]))

In [37]:
# Exercise 2: check on an example that the sum of the transposes equals the
# transpose of the sum.
A = torch.arange(12).reshape((4, 3))
B = torch.arange(12, 24).reshape((4, 3))

A, B, (A.T + B.T) == (A + B).T

(tensor([[ 0,  1,  2],
         [ 3,  4,  5],
         [ 6,  7,  8],
         [ 9, 10, 11]]),
 tensor([[12, 13, 14],
         [15, 16, 17],
         [18, 19, 20],
         [21, 22, 23]]),
 tensor([[True, True, True, True],
         [True, True, True, True],
         [True, True, True, True]]))

In [39]:
# Exercise 4: len() of a tensor returns the size of its first axis.
X = torch.ones(2, 3, 4)
X, len(X)


(tensor([[[1., 1., 1., 1.],
          [1., 1., 1., 1.],
          [1., 1., 1., 1.]],
 
         [[1., 1., 1., 1.],
          [1., 1., 1., 1.],
          [1., 1., 1., 1.]]]),
 2)

In [47]:
# Exercise 6: divide a 5 x 5 matrix by its row sums, with and without
# keepdims.
A = torch.arange(25, dtype=torch.float32).reshape((5, 5))

(
    A,
    # Row sums as a 1-D tensor of length 5.
    A.sum(axis=1),
    # Without keepdims the 1-D sums line up with the last axis of A, so the
    # entry in column j is divided by the sum of row j.
    A / A.sum(axis=1),
    # With keepdims the sums form a 5 x 1 column, so each row is divided by
    # its own sum.
    A / A.sum(axis=1, keepdims=True),
)

(tensor([[ 0.,  1.,  2.,  3.,  4.],
         [ 5.,  6.,  7.,  8.,  9.],
         [10., 11., 12., 13., 14.],
         [15., 16., 17., 18., 19.],
         [20., 21., 22., 23., 24.]]),
 tensor([ 10.,  35.,  60.,  85., 110.]),
 tensor([[0.0000, 0.0286, 0.0333, 0.0353, 0.0364],
         [0.5000, 0.1714, 0.1167, 0.0941, 0.0818],
         [1.0000, 0.3143, 0.2000, 0.1529, 0.1273],
         [1.5000, 0.4571, 0.2833, 0.2118, 0.1727],
         [2.0000, 0.6000, 0.3667, 0.2706, 0.2182]]),
 tensor([[0.0000, 0.1000, 0.2000, 0.3000, 0.4000],
         [0.1429, 0.1714, 0.2000, 0.2286, 0.2571],
         [0.1667, 0.1833, 0.2000, 0.2167, 0.2333],
         [0.1765, 0.1882, 0.2000, 0.2118, 0.2235],
         [0.1818, 0.1909, 0.2000, 0.2091, 0.2182]]))

In [46]:
# The same comparison along axis 0: the division with and without keepdims
# is shown side by side.
(
    A,
    A.sum(axis=0),
    A / A.sum(axis=0),
    A / A.sum(axis=0, keepdims=True),
)

(tensor([[ 0.,  1.,  2.,  3.,  4.],
         [ 5.,  6.,  7.,  8.,  9.],
         [10., 11., 12., 13., 14.],
         [15., 16., 17., 18., 19.],
         [20., 21., 22., 23., 24.]]),
 tensor([50., 55., 60., 65., 70.]),
 tensor([[0.0000, 0.0182, 0.0333, 0.0462, 0.0571],
         [0.1000, 0.1091, 0.1167, 0.1231, 0.1286],
         [0.2000, 0.2000, 0.2000, 0.2000, 0.2000],
         [0.3000, 0.2909, 0.2833, 0.2769, 0.2714],
         [0.4000, 0.3818, 0.3667, 0.3538, 0.3429]]),
 tensor([[0.0000, 0.0182, 0.0333, 0.0462, 0.0571],
         [0.1000, 0.1091, 0.1167, 0.1231, 0.1286],
         [0.2000, 0.2000, 0.2000, 0.2000, 0.2000],
         [0.3000, 0.2909, 0.2833, 0.2769, 0.2714],
         [0.4000, 0.3818, 0.3667, 0.3538, 0.3429]]))

In [48]:
# Exercise 8: sum a tensor of shape (2, 3, 4) along each axis in turn; the
# summed axis is removed from the shape of the result.
X = torch.arange(24).reshape(2, 3, 4)
X, X.sum(dim=0), X.sum(dim=1), X.sum(dim=2)

(tensor([[[ 0,  1,  2,  3],
          [ 4,  5,  6,  7],
          [ 8,  9, 10, 11]],
 
         [[12, 13, 14, 15],
          [16, 17, 18, 19],
          [20, 21, 22, 23]]]),
 tensor([[12, 14, 16, 18],
         [20, 22, 24, 26],
         [28, 30, 32, 34]]),
 tensor([[12, 15, 18, 21],
         [48, 51, 54, 57]]),
 tensor([[ 6, 22, 38],
         [54, 70, 86]]))