## x.1 generate Tensor

In [2]:
import torch

In [3]:
# Create a 1-D float32 tensor holding 0..11 (the tensor analogue of range(12)).
x = torch.arange(0, 12, dtype=torch.float32)
x

tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11.])

In [4]:
# Total number of elements stored in the tensor.
x.numel()

12

In [5]:
# Size of the tensor along each dimension.
x.shape

torch.Size([12])

In [6]:
# Rearrange the 12 elements of x into 3 rows and 4 columns.
X = torch.reshape(x, (3, 4))
X

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]])

In [7]:
# Build a 2-D tensor directly from a nested Python list (one inner list per row).
torch.tensor([[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])

tensor([[2, 1, 4, 3],
        [1, 2, 3, 4],
        [4, 3, 2, 1]])

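下面的例子中，`randn`、`ones`、`zeros` 的形状参数不必死记要用几层括号：PyTorch 里 `torch.randn(3, 4)` 与 `torch.randn((3, 4))` 等价；NumPy 里 `np.random.randn` 只接受展开的维度，而 `np.zeros` 需要传入元组。记不清时，一对括号报错就改用两对。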

In [16]:
# Build a multi-dimensional tensor: 3x4 random samples.
# No seed is set in this cell, so the values differ from run to run.
torch.randn(3, 4)

tensor([[ 0.4468,  0.6136, -1.0410,  1.3744],
        [ 0.3855,  0.4518, -1.7943, -0.5141],
        [-2.0124,  0.9585,  0.3497, -1.4943]])

In [21]:
# A tensor of ones with shape (2, 3, 4); the shape is passed as a single tuple.
torch.ones((2, 3, 4))

tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]])

In [30]:
import numpy as np

# NumPy counterparts of the tensor constructors shown above:
# a range, a 2x3 array of random samples, and a 2x3 array of zeros.
a = np.arange(2)
b = np.random.randn(2, 3)
c = np.zeros((2, 3))

# Print them in creation order: a, then b, then c.
for arr in (a, b, c):
    print(arr)

[0 1]
[[-0.18348654 -0.20707669 -1.32987231]
 [-0.08770208 -0.23207067 -0.14711481]]
[[0. 0. 0.]
 [0. 0. 0.]]


## x.2 slice

In [31]:
# Index the last row, and slice rows 1 and 2 (the end index 3 is exclusive).
X[-1], X[1:3]

(tensor([ 8.,  9., 10., 11.]),
 tensor([[ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.]]))

In [32]:
# Write a single element in place: row 1, column 2.
X[1, 2] = 17
X

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5., 17.,  7.],
        [ 8.,  9., 10., 11.]])

In [33]:
# Assign one scalar to every element of the first two rows.
X[:2, :] = 12
X

tensor([[12., 12., 12., 12.],
        [12., 12., 12., 12.],
        [ 8.,  9., 10., 11.]])

## x.3 some operations

In [2]:
# Start from an integer tensor of ones.
x = torch.ones(3, 4, dtype=torch.int16)
print(x)
# Element-wise e**x; the displayed result is a floating-point tensor.
x.exp()

tensor([[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]], dtype=torch.int16)


tensor([[2.7183, 2.7183, 2.7183, 2.7183],
        [2.7183, 2.7183, 2.7183, 2.7183],
        [2.7183, 2.7183, 2.7183, 2.7183]])

In [3]:
# A float tensor and an integer tensor of the same length.
x = torch.tensor([1.0, 2, 4, 8])
y = torch.tensor([2] * 4)
# The arithmetic operators act element by element.
(x + y, x - y, x * y, x / y, x ** y)

(tensor([ 3.,  4.,  6., 10.]),
 tensor([-1.,  0.,  2.,  6.]),
 tensor([ 2.,  4.,  8., 16.]),
 tensor([0.5000, 1.0000, 2.0000, 4.0000]),
 tensor([ 1.,  4., 16., 64.]))

In [4]:
# Two 3x4 float tensors to join together.
X = torch.arange(12, dtype=torch.float32).reshape(3, 4)
Y = torch.tensor([
    [2.0, 1, 4, 3],
    [1, 2, 3, 4],
    [4, 3, 2, 1],
])
# dim=0 stacks rows (result 6x4); dim=1 stacks columns (result 3x8).
torch.cat([X, Y], dim=0), torch.cat([X, Y], dim=1)

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [ 2.,  1.,  4.,  3.],
         [ 1.,  2.,  3.,  4.],
         [ 4.,  3.,  2.,  1.]]),
 tensor([[ 0.,  1.,  2.,  3.,  2.,  1.,  4.,  3.],
         [ 4.,  5.,  6.,  7.,  1.,  2.,  3.,  4.],
         [ 8.,  9., 10., 11.,  4.,  3.,  2.,  1.]]))

In [5]:
# Element-wise comparison; yields a boolean tensor with the same shape as X and Y.
X == Y

tensor([[False,  True, False,  True],
        [False, False, False, False],
        [False, False, False, False]])

### x.3.1 哈达玛积（按元素乘法）用 `*`，点积用 `torch.dot()`，矩阵-向量乘法用 `torch.mv()`，矩阵-矩阵乘法用 `torch.mm()`

## x.4 broadcasting

In [6]:
# A column vector of shape (3, 1) and a row vector of shape (1, 2).
a = torch.arange(3).reshape(3, 1)
b = torch.arange(2).reshape(1, 2)
(a, b)

(tensor([[0],
         [1],
         [2]]),
 tensor([[0, 1]]))

In [7]:
# The scalar 1 is added to every element of a.
a+1

tensor([[1],
        [2],
        [3]])

In [8]:
# Shapes (3, 1) and (1, 2) are broadcast to a common (3, 2) shape before adding.
a + b

tensor([[0, 1],
        [1, 2],
        [2, 3]])

## x.5 share memory

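`X[:] = X + Y` or `X += Y` writes the result into the existing tensor `X` (an in-place update), whereas `X = X + Y` creates a new tensor and rebinds the name.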

In [9]:
# `Y = Y + X` builds a new tensor and rebinds the name Y to it,
# so the object identity changes.
before = id(Y)
Y = Y + X
id(Y) == before

False

In [10]:
# Slice assignment writes into the existing tensor Z instead of rebinding
# the name, so id(Z) is the same before and after.
Z = torch.zeros_like(Y)
print('id(Z):', id(Z))
Z[:] = X + Y
print('id(Z):', id(Z))

id(Z): 139776704737472
id(Z): 139776704737472


In [11]:
# The augmented assignment `X += Y` updates X in place; X remains the same object.
before = id(X)
X += Y
id(X) == before

True

## x.6 conversion between array, ndarray and Tensor

These two APIs are provided by PyTorch:

A = X.numpy() # Tensor -> ndarray

B = torch.from_numpy(A) # ndarray -> Tensor

In [12]:
# Round trip: Tensor -> numpy.ndarray -> Tensor.
A = X.numpy()
B = torch.from_numpy(A)
type(A), type(B)

(numpy.ndarray, torch.Tensor)

In [13]:
# A one-element tensor can be converted to a Python scalar
# with .item() or the built-in float() / int() (int() truncates 3.5 to 3).
a = torch.tensor([3.5])
a, a.item(), float(a), int(a)

(tensor([3.5000]), 3.5, 3.5, 3)

## x.7 automatic differentiation

x.requires_grad_(True)增加追踪梯度信息

y.backward()反向传播，注意只可以对标量进行反向传播。该步骤会更新需要追踪的参数的梯度矩阵

x.grad查看梯度矩阵

x.grad.zero_()  将梯度信息设置为0，不然梯度信息会累积。常见如在下一epoch中累积

In [3]:
# Float vector [0, 1, 2, 3] used as the input for the gradient examples.
x = torch.arange(4, dtype=torch.float32)
x

tensor([0., 1., 2., 3.])

In [5]:
# Can also create x = torch.arange(4.0, requires_grad=True)
x.requires_grad_(True)  # turn on gradient tracking for x in place
print(x.grad)  # The gradient is None by default

None


In [7]:
# y = 2 * (x . x) is a scalar; x.grad is still None because backward() has not run yet.
y = 2 * torch.dot(x, x)
y, x.grad

(tensor(28., grad_fn=<MulBackward0>), None)

In [8]:
# y = 2 * sum(x_i^2), so dy/dx_i = 4 * x_i (just like y = 2x^2, y' = 4x);
# backward() evaluates this at the current x and stores it in x.grad.
y.backward()
x.grad

tensor([ 0.,  4.,  8., 12.])

In [10]:
# Verify the gradient against the analytic result 4 * x.
x.grad == 4 * x

tensor([True, True, True, True])

In [11]:
# Reset the gradient
x.grad.zero_()
y = x.sum()
y.backward()
y, x.grad   # y = x1 + x2 + ...; the partial derivative with respect to each x_i is 1

(tensor(6., grad_fn=<SumBackward0>), tensor([1., 1., 1., 1.]))

In [20]:
x.grad.zero_()
# The original note says this is the same as:
# z = torch.nn.Parameter(torch.tensor([4, 5, 6, 7], dtype=torch.float32))
z = torch.tensor([4, 5, 6, 7], requires_grad=True, dtype=torch.float32)
print(x, z, x.grad, z.grad, sep='::\n')
# a = x . z, so da/dx = z and da/dz = x.
a = torch.dot(x, z)
a.backward()
print(x.grad, z.grad)

tensor([0., 1., 2., 3.], requires_grad=True)::
tensor([4., 5., 6., 7.], requires_grad=True)::
tensor([0., 0., 0., 0.])::
None
tensor([4., 5., 6., 7.]) tensor([0., 1., 2., 3.])


### x.7.1 非标量变量的反向传播

当你对矩阵进行运算时，你最终得到的值必须得是一个标量y，再对这个标量y进行反向传播

In [23]:
x.grad.zero_()
y = x * x   # `*` is element-wise multiplication, not matrix multiplication (use torch.mm or @ for that)
# backward() needs a scalar, so y is reduced with sum(): the result is
# x1*x1 + x2*x2 + ... rather than the vector [x1*x1, x2*x2, ...].
y.sum().backward()
# y.backward(gradient=torch.ones(len(y)))  # Faster: y.sum().backward()
x.grad

tensor([0., 2., 4., 6.])

### x.7.2 使用detach()剥离计算图

注意：`u = y.detach()` 返回的 u 是一个从计算图中剥离出来的常数（数值与 y 相同），而 y 本身并没有从计算图中剥离，仍然可以对 y 做反向传播。

In [24]:
x.grad.zero_()
y = x * x
u = y.detach()  # u has the same values as y but is treated as a constant
z = u * x

# Because u is a constant here, d(sum(u * x))/dx = u.
z.sum().backward()
x.grad == u

tensor([True, True, True, True])

In [25]:
# y itself is still attached to the graph, so backpropagating through it
# gives d(sum(x * x))/dx = 2 * x.
x.grad.zero_()
y.sum().backward()
x.grad == 2 * x

tensor([True, True, True, True])

## x.8 Python帮助小技巧，Python手册

dir(torch) 查看有哪些函数，类等attributes

help(torch.ones) 查看帮助文档

In [28]:
import torch

# dir() returns the names of the module's attributes (functions, classes, ...).
print(dir(torch))
# help() writes the documentation to the output itself and returns None,
# so wrapping it in print() would only append a stray "None" line.
help(torch.ones)

['AVG', 'AggregationType', 'AliasDb', 'Any', 'AnyType', 'Argument', 'ArgumentSpec', 'AwaitType', 'BFloat16Storage', 'BFloat16Tensor', 'BenchmarkConfig', 'BenchmarkExecutionStats', 'Block', 'BoolStorage', 'BoolTensor', 'BoolType', 'BufferDict', 'ByteStorage', 'ByteTensor', 'CallStack', 'Callable', 'Capsule', 'CharStorage', 'CharTensor', 'ClassType', 'Code', 'CompilationUnit', 'CompleteArgumentSpec', 'ComplexDoubleStorage', 'ComplexFloatStorage', 'ComplexType', 'ConcreteModuleType', 'ConcreteModuleTypeBuilder', 'DeepCopyMemoTable', 'DeserializationStorageContext', 'DeviceObjType', 'Dict', 'DictType', 'DisableTorchFunction', 'DisableTorchFunctionSubclass', 'DispatchKey', 'DispatchKeySet', 'DoubleStorage', 'DoubleTensor', 'EnumType', 'ErrorReport', 'ExcludeDispatchKeyGuard', 'ExecutionPlan', 'FatalError', 'FileCheck', 'FloatStorage', 'FloatTensor', 'FloatType', 'FunctionSchema', 'Future', 'FutureType', 'Generator', 'Gradient', 'Graph', 'GraphExecutorState', 'HalfStorage', 'HalfTensor', 'IO