In [1]:
# 测试torch安装是否成功
import torch
print(torch.__version__)
print(torch.cuda.is_available())

2.0.1+cu118
True


In [2]:
# Tensor, 张量，零维是标量，一维是矢量，二维是矩阵等
x = torch.arange(12)
print(x)
print(x.shape)  # 张量形状
print(x.numel())  # 张量元素总数
y = x.reshape(3, 4)  # 就地操作
print(y)
# 查看x中的元素变化，y中的会不会变
x[0] = 100
print(x)
print(y)  # y中的元素也变了

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])
torch.Size([12])
12
tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])
tensor([100,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11])
tensor([[100,   1,   2,   3],
        [  4,   5,   6,   7],
        [  8,   9,  10,  11]])


In [3]:
# 全零张量
print(torch.zeros((2, 3, 4)))
# 全一张量
print(torch.ones(3, 2, 4))
# 高斯分布
print(torch.randn(3, 4))
# 使用列表初始化
print(torch.tensor([[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]]))

tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])
tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.]]])
tensor([[ 0.6815,  0.0349, -1.1498,  1.1388],
        [ 0.9753,  1.2014, -0.9877,  0.5758],
        [-1.7291,  2.2944, -0.3217,  2.0213]])
tensor([[2, 1, 4, 3],
        [1, 2, 3, 4],
        [4, 3, 2, 1]])


In [4]:
# 运算符
x = torch.tensor([1.0, 2.0, 3.0])
print(x + 1)
y = torch.tensor([3.0, 4.0, 5.0])
print(x + y)
z = torch.tensor([1.0])
print(x + z)
x + y, x - y, x * y, x / y, x ** y  # **运算符是求幂运算
# 求幂等运算
print(torch.exp(x))

tensor([2., 3., 4.])
tensor([4., 6., 8.])
tensor([2., 3., 4.])
tensor([ 2.7183,  7.3891, 20.0855])


In [5]:
# 张量拼接
x = torch.arange(12, dtype=torch.float32).reshape((3, 4))
y = torch.tensor([[2.0, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
print(torch.cat((x, y), dim=0))
print(torch.cat((x, y), dim=1))
print(x == y)
print(x.sum())

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [ 2.,  1.,  4.,  3.],
        [ 1.,  2.,  3.,  4.],
        [ 4.,  3.,  2.,  1.]])
tensor([[ 0.,  1.,  2.,  3.,  2.,  1.,  4.,  3.],
        [ 4.,  5.,  6.,  7.,  1.,  2.,  3.,  4.],
        [ 8.,  9., 10., 11.,  4.,  3.,  2.,  1.]])
tensor([[False,  True, False,  True],
        [False, False, False, False],
        [False, False, False, False]])
tensor(66.)


In [6]:
# 广播机制
a = torch.arange(3).reshape((3, 1))
b = torch.arange(2).reshape((1, 2))
print(a)
print(b)
print(a + b)

tensor([[0],
        [1],
        [2]])
tensor([[0, 1]])
tensor([[0, 1],
        [1, 2],
        [2, 3]])


In [7]:
# 索引和切片，依旧是前开后闭区间
x = torch.arange(12, dtype=torch.float32).reshape((3, 4))
print(x[-1])
print(x[1:3])
print(x[1:])

tensor([ 8.,  9., 10., 11.])
tensor([[ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]])
tensor([[ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]])


In [8]:
# 原地更新
print(id(x))  # 查看张量x的id
y = torch.zeros_like(x)
print('id(y):', id(y))
y[:] = x + x
print('id(y):', id(y))

2460900698768
id(y): 2460901226352
id(y): 2460901226352


In [9]:
# torch和numpy互相转换，共享内存
y = x.numpy()  # 共享内存
z = torch.tensor(y)  # 不共享内存
u = torch.from_numpy(y)  # 共享内存
print(type(y))
print(type(z))
y[0] = 1000
print(x, id(x))  # 即使id不同，依旧共享内存
print(y, id(y))
print(z, id(z))
print(u, id(u))
a = torch.tensor([3.5])
a, a.item(), float(a), int(a)

<class 'numpy.ndarray'>
<class 'torch.Tensor'>
tensor([[1000., 1000., 1000., 1000.],
        [   4.,    5.,    6.,    7.],
        [   8.,    9.,   10.,   11.]]) 2460900698768
[[1000. 1000. 1000. 1000.]
 [   4.    5.    6.    7.]
 [   8.    9.   10.   11.]] 2460915921520
tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]]) 2460903266080
tensor([[1000., 1000., 1000., 1000.],
        [   4.,    5.,    6.,    7.],
        [   8.,    9.,   10.,   11.]]) 2460879237824


(tensor([3.5000]), 3.5, 3.5, 3)

In [10]:
import os

os.makedirs(os.path.join("./", "data"), exist_ok=True)
data_file = os.path.join("./", "data", "house_tiny.csv")

In [11]:
# 写入文件
with open(data_file, 'w') as f:
    f.write('NumRooms,Alley,Price\n')  # 列名
    f.write('NA,Pave,127500\n')  # 每行表示一个数据样本
    f.write('2,NA,106000\n')
    f.write('4,NA,178100\n')
    f.write('NA,NA,140000\n')

In [12]:
import pandas as pd
data = pd.read_csv(data_file)
print(data)

   NumRooms Alley   Price
0       NaN  Pave  127500
1       2.0   NaN  106000
2       4.0   NaN  178100
3       NaN   NaN  140000


In [13]:
# Nan表示缺失值，为了处理缺失值，典型的方法包括插值法和删除法
# 插值法用一个替代值来弥补缺失值，而删除法则直接忽略缺失值
inputs, outputs = data.iloc[:, 0:2], data.iloc[:, 2]  # 获取索引
inputs = inputs.fillna(inputs.mean(numeric_only = True))  # 插值法
print(inputs, outputs)
inputs = pd.get_dummies(inputs, dummy_na=True)
print(inputs)

   NumRooms Alley
0       3.0  Pave
1       2.0   NaN
2       4.0   NaN
3       3.0   NaN 0    127500
1    106000
2    178100
3    140000
Name: Price, dtype: int64
   NumRooms  Alley_Pave  Alley_nan
0       3.0        True      False
1       2.0       False       True
2       4.0       False       True
3       3.0       False       True


In [14]:
# 转为张量格式
x = torch.tensor(inputs.to_numpy(dtype=float))
y = torch.tensor(outputs.to_numpy(dtype=float))
print(x)
print(y)

tensor([[3., 1., 0.],
        [2., 0., 1.],
        [4., 0., 1.],
        [3., 0., 1.]], dtype=torch.float64)
tensor([127500., 106000., 178100., 140000.], dtype=torch.float64)


In [15]:
# 线性代数
A = torch.arange(20, dtype=torch.float32).reshape(5, 4)
B = A.clone()  # 通过分配新内存，将A的一个副本分配给B
print(A)
print(A + B)
print(A * B)

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [12., 13., 14., 15.],
        [16., 17., 18., 19.]])
tensor([[ 0.,  2.,  4.,  6.],
        [ 8., 10., 12., 14.],
        [16., 18., 20., 22.],
        [24., 26., 28., 30.],
        [32., 34., 36., 38.]])
tensor([[  0.,   1.,   4.,   9.],
        [ 16.,  25.,  36.,  49.],
        [ 64.,  81., 100., 121.],
        [144., 169., 196., 225.],
        [256., 289., 324., 361.]])


In [16]:
# 降维
print(A.shape)
print(A.sum())
# 指定轴求和
A_sum_axis0 = A.sum(axis=0)  # 轴0消失
print(A_sum_axis0)
print(A_sum_axis0.shape)
A_sum_axis1 = A.sum(axis=1)  # 轴1消失
print(A_sum_axis1)
print(A_sum_axis1.shape)

torch.Size([5, 4])
tensor(190.)
tensor([40., 45., 50., 55.])
torch.Size([4])
tensor([ 6., 22., 38., 54., 70.])
torch.Size([5])


In [17]:
# 求均值
print(A.mean())
print(A.sum() / A.numel())
print(A.mean(axis=0))
print(A.sum(axis=0) / A.shape[0])

tensor(9.5000)
tensor(9.5000)
tensor([ 8.,  9., 10., 11.])
tensor([ 8.,  9., 10., 11.])


In [18]:
# 非降维求和
sum_A = A.sum(axis=1, keepdims=True)
print(sum_A)
print(A / sum_A)
A.cumsum(axis=0)  # 累积求和

tensor([[ 6.],
        [22.],
        [38.],
        [54.],
        [70.]])
tensor([[0.0000, 0.1667, 0.3333, 0.5000],
        [0.1818, 0.2273, 0.2727, 0.3182],
        [0.2105, 0.2368, 0.2632, 0.2895],
        [0.2222, 0.2407, 0.2593, 0.2778],
        [0.2286, 0.2429, 0.2571, 0.2714]])


tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  6.,  8., 10.],
        [12., 15., 18., 21.],
        [24., 28., 32., 36.],
        [40., 45., 50., 55.]])

In [19]:
# 点积，dot product
x = torch.ones(4, dtype = torch.float32)
y = torch.ones(4, dtype = torch.float32)
print(torch.dot(x, y))
print(torch.sum(x * y))

tensor(4.)
tensor(4.)


In [20]:
# 向量积
A.shape, x.shape, torch.mv(A, x)

(torch.Size([5, 4]), torch.Size([4]), tensor([ 6., 22., 38., 54., 70.]))

In [21]:
# 矩阵乘法
B = torch.ones(4, 3)
torch.mm(A, B)

tensor([[ 6.,  6.,  6.],
        [22., 22., 22.],
        [38., 38., 38.],
        [54., 54., 54.],
        [70., 70., 70.]])

In [22]:
# L2范数
u = torch.tensor([3.0, -4.0])
print(torch.norm(u))
# L1范数
print(torch.abs(u).sum())

tensor(5.)
tensor(7.)


In [23]:
# 梯度
# 连结一个多元函数对其所有变量的偏导数，以得到该函数的梯度（gradient）向量

In [24]:
# 链式法则
# 链式法则可以被用来微分复合函数

In [25]:
# 微分和积分是微积分的两个分支，前者可以应用于深度学习中的优化问题。

# 导数可以被解释为函数相对于其变量的瞬时变化率，它也是函数曲线的切线的斜率。

# 梯度是一个向量，其分量是多变量函数相对于其所有变量的偏导数。

# 链式法则可以用来微分复合函数。

In [26]:
# 标量的反向传播
x = torch.arange(4.0)
x.requires_grad_(True)
print(x.grad)
y = 2 * torch.dot(x, x)
print(y)
y.backward()
print(x.grad)
z = 3 * torch.dot(x, x)
print(z)
z.backward()  # 梯度发生了累加
x.grad

None
tensor(28., grad_fn=<MulBackward0>)
tensor([ 0.,  4.,  8., 12.])
tensor(42., grad_fn=<MulBackward0>)


tensor([ 0., 10., 20., 30.])

In [27]:
# 在默认情况下，PyTorch会累积梯度，我们需要清除之前的值
x.grad.zero_()
y = x.sum()  # y = x1 + x2 + x3 + x4
y.backward()
x.grad

tensor([1., 1., 1., 1.])

In [28]:
# 矢量的反向传播
# 当调用向量的反向计算时，通常会试图计算一批训练样本中每个组成部分的损失函数的导数
x.grad.zero_()
y = x * x
y.sum().backward()
x.grad

tensor([0., 2., 4., 6.])

In [29]:
# 分离计算
# 将某些计算移动到记录的计算图之外
x.grad.zero_()
y = x * x
u = y.detach()
z = u * x

z.sum().backward()
x.grad == u

tensor([True, True, True, True])

In [30]:
x.grad.zero_()
y.sum().backward()
x.grad == 2 * x

tensor([True, True, True, True])

In [31]:
# Python控制流的梯度计算
def f(a):
    b = a * 2
    while b.norm() < 1000:
        b = b * 2
    if b.sum() > 0:
        c = b
    else:
        c = 100 * b
    return c

a = torch.randn(size=(), requires_grad=True)
d = f(a)
d.backward()
a.grad == d / a

tensor(True)

In [32]:
# 查阅文档
print(dir(torch.distributions))

['AbsTransform', 'AffineTransform', 'Bernoulli', 'Beta', 'Binomial', 'CatTransform', 'Categorical', 'Cauchy', 'Chi2', 'ComposeTransform', 'ContinuousBernoulli', 'CorrCholeskyTransform', 'CumulativeDistributionTransform', 'Dirichlet', 'Distribution', 'ExpTransform', 'Exponential', 'ExponentialFamily', 'FisherSnedecor', 'Gamma', 'Geometric', 'Gumbel', 'HalfCauchy', 'HalfNormal', 'Independent', 'IndependentTransform', 'Kumaraswamy', 'LKJCholesky', 'Laplace', 'LogNormal', 'LogisticNormal', 'LowRankMultivariateNormal', 'LowerCholeskyTransform', 'MixtureSameFamily', 'Multinomial', 'MultivariateNormal', 'NegativeBinomial', 'Normal', 'OneHotCategorical', 'OneHotCategoricalStraightThrough', 'Pareto', 'Poisson', 'PositiveDefiniteTransform', 'PowerTransform', 'RelaxedBernoulli', 'RelaxedOneHotCategorical', 'ReshapeTransform', 'SigmoidTransform', 'SoftmaxTransform', 'SoftplusTransform', 'StackTransform', 'StickBreakingTransform', 'StudentT', 'TanhTransform', 'Transform', 'TransformedDistribution',

In [35]:
help(torch.ones)

Help on built-in function ones in module torch:

ones(...)
    ones(*size, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor
    
    Returns a tensor filled with the scalar value `1`, with the shape defined
    by the variable argument :attr:`size`.
    
    Args:
        size (int...): a sequence of integers defining the shape of the output tensor.
            Can be a variable number of arguments or a collection like a list or tuple.
    
    Keyword arguments:
        out (Tensor, optional): the output tensor.
        dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
            Default: if ``None``, uses a global default (see :func:`torch.set_default_tensor_type`).
        layout (:class:`torch.layout`, optional): the desired layout of returned Tensor.
            Default: ``torch.strided``.
        device (:class:`torch.device`, optional): the desired device of returned tensor.
            Default: if ``None

In [36]:
list?

In [38]:
list??