In [1]:
from torch import tensor, nn
import numpy as np
import torch

## 0. Tensor

torch.Tensor是一种包含单一数据类型元素的多维矩阵。Torch定义了七种CPU tensor类型和八种GPU tensor类型：

|Data type|	 CPU tensor | GPU tensor|
|:---|:---|:---|
|32-bit floating point|	torch.FloatTensor|	torch.cuda.FloatTensor|
|64-bit floating point|	torch.DoubleTensor|	torch.cuda.DoubleTensor|
|16-bit floating point|	N/A|	torch.cuda.HalfTensor|
|8-bit integer (unsigned)|	torch.ByteTensor|	torch.cuda.ByteTensor|
|8-bit integer (signed)|	torch.CharTensor|	torch.cuda.CharTensor|
|16-bit integer (signed)|	torch.ShortTensor|	torch.cuda.ShortTensor|
|32-bit integer (signed)|	torch.IntTensor|	torch.cuda.IntTensor|
|64-bit integer (signed)|	torch.LongTensor|	torch.cuda.LongTensor|

- `torch.tensor`和`torch.Tensor`的区别

In [2]:
torch.Tensor([5, 4]).type()

'torch.FloatTensor'

In [3]:
torch.Tensor([5, 4])

tensor([5., 4.])

In [4]:
torch.tensor([5, 4]).type()  # tensor接受已经存在的数据

'torch.LongTensor'

In [5]:
torch.tensor([5, 4])  # tensor接受已经存在的数据

tensor([5, 4])

In [6]:
torch.Tensor(5, 4)  #  Tensor创建一个多维矩阵

tensor([[6.5324e-16, 4.5586e-41, 6.5324e-16, 4.5586e-41],
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00]])

In [7]:
print(torch.Tensor(5), torch.Tensor([5]), torch.tensor(5), sep='\n')

tensor([0., 0., 0., 0., 0.])
tensor([5.])
tensor(5)


- 不同类别的`Tensor`

In [8]:
torch.IntTensor([[1, 2, 3], [4, 5, 6]])  # 一个张量tensor可以从Python的list或序列构建：

tensor([[1, 2, 3],
        [4, 5, 6]], dtype=torch.int32)

In [9]:
torch.ShortTensor(2, 4).zero_()  # 一个空张量tensor可以通过规定其大小来构建：

tensor([[0, 0, 0, 0],
        [0, 0, 0, 0]], dtype=torch.int16)

In [10]:
torch.ByteTensor(range(10))

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=torch.uint8)

In [11]:
torch.ShortTensor(np.arange(10))

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=torch.int16)

In [12]:
u = np.random.randint(10, size=(2, 10))
u = torch.from_numpy(u)  # from_numpy keeps the NumPy array's dtype; for this integer array the result shown below is a LongTensor
u.type()

'torch.LongTensor'

In [13]:
u

tensor([[6, 2, 6, 6, 4, 6, 8, 1, 7, 4],
        [2, 3, 9, 7, 6, 3, 5, 8, 2, 7]])

In [14]:
u.requires_grad

False

In [15]:
u.type()

'torch.LongTensor'

In [16]:
u = u.type(torch.FloatTensor)

In [17]:
u.requires_grad = True  # 附加梯度，反向传播时计算

In [18]:
u

tensor([[6., 2., 6., 6., 4., 6., 8., 1., 7., 4.],
        [2., 3., 9., 7., 6., 3., 5., 8., 2., 7.]], requires_grad=True)

## 1.常用函数

In [19]:
x = torch.randint(1, 50, size=(10, 5))
x

tensor([[30, 43, 43, 22, 29],
        [14, 36,  5,  2, 41],
        [ 9, 16, 26, 17, 29],
        [31, 40,  8, 33,  1],
        [33, 23, 17, 10, 48],
        [ 1, 21, 26, 17,  8],
        [34,  8, 34, 24,  8],
        [48, 30, 37, 26, 12],
        [ 3,  4, 10,  1, 18],
        [41, 42, 31, 32, 23]])

In [20]:
x.size()

torch.Size([10, 5])

In [21]:
x.shape  # 各维度上的数量

torch.Size([10, 5])

In [22]:
x.reshape(5, 10)  # 重塑形状

tensor([[30, 43, 43, 22, 29, 14, 36,  5,  2, 41],
        [ 9, 16, 26, 17, 29, 31, 40,  8, 33,  1],
        [33, 23, 17, 10, 48,  1, 21, 26, 17,  8],
        [34,  8, 34, 24,  8, 48, 30, 37, 26, 12],
        [ 3,  4, 10,  1, 18, 41, 42, 31, 32, 23]])

In [23]:
x.reshape(5, -1)  # 如果某一维度为-1，则根据总元素个数自动计算该轴长度

tensor([[30, 43, 43, 22, 29, 14, 36,  5,  2, 41],
        [ 9, 16, 26, 17, 29, 31, 40,  8, 33,  1],
        [33, 23, 17, 10, 48,  1, 21, 26, 17,  8],
        [34,  8, 34, 24,  8, 48, 30, 37, 26, 12],
        [ 3,  4, 10,  1, 18, 41, 42, 31, 32, 23]])

In [24]:
x.numel()  # 元素个数

50

In [26]:
x.reshape(50)

tensor([30, 43, 43, 22, 29, 14, 36,  5,  2, 41,  9, 16, 26, 17, 29, 31, 40,  8,
        33,  1, 33, 23, 17, 10, 48,  1, 21, 26, 17,  8, 34,  8, 34, 24,  8, 48,
        30, 37, 26, 12,  3,  4, 10,  1, 18, 41, 42, 31, 32, 23])

In [27]:
torch.arange(1, 10, 2)  # torch.range()也可用，建议用前者

tensor([1, 3, 5, 7, 9])

In [28]:
torch.linspace(1, 10, 20)

tensor([ 1.0000,  1.4737,  1.9474,  2.4211,  2.8947,  3.3684,  3.8421,  4.3158,
         4.7895,  5.2632,  5.7368,  6.2105,  6.6842,  7.1579,  7.6316,  8.1053,
         8.5789,  9.0526,  9.5263, 10.0000])

In [29]:
torch.ones_like(x)

tensor([[1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1]])

In [31]:
torch.ones(size=x.shape)

tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]])

In [32]:
torch.zeros(10)

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [33]:
torch.zeros_like(x)

tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])

In [35]:
torch.randn(10, 2) * 0.5 + 2

tensor([[2.0572, 2.5964],
        [1.9207, 2.1987],
        [2.6952, 1.6901],
        [3.6129, 1.8273],
        [1.8932, 2.2636],
        [1.3650, 1.8002],
        [1.8786, 2.5397],
        [2.0811, 2.2884],
        [1.6557, 2.7054],
        [1.6161, 1.9907]])

In [36]:
y = torch.randn(10, 1, 1, 1)  # 10*1*1*1
y

tensor([[[[ 1.1858]]],


        [[[ 0.9533]]],


        [[[-1.2399]]],


        [[[-0.5443]]],


        [[[ 0.7691]]],


        [[[-0.3276]]],


        [[[ 0.1924]]],


        [[[-1.8608]]],


        [[[ 0.7461]]],


        [[[-0.2815]]]])

In [38]:
y = torch.squeeze(y)  # 10
y

tensor([ 1.1858,  0.9533, -1.2399, -0.5443,  0.7691, -0.3276,  0.1924, -1.8608,
         0.7461, -0.2815])

In [39]:
y.reshape(-1)

tensor([ 1.1858,  0.9533, -1.2399, -0.5443,  0.7691, -0.3276,  0.1924, -1.8608,
         0.7461, -0.2815])

In [40]:
z = torch.arange(10).reshape(2, 5)
z

tensor([[0, 1, 2, 3, 4],
        [5, 6, 7, 8, 9]])

In [41]:
z.t()  # 输入一个矩阵（2维张量），并转置0, 1维。 可以被视为函数transpose(input, 0, 1)的简写函数。

tensor([[0, 5],
        [1, 6],
        [2, 7],
        [3, 8],
        [4, 9]])

In [42]:
z.reshape(z.shape[1], -1)

tensor([[0, 1],
        [2, 3],
        [4, 5],
        [6, 7],
        [8, 9]])

In [43]:
torch.sign(y)

tensor([ 1.,  1., -1., -1.,  1., -1.,  1., -1.,  1., -1.])

In [44]:
torch.sigmoid(y)  # 返回一个新张量，包含输入input张量每个元素的sigmoid值。

tensor([0.7660, 0.7218, 0.2244, 0.3672, 0.6833, 0.4188, 0.5480, 0.1346, 0.6783,
        0.4301])

In [45]:
torch.norm(y, p=2)  # 返回输入张量input 的p 范数

tensor(2.9972)

In [46]:
torch.median(y)  # Called without `dim`: returns the median over all elements as a 0-dim tensor (see output below). Only the `dim=` overload returns per-slice medians together with a LongTensor of their indices.

tensor(-0.2815)

In [47]:
torch.prod(y)  # 返回输入张量input 所有元素的积。

tensor(-0.0145)

In [48]:
torch.sort(y, dim=0, descending=True)  # 对输入张量input沿着指定维按升序排序。如果不给定dim，则默认为输入的最后一维。如果指定参数descending为True，则按降序排序

torch.return_types.sort(
values=tensor([ 1.1858,  0.9533,  0.7691,  0.7461,  0.1924, -0.2815, -0.3276, -0.5443,
        -1.2399, -1.8608]),
indices=tensor([0, 1, 4, 8, 6, 9, 5, 3, 2, 7]))

In [49]:
torch.relu(torch.randn(10))

tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0129, 0.0000, 0.0000, 0.0000, 0.0000,
        0.6724])

- 二元运算

In [50]:
a = torch.arange(10)
b = torch.arange(10, 20)

In [51]:
a * b  # 按元素乘

tensor([  0,  11,  24,  39,  56,  75,  96, 119, 144, 171])

In [52]:
torch.mul(a, b)  # 按元素乘

tensor([  0,  11,  24,  39,  56,  75,  96, 119, 144, 171])

In [53]:
torch.matmul(a, b)

tensor(735)

In [54]:
torch.mm(a.reshape(1, -1), b.reshape(-1, 1))

tensor([[735]])

In [55]:
torch.dot(a, b)  # 內积

tensor(735)

In [56]:
a @ b

tensor(735)

In [57]:
x = torch.rand(4, 3)
y = torch.rand(3, 4)

In [58]:
torch.mm(x, y)  # 矩阵乘法: 对矩阵mat1和mat2进行相乘。 如果mat1 是一个n×m张量，mat2 是一个 m×p张量，将会输出一个 n×p张量out。

tensor([[0.4106, 0.6260, 0.6398, 0.6742],
        [0.9696, 0.5561, 0.7245, 0.8263],
        [0.1044, 0.0935, 0.0824, 0.0935],
        [0.7852, 0.8291, 0.9147, 0.9885]])

In [59]:
torch.matmul(x, y)

tensor([[0.4106, 0.6260, 0.6398, 0.6742],
        [0.9696, 0.5561, 0.7245, 0.8263],
        [0.1044, 0.0935, 0.0824, 0.0935],
        [0.7852, 0.8291, 0.9147, 0.9885]])

In [60]:
x @ y  # 矩阵乘法运算符

tensor([[0.4106, 0.6260, 0.6398, 0.6742],
        [0.9696, 0.5561, 0.7245, 0.8263],
        [0.1044, 0.0935, 0.0824, 0.0935],
        [0.7852, 0.8291, 0.9147, 0.9885]])

In [61]:
z = torch.rand(3)

In [62]:
torch.mv(x, z)  # 矩阵向量乘： 对矩阵mat和向量vec进行相乘。 如果mat 是一个n×m张量，vec 是一个m元 1维张量，将会输出一个n元 1维张量。

tensor([0.5759, 0.7409, 0.1201, 0.8506])

In [63]:
x @ z

tensor([0.5759, 0.7409, 0.1201, 0.8506])

In [64]:
a = tensor(range(10), dtype=torch.float32).reshape(2, 5)
b = tensor(range(10, 20), dtype=torch.float32).reshape(2, 5)
c = tensor(range(20, 30), dtype=torch.float32).reshape(2, 5)

In [66]:
a, b, c

(tensor([[0., 1., 2., 3., 4.],
         [5., 6., 7., 8., 9.]]),
 tensor([[10., 11., 12., 13., 14.],
         [15., 16., 17., 18., 19.]]),
 tensor([[20., 21., 22., 23., 24.],
         [25., 26., 27., 28., 29.]]))

In [65]:
cat_ = torch.cat([a, b, c], 0)  # 在给定维度上对输入的张量序列进行连接操作，和extend类似
cat_

tensor([[ 0.,  1.,  2.,  3.,  4.],
        [ 5.,  6.,  7.,  8.,  9.],
        [10., 11., 12., 13., 14.],
        [15., 16., 17., 18., 19.],
        [20., 21., 22., 23., 24.],
        [25., 26., 27., 28., 29.]])

In [None]:
if torch.sum(a) > 10:
    print(1)

In [67]:
torch.cat([a, b, c], 1)

tensor([[ 0.,  1.,  2.,  3.,  4., 10., 11., 12., 13., 14., 20., 21., 22., 23.,
         24.],
        [ 5.,  6.,  7.,  8.,  9., 15., 16., 17., 18., 19., 25., 26., 27., 28.,
         29.]])

In [68]:
stack_ = torch.stack([a, b, c], 0)  # 沿着一个新维度对输入张量序列进行连接。 序列中所有的张量都应该为相同形状。

In [69]:
stack_

tensor([[[ 0.,  1.,  2.,  3.,  4.],
         [ 5.,  6.,  7.,  8.,  9.]],

        [[10., 11., 12., 13., 14.],
         [15., 16., 17., 18., 19.]],

        [[20., 21., 22., 23., 24.],
         [25., 26., 27., 28., 29.]]])

In [70]:
stack_.split([2, 3], -1)

(tensor([[[ 0.,  1.],
          [ 5.,  6.]],
 
         [[10., 11.],
          [15., 16.]],
 
         [[20., 21.],
          [25., 26.]]]),
 tensor([[[ 2.,  3.,  4.],
          [ 7.,  8.,  9.]],
 
         [[12., 13., 14.],
          [17., 18., 19.]],
 
         [[22., 23., 24.],
          [27., 28., 29.]]]))

In [71]:
stack_.chunk(2, 1)

(tensor([[[ 0.,  1.,  2.,  3.,  4.]],
 
         [[10., 11., 12., 13., 14.]],
 
         [[20., 21., 22., 23., 24.]]]),
 tensor([[[ 5.,  6.,  7.,  8.,  9.]],
 
         [[15., 16., 17., 18., 19.]],
 
         [[25., 26., 27., 28., 29.]]]))

## 2. 自动求导`autograd`

torch.autograd提供了类和函数用来对任意标量函数进行求导。

实例:
$$
\mathbf{y}=f(\mathbf{x})=2\mathbf{x}+1,\quad g(\mathbf{y})=\mathbf{y}^2+5,\quad z=\operatorname{mean}(g(\mathbf{y}))
$$
求$\frac{dz}{dx}$

In [72]:
def f(x: torch.Tensor) -> torch.Tensor:
    """Inner map of the autograd example: element-wise ``2*x + 1``."""
    return 2*x + 1

def g(x: torch.Tensor) -> torch.Tensor:
    """Outer map of the autograd example: element-wise ``x**2 + 5``."""
    return x**2 + 5

def mean(x: torch.Tensor) -> torch.Tensor:
    """Reduce ``x`` to a 0-dim tensor holding the mean of all its elements."""
    return torch.mean(x)

In [73]:
def dz_dx(x: torch.Tensor) -> torch.Tensor:
    """
    Closed-form gradient of ``z = mean((2*x + 1)**2 + 5)`` with respect to ``x``.

    By the chain rule each element contributes ``2 * (2*x + 1) * 2 / n``,
    i.e. ``(8*x + 4) / n`` where ``n = x.numel()``. Used to cross-check autograd.
    """
    return (8*x + 4) / x.numel()

In [74]:
x = torch.randint(1, 10, size=(2, 5), dtype=torch.float32, requires_grad=True)

In [75]:
x.requires_grad
# x.requires_grad_(True)  # 如果为False, 可以追加

True

In [None]:
x.requires_grad_(True)

torch自动求导结果

In [76]:
z = mean(g(f(x)))
z.backward()  # 反向传播，自动求微分
x.grad  # dz/dx

tensor([[2.0000, 2.8000, 4.4000, 4.4000, 2.0000],
        [6.0000, 2.0000, 6.0000, 6.0000, 6.0000]])

解析求导结果

In [77]:
dz_dx(x)  # dz/dx

tensor([[2.0000, 2.8000, 4.4000, 4.4000, 2.0000],
        [6.0000, 2.0000, 6.0000, 6.0000, 6.0000]], grad_fn=<DivBackward0>)

In [None]:
z.grad_fn

In [None]:
a

In [None]:
torch.cat([a, a[:5]])

In [None]:
a

## 3. 案例: Bass模型拟合

In [None]:
def grad_desc(lossfunc, x0, x1, max_iters=200, learn_rate=0.05):
    """
    Minimise ``lossfunc(x, x1)`` over ``x`` with plain gradient descent.

    lossfunc:   callable ``(x, x1) -> scalar tensor``; the objective to minimise.
    x0:         initial parameters, a leaf tensor with ``requires_grad=True``.
                It is updated IN PLACE and holds the final parameters on return.
    x1:         fixed second argument passed through to ``lossfunc`` (e.g. the data).
    max_iters:  maximum number of update steps.
    learn_rate: step size.

    Returns a tensor of shape ``(steps + 1, x0.numel())`` whose first row is the
    initial point and whose remaining rows are the parameters after each step.
    Iteration stops early once the L1 change between two steps drops below 1e-3.
    """
    x = x0
    # Drop any gradient left over from earlier backward() calls, otherwise it
    # would be accumulated into the first update.
    if x.grad is not None:
        x.grad.zero_()

    # Collect snapshots in a list and concatenate once at the end.
    trace_x = [x.detach().clone().reshape(1, -1)]
    n_steps = 0
    for i in range(max_iters):
        l = lossfunc(x, x1)
        l.backward()
        with torch.no_grad():
            x.sub_(learn_rate * x.grad)
            x.grad.zero_()  # gradients accumulate across backward() calls
            trace_x.append(x.detach().clone().reshape(1, -1))
            n_steps = i + 1
            if i % 10 == 0:
                loss = lossfunc(x, x1).item()
                print(f"迭代次数: {i}, 损失函数值: {loss:.4f}")

            if torch.sum(torch.abs(trace_x[-1] - trace_x[-2])) < 1e-3:  # stopping criterion
                break

    with torch.no_grad():
        final_loss = lossfunc(x, x1).item()
    print(f"共迭代{n_steps}次, 损失函数值: {final_loss:.4f}, 最优参数值: {x.tolist()}")
    return torch.cat(trace_x, 0)

In [None]:
def adaptive_momentum(lossfunc, x0, x1, beta1=0.5, beta2=0.5, learn_rate=0.05, max_iters=1000, eps=1e-8):
    """
    Minimise ``lossfunc(x, x1)`` over ``x`` with an adaptive-momentum update.

    Keeps exponential moving averages of the gradient (``m``) and of the squared
    gradient (``v``) and steps by ``learn_rate * m / (sqrt(v) + eps)``. No bias
    correction is applied to the moving averages.

    lossfunc:   callable ``(x, x1) -> scalar tensor``; the objective to minimise.
    x0:         initial parameters, a leaf tensor with ``requires_grad=True``.
                It is updated IN PLACE and holds the final parameters on return.
    x1:         fixed second argument passed through to ``lossfunc``.
    beta1:      decay rate of the gradient average.
    beta2:      decay rate of the squared-gradient average.
    learn_rate: step size.
    max_iters:  maximum number of update steps (guards against a run that never
                meets the stopping criterion).
    eps:        small constant added to the denominator to avoid division by zero.

    Returns a tensor of shape ``(steps + 1, x0.numel())`` whose first row is the
    initial point and whose remaining rows are the parameters after each step.
    Iteration stops early once the L1 change between two steps drops below 1e-3.
    """
    x = x0
    # Drop any gradient left over from earlier backward() calls.
    if x.grad is not None:
        x.grad.zero_()

    trace_x = [x.detach().clone().reshape(1, -1)]
    m = torch.zeros_like(x)
    v = torch.zeros_like(x)
    n_steps = 0
    for i in range(max_iters):
        l = lossfunc(x, x1)  # evaluate at the CURRENT parameters
        l.backward()
        with torch.no_grad():
            grad = x.grad
            m = beta1 * m + (1 - beta1) * grad
            v = beta2 * v + (1 - beta2) * grad ** 2
            # In-place update keeps x a leaf tensor so x.grad stays populated.
            x.sub_(learn_rate * m / (torch.sqrt(v) + eps))
            x.grad.zero_()  # gradients accumulate across backward() calls
            trace_x.append(x.detach().clone().reshape(1, -1))
            n_steps = i + 1
            if i % 5 == 0:
                print(f"迭代次数: {i}, 目标函数值f: {lossfunc(x, x1).item():.6f}")

            if torch.sum(torch.abs(trace_x[-1] - trace_x[-2])) < 1e-3:  # stopping criterion
                break

    with torch.no_grad():
        final_loss = lossfunc(x, x1).item()
    print(f"共迭代{n_steps}次, 目标函数: {final_loss}, 最优参数值: {x.tolist()}")
    return torch.cat(trace_x, 0)

In [None]:
def bass(params, T:int): # redefine this function to fit a different diffusion model
    """
    Per-period adoptions predicted by the Bass diffusion model for t = 1..T.

    params: iterable ``(p, q, m)`` that is unpacked into the three model
            parameters used in the closed-form curve below.
    T:      number of periods.

    The cumulative curve is ``m * (1 - e^{-(p+q)t}) / (1 + q/p * e^{-(p+q)t})``.
    The result is its first difference, with the first entry equal to the
    cumulative value at t = 1. Returns a 1-D float32 tensor of length ``T``
    (empty when ``T == 0``). The computation is differentiable w.r.t. ``params``.
    """
    p, q, m = params
    t_tensor = torch.arange(1, T + 1, dtype=torch.float32)
    decay = torch.exp(- (p + q) * t_tensor)  # shared by numerator and denominator
    diffu_cont = m * (1 - decay) / (1 + q / p * decay)

    # Vectorised first difference; slicing keeps this valid for T == 0 and T == 1.
    return torch.cat([diffu_cont[:1], diffu_cont[1:] - diffu_cont[:-1]])

In [None]:
def meanSquaredLoss(params, y):
    """Mean squared error between the Bass-model fit for ``params`` and the observed series ``y``."""
    residual = bass(params, y.numel()) - y
    return (residual ** 2).mean()

In [None]:
def r_2(params, y):
    """Coefficient of determination (R^2) of the Bass-model fit for ``params`` against ``y``."""
    fitted = bass(params, y.numel())
    ss_res = ((y - fitted) ** 2).sum()    # residual sum of squares
    ss_tot = ((y - y.mean()) ** 2).sum()  # total sum of squares around the mean
    return (ss_tot - ss_res) / ss_tot

In [None]:
y = tensor([96, 195, 238, 380, 1045, 1230, 1267, 1828, 1586, 1673, 1800, 1580, 1500], dtype=torch.float32)

In [None]:
params = torch.FloatTensor([0.001, 0.2, 20000])
params.requires_grad_(True)
res = grad_desc(meanSquaredLoss, params, y, learn_rate=1e-9)

In [None]:
res[-1]

In [None]:
# Hand-written gradient-descent loop fitting (p, q, m) to y, reporting R^2 every 10 rounds.
params = torch.FloatTensor([0.001, 0.2, 20000])
params.requires_grad_(True)
lr = 7e-10
for i in range(200):
    r = meanSquaredLoss(params, y)
    r.backward()
    params.data.sub_(lr * params.grad.data)  # in-place step on the raw data, outside autograd
    with torch.no_grad():
        r2 = r_2(params, y)
        p, q, m = params.detach().numpy()
        if i % 10 == 0:
#             print(params.grad.data)
            print(f"第{i+1}轮, r2={r2.detach().numpy():.4f}\n    p:{p:.4f}, q:{q:.4f}, m:{m:.4f}")
    params.grad.data.zero_()  # clear the gradient, otherwise it accumulates across iterations

## 4. 构建网络的流程

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import torch
import numpy as np
from torch import nn

### 1. 定义计算架构

In [None]:
class Net(nn.Module):
    """
    One-hidden-layer classifier: Linear -> ReLU -> Linear.

    ``forward`` returns raw class scores (logits). ``nn.CrossEntropyLoss`` applies
    log-softmax itself, so the network must not apply softmax before the loss.
    Use ``torch.softmax(net(X), dim=1)`` when probabilities are needed; the
    arg-max class is the same for logits and probabilities.
    """

    def __init__(self, dim_feature, dim_hidden, dim_output):
        super(Net, self).__init__()
        self.hidden = nn.Linear(dim_feature, dim_hidden)
        self.output = nn.Linear(dim_hidden, dim_output)

    def forward(self, X):
        """Map a ``(batch, dim_feature)`` input to ``(batch, dim_output)`` logits."""
        hidden_act = torch.relu(self.hidden(X))  # input -> hidden
        logits = self.output(hidden_act)         # hidden -> class scores
        return logits

In [None]:
net = Net(dim_feature=2, dim_hidden=10, dim_output=2)

In [None]:
optimizer = torch.optim.SGD(net.parameters(), lr=0.05)  # 指定需优化的参数
loss_func = nn.CrossEntropyLoss()  # 确定训练准则

### 2. 生成测试数据

In [None]:
# Two 2-D Gaussian clusters of 100 points each, labelled 0 and 1.
x0 = torch.randn(100, 2) + 2  # cluster centred at +2
y0 = torch.zeros(100)
x1 = torch.randn(100, 2) - 2  # cluster centred at -2
y1 = torch.ones(100)

x = torch.cat((x0, x1)).type(torch.FloatTensor)
y = torch.cat((y0, y1)).type(torch.LongTensor)  # cast the labels to integer class indices

In [None]:
# Shuffle the sample indices, then split: first 50 for training, the rest for testing.
idx = np.arange(len(x))
np.random.shuffle(idx)
train_x, train_y = x[idx[:50]], y[idx[:50]]  # 50 randomly chosen samples
test_x, test_y = x[idx[50:]], y[idx[50:]]

In [None]:
train_y

### 3. 训练

In [None]:
# Full-batch training; every 20 iterations report train and test loss, both
# evaluated with the weights AFTER the current optimizer step.
for i in range(200):
    out = net(train_x)  # call the module (not .forward) so hooks run
    loss = loss_func(out, train_y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if i % 20 == 0:
        with torch.no_grad():
            loss_train = loss_func(net(train_x), train_y)
            out_test = net(test_x)
            loss_test = loss_func(out_test, test_y)
            print(f"loss_train: {loss_train:.5f}, loss_test: {loss_test:.5f}")

In [None]:
# Predicted class = index of the largest output along dim 1 (second element of torch.max's result).
train_result = net(train_x)
predict_train_y = torch.max(train_result, 1)[1]

test_result = net(test_x)
predict_test_y = torch.max(test_result, 1)[1]

# Paired lists consumed by the plotting cell: [train, test].
x_list = [train_x, test_x]
y_list = [predict_train_y, predict_test_y]

In [None]:
# Side-by-side scatter plots: panel 1 uses x_list[0]/y_list[0], panel 2 uses x_list[1]/y_list[1];
# points are coloured by the predicted class.
fig = plt.figure(figsize=(12, 5))
for i in range(2):
    px = x_list[i]
    py = y_list[i]
    ax = fig.add_subplot(1, 2, i+1)
    ax.set_xlabel('$x_0$')
    ax.set_ylabel('$x_1$')
    ax.scatter(px.data.numpy()[:,0], px.data.numpy()[:,1], c=py.data.numpy(), s=60, lw=0, cmap='RdYlGn')

## 5. 案例： 垃圾邮件分类

In [None]:
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd
import os

利用`nn.Module`实现`Logit`回归

In [None]:
class LogitNet(nn.Module):
    """
    Multinomial logistic regression: a single linear layer followed by log-softmax.

    ``forward`` returns LOG-probabilities, which is the input ``nn.NLLLoss``
    expects. Feeding plain softmax probabilities into NLLLoss does not give the
    cross-entropy. Use ``.exp()`` on the output for probabilities; the arg-max
    class is unchanged.
    """

    def __init__(self, dim_feature, dim_output):
        super(LogitNet, self).__init__()
        self.output = nn.Linear(dim_feature, dim_output)

    def forward(self, X):
        """Map a ``(batch, dim_feature)`` input to ``(batch, dim_output)`` log-probabilities."""
        h = self.output(X)                          # features -> class scores
        o = nn.functional.log_softmax(h, dim=1)     # class scores -> log-probabilities
        return o

In [None]:
# Load the tab-separated SMS corpus: column 1 is the category (ham/spam), column 2 the message text.
df = pd.read_csv('../dataset/smsspamcollection/SMSSpamCollection', delimiter='\t', header=None, names=['category', 'message'])
df['label'] = (df.category == 'ham').astype('int')  # 1 = ham (legitimate), 0 = spam
print('垃圾邮件数量: %d ' % np.sum(df.label == 0))
print('正常邮件数量: %d ' % np.sum(df.label == 1))  # legitimate messages carry label 1

In [None]:
X = df.message.values
y = df.label.values
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=200)
# Convert the labels to tensors
y_train = torch.tensor(y_train).type(torch.LongTensor)  # labels stay 1-D class indices; no reshape(-1, 1) needed
y_test = torch.tensor(y_test).type(torch.LongTensor)
# TF-IDF features: fit on the training messages only, then apply the same transform to the test messages
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(X_train_raw)
X_test = vectorizer.transform(X_test_raw)
# Densify the transformed features and convert them to float tensors
X_train = torch.tensor(X_train.toarray(), dtype=torch.float)
X_test = torch.tensor(X_test.toarray(), dtype=torch.float)

In [None]:
batch_size = 300  # mini-batches of 300 samples, reshuffled every epoch
dataset = TensorDataset(X_train, y_train)
data_iter = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

In [None]:
snet = LogitNet(dim_feature=X_train.shape[1], dim_output=2)
optimizer = torch.optim.SGD(snet.parameters(), lr=0.03)  # parameters to optimise
# loss_func = nn.CrossEntropyLoss()  # alternative criterion; it expects raw logits as input
loss_func = nn.NLLLoss()  # negative log-likelihood; expects log-probabilities as input

In [None]:
# Mini-batch SGD for 100 epochs; every 20 epochs report the loss on the full
# training and test sets.
for i in range(100):
    # Distinct batch names so the notebook-level X / y are not overwritten.
    for X_batch, y_batch in data_iter:
        loss = loss_func(snet(X_batch), y_batch)  # call the module (not .forward) so hooks run
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if i % 20 == 0:
        with torch.no_grad():
            loss_train = loss_func(snet(X_train), y_train)
            out_test = snet(X_test)
            loss_test = loss_func(out_test, y_test)
            print(f"loss_train: {loss_train:.5f}, loss_test: {loss_test:.5f}")

In [None]:
# Training-set predictions: arg-max over dim 1, then print (number misclassified, number correct).
train_result = snet(X_train)
predict_y_train = torch.max(train_result, 1)[1]
print(torch.sum(predict_y_train != y_train), torch.sum(predict_y_train == y_train))

In [None]:
a = torch.randn(4, 8)

In [None]:
torch.std(a, 1)