In [23]:
import numpy as np
import torch

In [2]:
x = torch.randn(3, 4, requires_grad=True)
x

tensor([[ 1.4086, -0.0648,  0.1465, -0.4711],
        [ 2.0809, -0.2548,  1.0870,  0.4020],
        [ 0.5828,  1.0515, -0.8192,  0.8985]], requires_grad=True)

In [3]:
b = torch.randn(3, 4)
b.requires_grad = True
b

tensor([[-0.0624,  1.1154,  0.9472,  0.5116],
        [ 0.4861,  0.7103,  1.1752,  0.6374],
        [-0.0752, -0.0204,  0.1741, -0.8160]], requires_grad=True)

In [5]:
t = x + b
t

tensor([[ 1.3462,  1.0506,  1.0937,  0.0406],
        [ 2.5670,  0.4555,  2.2622,  1.0394],
        [ 0.5076,  1.0311, -0.6451,  0.0825]], grad_fn=<AddBackward0>)

In [6]:
y = t.sum()
y

tensor(10.8313, grad_fn=<SumBackward0>)

In [7]:
y.backward()  # 反向传播求导数

In [9]:
x.grad

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

$$y=w*x$$
$$z=y+b$$
$$z=w*x+b$$
反向传播求导数

In [16]:
x = torch.randn(1)
b = torch.randn(1, requires_grad=True)
w = torch.randn(1, requires_grad=True)
y = w * x
z = y + b
x, b, w, y, z

(tensor([-0.2768]),
 tensor([0.6393], requires_grad=True),
 tensor([-1.1215], requires_grad=True),
 tensor([0.3104], grad_fn=<MulBackward0>),
 tensor([0.9497], grad_fn=<AddBackward0>))

In [17]:
x.requires_grad, b.requires_grad, w.requires_grad, y.requires_grad, z.requires_grad

(False, True, True, True, True)

In [18]:
x.is_leaf, y.is_leaf, w.is_leaf, b.is_leaf, z.is_leaf  # w和x是叶节点

(True, False, True, True, False)

In [19]:
z.backward()

In [21]:
w.grad, x.grad

(tensor([-0.2768]), None)

In [15]:
b.grad

tensor([1.])

## **一个线性回归的例子**
构造一组输入数据x和对应的标签y

In [33]:
x_values = [i for i in range(11)]
x_train = np.array(x_values, dtype=np.float32)
x_train = x_train.reshape(-1, 1)
x_train.shape, x_train

((11, 1),
 array([[ 0.],
        [ 1.],
        [ 2.],
        [ 3.],
        [ 4.],
        [ 5.],
        [ 6.],
        [ 7.],
        [ 8.],
        [ 9.],
        [10.]], dtype=float32))

In [34]:
import torch.nn as nn

In [36]:
y_values = [2 * i + 1 for i in x_values]
y_train = np.array(y_values, dtype=np.float32)
y_train = y_train.reshape(-1, 1)
y_train

array([[ 1.],
       [ 3.],
       [ 5.],
       [ 7.],
       [ 9.],
       [11.],
       [13.],
       [15.],
       [17.],
       [19.],
       [21.]], dtype=float32)

## 线性回归模型
相当于一个不加激活函数的全连接层

In [37]:
class LinearRegressionModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearRegressionModel, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        out = self.linear(x)
        return out

In [38]:
input_dim = 1
output_dim = 1

model = LinearRegressionModel(input_dim, output_dim)

In [39]:
model

LinearRegressionModel(
  (linear): Linear(in_features=1, out_features=1, bias=True)
)

**指定好参数和损失函数**

In [40]:
epochs = 1000
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

**训练模型**

In [41]:
for epoch in range(epochs):
    epoch += 1
    inputs = torch.from_numpy(x_train)
    labels = torch.from_numpy(y_train)

    # 每一次迭代梯度要清零
    optimizer.zero_grad()

    # 前向传播
    outputs = model(inputs)

    # 计算损失
    loss = criterion(outputs, labels)

    # 反向传播
    loss.backward()

    # 更新权重参数
    optimizer.step()
    if epoch % 50 == 0:
        print(f"epoch:{epoch},loss:{loss.item()}")

epoch:50,loss:0.40743544697761536
epoch:100,loss:0.2323860079050064
epoch:150,loss:0.13254421949386597
epoch:200,loss:0.07559823244810104
epoch:250,loss:0.043118495494127274
epoch:300,loss:0.02459312602877617
epoch:350,loss:0.014027044177055359
epoch:400,loss:0.008000414818525314
epoch:450,loss:0.0045631942339241505
epoch:500,loss:0.00260267429985106
epoch:550,loss:0.0014844724209979177
epoch:600,loss:0.0008466961444355547
epoch:650,loss:0.0004829152603633702
epoch:700,loss:0.00027543632313609123
epoch:750,loss:0.00015709294530097395
epoch:800,loss:8.959745173342526e-05
epoch:850,loss:5.110794518259354e-05
epoch:900,loss:2.9146600354579277e-05
epoch:950,loss:1.662561589910183e-05
epoch:1000,loss:9.482711902819574e-06


**测试模型预测结果**

In [42]:
predicted = model(torch.from_numpy(x_train).requires_grad_()).data.numpy()
predicted

array([[ 0.9942718],
       [ 2.9950967],
       [ 4.9959216],
       [ 6.9967465],
       [ 8.997572 ],
       [10.998397 ],
       [12.999222 ],
       [15.000047 ],
       [17.000872 ],
       [19.001696 ],
       [21.002522 ]], dtype=float32)

**模型的保存与读取**

In [43]:
torch.save(model.state_dict(), 'model.pkl')

In [44]:
model.load_state_dict(torch.load('model.pkl'))

<All keys matched successfully>

### **使用GPU进行训练**
将数据和模型传入到cuda即可

In [45]:
model_gpu = LinearRegressionModel(1, 1)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model_gpu.to(device)


for epoch in range(epochs):
    epoch += 1
    inputs = torch.from_numpy(x_train).to(device)
    labels = torch.from_numpy(y_train).to(device)6

    # 每一次迭代梯度要清零
    optimizer.zero_grad()

    # 前向传播
    outputs = model(inputs)

    # 计算损失
    loss = criterion(outputs, labels)

    # 反向传播
    loss.backward()

    # 更新权重参数
    optimizer.step()
    if epoch % 50 == 0:
        print(f"epoch:{epoch},loss:{loss.item()}")

RuntimeError: Tensor for 'out' is on CPU, Tensor for argument #1 'self' is on CPU, but expected them to be on GPU (while checking arguments for addmm)