In [1]:
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from torchviz import make_dot


x = np.random.rand(100, 1)
y = np.random.rand(100, 1)

In [2]:
# 对输入数据的处理
# 1 将数据从 numpy 类型转换为 tensor 类型, float()是强制转化为float类型
# 2 如何将 cpu 上面的数据放到 GPU 上去？
# 3 一般情况下1， 2都是一起完成的
# 4 如何生成随机数并将其转化为GPU上的tensor的格式？

# 1
x_cpu_tensor = torch.from_numpy(x).float()
y_cpu_tensor = torch.from_numpy(y).float()
# print('x_cpu_tensor' + x_cpu_tensor.type())

# 2
device = 'cuda' if torch.cuda.is_available() else 'cpu' 
# 这里由于我装好了GPU版本的torch就是gpu，所以 device = 'cuda'
x_gpu_tensor = x_cpu_tensor.to(device)
y_gpu_tensor = y_cpu_tensor.to(device)
# print('x_gpu_tensor' + x_gpu_tensor.type())

# 3
# x_gpu_tensor = torch.from_numpy(x).float().ro(device)

# 4
a_cpu_tensor = torch.randn(1, requires_grad = True, dtype = torch.float)

a_gpu_tensor_wrong = torch.randn(1, requires_grad = True, dtype = torch.float).to(device) #这会导致 grad 被“shadow”

a_gpu_tensor_right_bad = torch.randn(1, dtype=torch.float).to(device)
a_gpu_tensor_right_bad.requires_grad_()

a_gpu_tensor_right_good = torch.randn(1, requires_grad = True, dtype = torch.float, device = device)



In [3]:
# 反向传播——以y = a + b * x 为例
# 1 复杂的做法——自己计算
# 2 简单的做法——使用optimizer（优化器），以sgd为例
# 3 简单的做法——使用nn中封装的损失函数，以MSEloss为例
# 4 常见的损失函数
# 5 用类进行封装——以线性分类器为例
# 6 用函数进行进一步封装——以线性分类器为例
# 7 将输入数据用函数封装
# 8 划分数据集


# 反向传播之前要做的准备
lr = 1e-1
n_epochs = 1000
a = torch.randn(1, requires_grad = True, dtype = torch.float, device = 'cuda')
b = torch.randn(1, requires_grad = True, dtype = torch.float, device = 'cuda')

x_train_tensor = x_gpu_tensor
y_train_tensor = y_gpu_tensor
# 1
for epoch in range(n_epochs):
    yhat = a + b * x_train_tensor
    error = y_train_tensor - yhat
    loss = (error ** 2).mean()

    # a_grad = -2 * error.mean()
    # b_grad = -2 * (x_tensor * error).mean()
    # We just tell PyTorch to work its way BACKWARDS from the specified loss!
    loss.backward() #和上面两句代码等价
    
    # print(a.grad)
    
    # What about UPDATING the parameters? Not so fast...
    
    # FIRST ATTEMPT——wrong
    # AttributeError: 'NoneType' object has no attribute 'zero_'
    # a = a - lr * a.grad
    # print(a)

    # SECOND ATTEMPT——wrong
    # RuntimeError: a leaf Variable that requires grad has been used in an in-place operation.
    # a -= lr * a.grad
    
    # THIRD ATTEMPT——right
    # We need to use NO_GRAD to keep the update out of the gradient computation
    # Why is that? It boils down to the DYNAMIC GRAPH that PyTorch uses...
    with torch.no_grad():
        a -= lr * a.grad
    
    # PyTorch is "clingy" to its computed gradients, we need to tell it to let it go...
    a.grad.zero_() # 在这个迭代方法中这句代码是必不可少的，否则就会出现错误

    
# 2  
optimizer = optim.SGD([a, b], lr=lr)  # SGD 随机梯度下降，
# 但实际上我们经常使用像AdaGrad，RMSProp，Adam等等更为优秀的优化器来训练神经网络。
for epoch in range(n_epochs):
    yhat = a + b * x_train_tensor
    error = y_train_tensor - yhat
    loss = (error ** 2).mean()

    loss.backward()
    
    #with torch.no_grad():
    #    a -= lr * a.grad   
    # 下面这句代码与上两句代码等价 
    optimizer.step() 
    
    # a.grad.zero_() 
    # 下面这句代码与上两句代码等价
    optimizer.zero_grad()
    
# 3
loss_fn = nn.MSELoss(reduction='mean')
optimizer = optim.SGD([a, b], lr=lr)
for epoch in range(n_epochs):
    yhat = a + b * x_train_tensor
    
    # error = y_tensor - yhat
    # loss = (error ** 2).mean()
    # 下一行代码和上两行代码等价
    loss = loss_fn(y_train_tensor, yhat)

    loss.backward()    
    optimizer.step()
    optimizer.zero_grad()

# 4
# 基本用法
# criterion = LossCriterion() #构造函数有自己的参数
# loss = criterion(x, y) #调用标准时也有参数

# 4 - 1 L1范数损失 L1Loss
torch.nn.L1Loss(reduction='mean')

# 4 - 2 均方误差损失 MSELoss
torch.nn.MSELoss(reduction='mean')

# 详见 https://blog.csdn.net/shanglianlm/article/details/85019768

MSELoss()

In [4]:
# 5
class ManualLinearRegression(nn.Module):
    def __init__(self):
        super().__init__()
        # To make "a" and "b" real parameters of the model, we need to wrap them with nn.Parameter
        self.a = nn.Parameter(torch.randn(1, requires_grad=True, dtype=torch.float))
        self.b = nn.Parameter(torch.randn(1, requires_grad=True, dtype=torch.float))
        
    def forward(self, x):
        # Computes the outputs / predictions
        return self.a + self.b * x

model = ManualLinearRegression().to(device)
loss_fn = nn.MSELoss(reduction='mean')
optimizer = optim.SGD(model.parameters(), lr=lr)

for epoch in range(n_epochs):
    # What is this?!?
    model.train()

    # No more manual prediction!
    # yhat = a + b * x_tensor
    yhat = model(x_train_tensor)
    
    loss = loss_fn(y_train_tensor, yhat)
    loss.backward()    
    optimizer.step()
    optimizer.zero_grad()

In [5]:
# 6
class LayerLinearRegression(nn.Module):
    def __init__(self):
        super().__init__()
        # Instead of our custom parameters, we use a Linear layer with single input and single output
        self.linear = nn.Linear(1, 1)
                
    def forward(self, x):
        # Now it only takes a call to the layer to make predictions
        return self.linear(x)

# Alternatively, you can use a Sequential model
model = nn.Sequential(nn.Linear(1, 1)).to(device)

def make_train_step(model, loss_fn, optimizer):
    # Builds function that performs a step in the train loop
    def train_step(x, y):
        # Sets model to TRAIN mode
        model.train()
        # Makes predictions
        yhat = model(x)
        # Computes loss
        loss = loss_fn(y, yhat)
        # Computes gradients
        loss.backward()
        # Updates parameters and zeroes gradients
        optimizer.step()
        optimizer.zero_grad()
        # Returns the loss
        return loss.item()
    
    # Returns the function that will be called inside the train loop
    return train_step

# Creates the train_step function for our model, loss function and optimizer
train_step = make_train_step(model, loss_fn, optimizer)
losses = []

# For each epoch...
for epoch in range(n_epochs):
    # Performs one train step and returns the corresponding loss
    loss = train_step(x_train_tensor, y_train_tensor)
    losses.append(loss)

In [7]:
# 7
from torch.utils.data import Dataset, TensorDataset
class CustomDataset(Dataset):
    def __init__(self, x_tensor, y_tensor):
        self.x = x_tensor
        self.y = y_tensor
        
    def __getitem__(self, index):
        return (self.x[index], self.y[index])

    def __len__(self):
        return len(self.x)

x_train = x
y_train = y
# Wait, is this a CPU tensor now? Why? Where is .to(device)?
x_train_tensor = torch.from_numpy(x_train).float()
y_train_tensor = torch.from_numpy(y_train).float()

train_data = CustomDataset(x_train_tensor, y_train_tensor)
train_data = TensorDataset(x_train_tensor, y_train_tensor)  # 二者等价，直接用TensorDateset即可

from torch.utils.data import DataLoader
train_loader = DataLoader(dataset = train_data, batch_size = 16, shuffle = True)
# next(iter(train_loader)) # 每16个数据为一组，这行代码是为了查看下一组

losses = []
train_step = make_train_step(model, loss_fn, optimizer)

for epoch in range(n_epochs):
    for x_batch, y_batch in train_loader:
        # the dataset "lives" in the CPU, so do our mini-batches
        # therefore, we need to send those mini-batches to the
        # device where the model "lives"
        # 大概意思就是说上面的model是位于device上的，所以要训练的数据也要to（device）
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        
        loss = train_step(x_batch, y_batch)
        losses.append(loss)

In [8]:
# 8
from torch.utils.data.dataset import random_split
from torch.utils.data import Dataset, TensorDataset
from torch.utils.data import DataLoader

device = 'cuda' if torch.cuda.is_available() else 'cpu'

x_tensor = torch.from_numpy(x).float()
y_tensor = torch.from_numpy(y).float()

a = torch.randn(1, requires_grad = True, dtype = torch.float, device = device)
b = torch.randn(1, requires_grad = True, dtype = torch.float, device = device)

dataset = TensorDataset(x_tensor, y_tensor)

train_dataset, val_dataset = random_split(dataset, [80, 20]) # 将数据集8、2分

train_loader = DataLoader(dataset = train_dataset, batch_size = 16)
val_loader = DataLoader(dataset = val_dataset, batch_size = 20)

losses = []
val_losses = []

lr = 0.01
loss_fn = nn.MSELoss(reduction='mean')
optimizer = optim.SGD([a, b], lr=lr)
train_step = make_train_step(model, loss_fn, optimizer)

for epoch in range(n_epochs):
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)

        loss = train_step(x_batch, y_batch)
        losses.append(loss)
        
    with torch.no_grad():
        for x_val, y_val in val_loader:
            x_val = x_val.to(device)
            y_val = y_val.to(device)
            
            model.eval()

            yhat = model(x_val)
            val_loss = loss_fn(y_val, yhat)
            val_losses.append(val_loss.item())

In [9]:
# 检查模型的参数
model.state_dict()

OrderedDict([('0.weight', tensor([[-0.6978]], device='cuda:0')),
             ('0.bias', tensor([0.6750], device='cuda:0'))])

In [11]:
import torch
 
# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10
 
# Create random Tensors to hold inputs and outputs.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
 
# Use the nn package to define our model and loss function.
model = torch.nn.Sequential(
          torch.nn.Linear(D_in, H),
          torch.nn.ReLU(),
          torch.nn.Linear(H, D_out),
        )
loss_fn = torch.nn.MSELoss(reduction = 'sum')
 
# Use the optim package to define an Optimizer that will update the weights of
# the model for us. Here we will use Adam; the optim package contains many other
# optimization algoriths. The first argument to the Adam constructor tells the
# optimizer which Tensors it should update.
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for t in range(500):
  # Forward pass: compute predicted y by passing x to the model.
  y_pred = model(x)
 
  # Compute and print loss.
  loss = loss_fn(y_pred, y)
  print(t, loss.item())
  
  # Before the backward pass, use the optimizer object to zero all of the
  # gradients for the Tensors it will update (which are the learnable weights
  # of the model)
  optimizer.zero_grad()
 
  # Backward pass: compute gradient of the loss with respect to model parameters
  loss.backward()
 
  # Calling the step function on an Optimizer makes an update to its parameters
  optimizer.step()


0 708.8596801757812
1 690.9149780273438
2 673.413330078125
3 656.3956909179688
4 639.882080078125
5 623.9313354492188
6 608.3696899414062
7 593.2067260742188
8 578.5216674804688
9 564.3375244140625
10 550.6903076171875
11 537.5280151367188
12 524.7810668945312
13 512.5166625976562
14 500.6340637207031
15 489.08966064453125
16 477.79022216796875
17 466.77532958984375
18 456.053466796875
19 445.64227294921875
20 435.55413818359375
21 425.7245788574219
22 416.1257629394531
23 406.7690124511719
24 397.6847229003906
25 388.84490966796875
26 380.1683349609375
27 371.7011413574219
28 363.4910888671875
29 355.51470947265625
30 347.69268798828125
31 340.01910400390625
32 332.483642578125
33 325.1187744140625
34 317.9007873535156
35 310.8634033203125
36 303.9915771484375
37 297.2645263671875
38 290.67572021484375
39 284.2334899902344
40 277.91925048828125
41 271.7469787597656
42 265.7047424316406
43 259.7878723144531
44 253.98158264160156
45 248.2821044921875
46 242.69065856933594
47 237.2090454

406 4.9047142965719104e-05
407 4.665308370022103e-05
408 4.437381721800193e-05
409 4.220255505060777e-05
410 4.013648140244186e-05
411 3.816717071458697e-05
412 3.6289980926085263e-05
413 3.45068474416621e-05
414 3.2802516216179356e-05
415 3.1182626116788015e-05
416 2.9641299988725223e-05
417 2.8172891688882373e-05
418 2.6775383958010934e-05
419 2.5444589482503943e-05
420 2.4179003958124667e-05
421 2.2971435100771487e-05
422 2.1827692762599327e-05
423 2.073553332593292e-05
424 1.96964101633057e-05
425 1.870907362899743e-05
426 1.776927092578262e-05
427 1.687433905317448e-05
428 1.6023977877921425e-05
429 1.5215549865388311e-05
430 1.4446908608078957e-05
431 1.3715824934479315e-05
432 1.3018914614804089e-05
433 1.2358239473542199e-05
434 1.1729356629075482e-05
435 1.1131680366815999e-05
436 1.0562827810645103e-05
437 1.0023159120464697e-05
438 9.509434676147066e-06
439 9.022408448799979e-06
440 8.55841426528059e-06
441 8.118114237731788e-06
442 7.699327397858724e-06
443 7.30192368791904