In [2]:
import torch

In [3]:
# Toy 1-D regression data: inputs 1..5 and targets that are close to 2*x.
x = torch.arange(1, 6, dtype=torch.float32)
y = torch.tensor([2.0, 4.0, 6.2, 7.6, 9.8], dtype=torch.float32)

In [4]:
# build linear reg
def linear_reg(x, w, b):
    """Evaluate the linear model ``x*w + b``.

    Args:
        x: input value(s).
        w: weight (slope) multiplied with ``x``.
        b: bias (intercept) added to the product.

    Returns:
        The result of ``x*w + b``.
    """
    scaled = x * w
    return scaled + b

def square_loss(y_hat, y):
    """Mean squared error between predictions and targets.

    Args:
        y_hat: predicted values.
        y: target values.

    Returns:
        The mean of the element-wise squared differences ``(y_hat - y)**2``.
    """
    residual = y_hat - y
    return torch.mean(residual ** 2)

In [5]:
# Settings for plain gradient descent.
iter_time = 50
lr = 0.01

# Scalar weight and bias, both starting at zero and tracked by autograd.
w = torch.zeros((), dtype=torch.float32, requires_grad=True)
b = torch.zeros((), dtype=torch.float32, requires_grad=True)

for i in range(iter_time):
    # forward pass: predictions for the current w and b
    y_hat = linear_reg(x, w, b)

    # mean squared error against the targets
    loss = square_loss(y_hat, y)

    # backward pass: fills w.grad and b.grad
    loss.backward()

    # update the weights; no_grad keeps the update itself out of autograd
    with torch.no_grad():
        # in-place, so w and b stay the same tensors that require grad
        w.sub_(lr * w.grad)
        b.sub_(lr * b.grad)
        # reset the gradients before the next backward pass
        w.grad.zero_()
        b.grad.zero_()

In [6]:
# Inside torch.no_grad(), do not write w=w-lr*w.grad: that rebinds w to a new tensor with requires_grad=False,
# so later backward() calls no longer populate w.grad. Update in place instead: w-=lr*w.grad or w.sub_(lr*w.grad)

In [7]:
# general pipeline
# 1. design model (input,output)
# 2. construct the loss and optimizer 
# 3. training loop
#      - forward pass: compute prediction
#      - backward pass: gradients
#      - update weights

In [8]:
import torch
from torch.nn import MSELoss,Linear
from torch.optim import SGD

In [9]:
# Column-vector data of shape (5, 1): one row per sample, one column per feature.
x = torch.tensor([[v] for v in (1, 2, 3, 4, 5)], dtype=torch.float32)
y = torch.tensor([[v] for v in (2, 4, 6.2, 7.6, 9.8)], dtype=torch.float32)

In [18]:
class LinearRegression(torch.nn.Module):
    """Linear regression model built on a single ``Linear`` layer.

    Maps inputs with ``feature_n`` features to one output value per sample.
    """

    def __init__(self, feature_n):
        """Create the underlying linear layer.

        Args:
            feature_n: number of input features per sample.
        """
        super().__init__()  # initialize nn.Module before attaching sub-layers
        self.linear_layer = Linear(in_features=feature_n, out_features=1)

    def forward(self, x):
        """Return the linear layer's prediction for ``x``."""
        return self.linear_layer(x)

In [17]:
# Rows of x are samples, columns are features.
sample_n, feature_n = x.shape
iter_time = 30
learning_rate = 0.05

# model, loss function and optimizer
model = LinearRegression(feature_n)
loss_func = MSELoss()
optimizer = SGD(params=model.parameters(), lr=learning_rate)

# training loop
for i in range(iter_time):
    # forward pass: predictions and their loss against the targets
    y_pred = model(x)
    print(y_pred.shape, y.shape)
    loss = loss_func(y_pred, y)

    # backward pass: compute gradients of the loss
    loss.backward()
    print(loss)

    # apply the update, then clear the gradients for the next iteration
    optimizer.step()
    optimizer.zero_grad()

torch.Size([5, 1]) torch.Size([5, 1])
tensor(41.6811, grad_fn=<MseLossBackward0>)
torch.Size([5, 1]) torch.Size([5, 1])
tensor(1.4522, grad_fn=<MseLossBackward0>)
torch.Size([5, 1]) torch.Size([5, 1])
tensor(0.1027, grad_fn=<MseLossBackward0>)
torch.Size([5, 1]) torch.Size([5, 1])
tensor(0.0566, grad_fn=<MseLossBackward0>)
torch.Size([5, 1]) torch.Size([5, 1])
tensor(0.0542, grad_fn=<MseLossBackward0>)
torch.Size([5, 1]) torch.Size([5, 1])
tensor(0.0533, grad_fn=<MseLossBackward0>)
torch.Size([5, 1]) torch.Size([5, 1])
tensor(0.0524, grad_fn=<MseLossBackward0>)
torch.Size([5, 1]) torch.Size([5, 1])
tensor(0.0517, grad_fn=<MseLossBackward0>)
torch.Size([5, 1]) torch.Size([5, 1])
tensor(0.0509, grad_fn=<MseLossBackward0>)
torch.Size([5, 1]) torch.Size([5, 1])
tensor(0.0501, grad_fn=<MseLossBackward0>)
torch.Size([5, 1]) torch.Size([5, 1])
tensor(0.0494, grad_fn=<MseLossBackward0>)
torch.Size([5, 1]) torch.Size([5, 1])
tensor(0.0487, grad_fn=<MseLossBackward0>)
torch.Size([5, 1]) torch.Si

### loss function
1. PyTorch's built-in loss functions are in the `torch.nn` module.
2. How to use one:
    - First, create an instance of the loss class (e.g. `loss_func=torch.nn.MSELoss()`)
    - Next, call that instance with the predicted and target values to get the loss tensor (e.g. `loss=loss_func(y_hat,y)`)
    - Last, call `backward()` on the loss tensor to run backprop (e.g. `loss.backward()`)

### optimizer
1. To use `torch.optim`, you have to construct an optimizer object (e.g. `torch.optim.SGD(model.parameters(), lr=0.05)`).
2. To construct an optimizer you have to give it an iterable containing the parameters to optimize (all should be `Parameter`s, i.e. tensors that require gradients, such as `model.parameters()`; the old `Variable` wrapper is deprecated).
3. All optimizers implement a `step()` method that updates the parameters. (Call it after `loss.backward()`.)
4. Call `optimizer.zero_grad()` every iteration to clear the gradients, because `backward()` accumulates into them instead of overwriting them.

### Build a model
1. The model class inherits from `torch.nn.Module`.
2. Implement the model by defining its `__init__` and `forward` methods.
3. Calling the model (e.g. `model(x)`) invokes its `forward()` method.