In [25]:
# `nn` is an abbreviation for neural networks
from torch import nn
import numpy as np
import torch
from torch.utils import data
from d2l import torch as d2l

In [20]:
'''
In PyTorch, the fully-connected layer is defined in the Linear class. Note that we passed two arguments into nn.Linear.
The first one specifies the input feature dimension, which is 2, and the
second one is the output feature dimension, which is a single scalar and therefore 1.
'''


# A single fully-connected layer: 2 input features in, 1 output value out.
net = nn.Sequential(nn.Linear(in_features=2, out_features=1))

In [21]:
'''
Deep learning frameworks often have a predefined way to initialize the
parameters. Here we specify that each weight parameter should be randomly sampled from a normal distribution with mean 0 and standard deviation 0.01. The bias parameter will be initialized
to zero.
'''

# Initialize through torch.nn.init instead of mutating `.data` directly: the
# init helpers run under no_grad, so the in-place writes are not recorded by
# autograd and do not go through the `.data` alias.
# Weights are drawn from a normal distribution (mean 0, std 0.01); bias is zero.
# The trailing `;` suppresses the echoed Parameter repr in the notebook.
nn.init.normal_(net[0].weight, mean=0.0, std=0.01)
nn.init.zeros_(net[0].bias);

tensor([0.])

In [22]:
'''
The MSELoss class computes the mean squared error, also known as the squared L2 norm. By default
it returns the average loss over examples.
'''
# Mean squared error; 'mean' (the default reduction) averages over all elements.
loss = nn.MSELoss(reduction='mean')

In [23]:
'''When we instantiate an SGD instance, we specify the parameters to optimize over (obtainable from
our net via net.parameters()), together with the hyperparameters required by our optimization
algorithm, passed as keyword arguments. Minibatch stochastic gradient descent just requires that
we set the learning rate lr, which is set to 0.03 here.
'''
# Plain SGD over every parameter of `net`, with learning rate 0.03.
trainer = torch.optim.SGD(params=net.parameters(), lr=0.03)

In [24]:
from d2l import torch as d2l

# Reference weight vector and bias handed to the data generator; the final
# cell compares the fitted parameters against these.
true_w, true_b = torch.tensor([2, -3.4]), 4.2
# Request 1000 examples from d2l's helper (the recorded output below shows
# `features` with shape [1000, 2]).
features, labels = d2l.synthetic_data(true_w, true_b, 1000)

In [46]:
# Sanity check: display the dimensions of the feature matrix.
features.size()

torch.Size([1000, 2])

In [47]:
def load_array(data_arrays, batch_size, is_train=True): #@save
    """Wrap tensors in a PyTorch ``DataLoader`` that yields minibatches.

    ``data_arrays`` is unpacked into a ``TensorDataset`` (one positional
    argument per tensor), ``batch_size`` is forwarded to the loader, and
    ``is_train`` decides whether the loader shuffles.
    """
    tensor_dataset = data.TensorDataset(*data_arrays)
    loader = data.DataLoader(tensor_dataset, batch_size=batch_size, shuffle=is_train)
    return loader

In [62]:
# Minibatch size, and a loader over the (features, labels) pairs; load_array
# shuffles by default because is_train defaults to True.
batch_size = 10
data_iter = load_array(data_arrays=(features, labels), batch_size=batch_size)

In [63]:
# Walk the loader once and print a running count of the minibatches it yields.
# NOTE(review): 1000 examples at batch_size = 10 should give 100 batches, but
# the recorded output below stops at 1 - it looks stale; re-run on a fresh kernel.
i = 0  # stays 0 if the loader yields nothing
for i, (X, y) in enumerate(data_iter, start=1):
    print(i)

1


In [65]:
# Train with minibatch SGD for a fixed number of passes over the loader.
epochs = 3
for i in range(epochs):
    for X, y in data_iter:
        # NOTE(review): assumes y has the same shape as net(X); MSELoss would
        # otherwise broadcast - confirm against the data generator.
        l = loss(net(X), y)
        trainer.zero_grad()  # clear gradients accumulated by the previous step
        l.backward()
        trainer.step()
    # End-of-epoch report on the full dataset. This is evaluation only, so skip
    # autograd bookkeeping and print a plain float rather than a tensor repr
    # carrying a grad_fn.
    with torch.no_grad():
        l = loss(net(features), labels)
    print('epoch-', i, ' loss ', l.item())
    

epoch- 0  loss  tensor(26.6352, grad_fn=<MseLossBackward>)
epoch- 1  loss  tensor(23.4412, grad_fn=<MseLossBackward>)
epoch- 2  loss  tensor(20.6301, grad_fn=<MseLossBackward>)


In [66]:
# Compare the learned parameters with the reference values true_w / true_b.
# `.detach()` yields a gradient-free tensor without going through the `.data`
# alias, which bypasses autograd's in-place modification checks.
w = net[0].weight.detach()
# The weight is reshaped to true_w's shape so the subtraction is elementwise.
print('error in estimating w:', true_w - w.reshape(true_w.shape))
b = net[0].bias.detach()
print('error in estimating b:', true_b - b)


error in estimating w: tensor([ 1.5600, -2.6443])
error in estimating b: tensor([3.2524])


tensor([[ 0.4400, -0.7557]])