# Linear regression

In [1]:
import torch
from torch.utils.data import Dataset, DataLoader

In [2]:
torch.manual_seed(1)

<torch._C.Generator at 0x7f2145683f10>

In [3]:
a = torch.randn((2,3))

In [4]:
a.size(0)

2

In [5]:
def synthetic_data(true_w, true_b, num_samples):
    X = torch.normal(0.0, 1.0, (num_samples, true_w.size(0)))
    y = torch.matmul(X, true_w) + true_b
    y += torch.normal(0.0, 0.01, y.size())
    y = torch.reshape(y, (-1, 1))
    return X,y

In [6]:
true_w = torch.tensor([2.0, -3.4], dtype=torch.float)
print(true_w)
print(true_w.size())
print(true_w.size(0))
true_b = 4.2
num_samples = 3000

tensor([ 2.0000, -3.4000])
torch.Size([2])
2


In [7]:
data_and_label = synthetic_data(true_w, true_b, num_samples)

In [8]:
features = data_and_label[0]
print(features)

labels = data_and_label[1]
print(labels)

tensor([[-0.5912,  0.2738],
        [-0.9649, -0.2358],
        [-0.6970, -1.1608],
        ...,
        [ 1.0776, -0.2889],
        [-0.1190, -0.3023],
        [ 0.0503, -0.3310]])
tensor([[2.0839],
        [3.0648],
        [6.7607],
        ...,
        [7.3397],
        [4.9855],
        [5.4237]])


# step2: create dataset and dataloader

In [9]:
from torch.utils.data import Dataset
class CustomDataset(Dataset):
    def __init__(self, features, labels):
        super(CustomDataset, self).__init__()
        self.features = features
        self.labels = labels
        assert features.shape[0] == labels.shape[0]
    
    def __getitem__(self, i):
        return self.features[i], self.labels[i]
    
    def __len__(self):
        return self.features.shape[0]

In [10]:
dataset = CustomDataset(features, labels)

In [11]:
print(len(dataset))

3000


In [12]:
sample = dataset[0]

In [13]:
first_x = sample[0]
first_y = sample[1]
print(first_x)
print(first_y)

tensor([-0.5912,  0.2738])
tensor([2.0839])


In [14]:
dataloader = DataLoader(dataset=dataset, batch_size=10, shuffle=True, drop_last=False)

# step3: create model

In [21]:
model = torch.nn.Linear(true_w.size(0), 1)
model.weight.data.normal_(0, 0.01)
model.bias.data.fill_(0)

print(list(model.parameters()))

[Parameter containing:
tensor([[0.0081, 0.0056]], requires_grad=True), Parameter containing:
tensor([0.], requires_grad=True)]


# step4: create optimizer

In [26]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.03)

# step5: define loss function

In [30]:
loss_function = torch.nn.MSELoss(reduction='mean')

# step6: train to get the optimal parameter in model

In [35]:
num_epochs = 3;
for i in range(num_epochs):
    for batch in dataloader:
        x, y = batch
        output = model.forward(x);
        loss = loss_function(output, y);
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    epoch_output = model.forward(features)
    epoch_loss = loss_function(epoch_output, labels)
    print("Epoch: ", i ," loss: ", epoch_loss)

Epoch:  0  loss:  tensor(9.9488e-05, grad_fn=<MseLossBackward0>)
Epoch:  1  loss:  tensor(0.0001, grad_fn=<MseLossBackward0>)
Epoch:  2  loss:  tensor(9.9027e-05, grad_fn=<MseLossBackward0>)


# step7: evaluate the performance

In [38]:
w = model.weight.data
print("estimated parameter is ", w)
print(w.size())

estimated parameter is  tensor([[ 2.0001, -3.3996]])
torch.Size([1, 2])


In [42]:
w_dif = true_w - w.reshape(true_w.size())
print("error in estimating w: {} \n".format(w_dif))

error in estimating w: tensor([-0.0001, -0.0004]) 



In [45]:
b = model.bias.data
print("estimated bias is ", b)
print("error in estimating b: ", true_b - b)
print("done")

estimated bias is  tensor([4.1997])
error in estimating b:  tensor([0.0003])
done
