Implement linear regression with torch to fit simple lines 

In [24]:
import torch 
import numpy as np 
import matplotlib.pyplot as plt 
import torch.nn as nn 
import torch.nn.init as init 

In [13]:
# Design matrix: every row is (x, 1); the constant second column stands in for the bias term.
input_data = torch.tensor(
    [(x, 1) for x in (1, 2, 5, -1)],
    dtype=torch.float32,
)

# Targets, reshaped into a (4, 1) column vector.
truth = torch.tensor([5, 8, 17, -1], dtype=torch.float32).view(-1, 1)


## Define models 
The data is generated from $y = mx + b$ with $m=3$, $b=2$ (for example $3 \cdot 1 + 2 = 5$ and $3 \cdot 5 + 2 = 17$).  
The model should figure out what m and b are. This is a very simple model.

The input has shape (4,2) (the x value plus a constant 1 that plays the role of the bias) and the target has shape (4,1), so we can train a single-layer perceptron model 

- For each training pair, the format is $$w_1 * x_1 + w_2 * x_2 = y$$ 

We want the ground-truth solution of $w_1 = 3, w_2 = 2$.

With sufficient training, the model will discover the correct vector. You can see that with a very low amount of data, the model takes 17695 iterations to have a loss lower than 3e-3


In [35]:
# Single linear layer: 2 input features -> 1 output neuron (the prediction).
# No separate bias parameter, because the bias is already included as a feature.
model = nn.Linear(in_features=2, out_features=1, bias=False)

In [36]:
# Mean-squared-error objective, minimised with plain SGD at a small fixed step size.
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-4)

In [39]:
# Full-batch gradient descent. Stop at the first epoch whose loss (measured
# before that epoch's weight update) falls below the tolerance.
max_epochs = 10000000
loss_tolerance = 3e-3

for epoch in range(max_epochs):
    optimizer.zero_grad()              # clear gradients left over from the previous step
    prediction = model(input_data)     # [4,2] @ [2,1] -> [4,1]
    loss = criterion(prediction, truth)
    loss.backward()
    optimizer.step()
    if loss.item() < loss_tolerance:
        print(epoch)
        break

print(f"Final loss: {loss.item()}")

17695
Final loss: 0.002999730873852968


In [38]:
# Show the learned weights as a plain tensor, detached from the autograd graph.
print(model.weight.detach())

tensor([[3.1303, 1.4657]])


### What happens if we have very bad weight initialization? 
It doesn't matter whether our weight initialization is a constant or 0, since the gradient is not dependent on the weight yet 

Bias= False  
- all neurons receive the same gradient and make the same weight update, so basically every change is diluted out, as if they were dead neurons 
- you see this with multiple neurons 

Bias = True 
- all neurons will move the same way, but the bias gradient doesn't affect the weight gradient. Behavior is very similar to when bias=False. 

In [75]:
# Three output neurons sharing the same two input features, all starting from zero weights.
criterion = nn.MSELoss()
model = nn.Linear(in_features=2, out_features=3, bias=False)
init.zeros_(model.weight)  # torch.nn.init functions already run in no-grad mode
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-4)

In [73]:
# Run two full-batch SGD steps and print the state after each one, to inspect
# how identically initialised neurons are updated.
for epoch in range(2):
    prediction = model(input_data)
    # Expand the target explicitly to the prediction's shape. When the model has
    # more than one output neuron, passing the (N, 1) target directly relies on
    # implicit broadcasting inside MSELoss and emits a size-mismatch UserWarning.
    # The loss value and gradients are the same either way.
    loss = criterion(prediction, truth.expand_as(prediction))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # prediction and loss were computed before this step's update;
    # the weights printed below are the values after the update.
    print(f"Epoch: {epoch} | prediction: {prediction} | loss: {loss.item()}")
    print(f"gradient: {model.weight.grad}")
    print("Model weights: ", model.weight.data)

Epoch: 0 | prediction: tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]], grad_fn=<MmBackward0>) | loss: 94.75
gradient: tensor([[-17.8333,  -4.8333],
        [-17.8333,  -4.8333],
        [-17.8333,  -4.8333]])
Model weights:  tensor([[0.0018, 0.0005],
        [0.0018, 0.0005],
        [0.0018, 0.0005]])
Epoch: 1 | prediction: tensor([[ 0.0023,  0.0023,  0.0023],
        [ 0.0041,  0.0041,  0.0041],
        [ 0.0094,  0.0094,  0.0094],
        [-0.0013, -0.0013, -0.0013]], grad_fn=<MmBackward0>) | loss: 94.64761352539062
gradient: tensor([[-17.8236,  -4.8309],
        [-17.8236,  -4.8309],
        [-17.8236,  -4.8309]])
Model weights:  tensor([[0.0036, 0.0010],
        [0.0036, 0.0010],
        [0.0036, 0.0010]])


  return F.mse_loss(input, target, reduction=self.reduction)


In [74]:
# Show the weights after the two updates; every row (neuron) is still identical.
print(model.weight.detach())

tensor([[0.0036, 0.0010],
        [0.0036, 0.0010],
        [0.0036, 0.0010]])


### Solving Linear models 
- linear models with an MSE loss function can have a closed-form solution 

In [1]:
import numpy as np

# 2x2 linear system X @ theta = Y, solved directly instead of by gradient descent.
# NOTE(review): X and Y look like normal-equation terms (X^T X and X^T y) — confirm.
X = np.array([[2070, 138], [138, 10]])
Y = np.array([3717, 241])

theta = np.linalg.solve(X, Y)

In [3]:
# Print the solved coefficients, then display the shape of the solution vector.
print(theta)
theta.shape

[ 2.36231884 -8.5       ]


(2,)

### What if the matrix is unsolvable? 
- contradictory or no solution

In [4]:
# Both rows of X are identical but the right-hand sides differ, so the two
# equations contradict each other and no theta satisfies both.
X = np.array([
    [13, 13],
    [13, 13],
])
Y = np.array([36, 54])

# np.linalg.solve raises LinAlgError for a singular matrix. The error is the
# point of this cell, so catch it and show the message instead of letting an
# uncaught exception stop a "Run All" of the notebook.
try:
    theta = np.linalg.solve(X, Y)
except np.linalg.LinAlgError as err:
    print(f"LinAlgError: {err}")

LinAlgError: Singular matrix

#### What about infinite number of solutions 
- when all points in the system are linearly dependent on each other 

In [1]:
import numpy as np

# Both equations are the same (5a + 5b = 15), so infinitely many theta satisfy them.
X = np.array([
    [5, 5],
    [5, 5],
])
Y = np.array([15, 15])

# np.linalg.solve still raises LinAlgError here because X is singular. Catch it
# and show the message so the demonstration does not halt the rest of the notebook.
try:
    theta = np.linalg.solve(X, Y)
except np.linalg.LinAlgError as err:
    print(f"LinAlgError: {err}")


LinAlgError: Singular matrix

In [3]:
import numpy as np

# A non-singular system (determinant 5*10 - 7*7 = 1) has exactly one solution.
X = np.array([
    [5, 7],
    [7, 10],
])
Y = np.array([4, 5])

theta = np.linalg.solve(X, Y)
theta

array([ 5., -3.])