### Structure of a PyTorch model

1. Design Model
    - Input_size
    - Output_size
    - Forward_pass (contains all operations and layers)

2. Construct Loss and Optimizer

3. Training Loop

 a. Forward pass

 b. Backward pass

 c. Update Weights


In [None]:
import torch
import torch.nn as nn

In [None]:
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
y = 2*X

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

In [None]:
def forward(x):
    return w*x

In [None]:
lr = 0.01
n_iters = 100

loss = nn.MSELoss()
optimizer = torch.optim.SGD(params=[w], lr=lr)

for epoch in range(1, n_iters+1):
    y_pred = forward(X)

    l = loss(y, y_pred)

    # Calculating gradient automatically
    # dl/dw
    l.backward()

    # Earlier
    # with torch.no_grad():
    #     w -= lr*w.grad
    # w.grad.zero_()
    optimizer.step()
    optimizer.zero_grad

    if epoch%10==0:
        print(f"Epoch {epoch}: w = {w:.3f}, loss={l:.5f}")

print(f"Prediction after training:\nf(5) = {forward(5):.3f}")

Epoch 10: w = 3.184, loss=22.34242
Epoch 20: w = 2.277, loss=1.96267
Epoch 30: w = 0.413, loss=7.22630
Epoch 40: w = 4.032, loss=28.22434
Epoch 50: w = 0.629, loss=25.42730
Epoch 60: w = 1.962, loss=4.10068
Epoch 70: w = 3.426, loss=4.39257
Epoch 80: w = -0.037, loss=25.75348
Epoch 90: w = 3.538, loss=27.97076
Epoch 100: w = 1.799, loss=6.87032
Prediction after training:
f(5) = 8.993


### Instead of manual **forward()**, we can use **nn.Linear()**

No. of Rows = No. of Samples

No. of Columns = No. of Features

Here we have 4 samples and each sample has 1 feature

In [None]:
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32).reshape(4, -1)
y = 2*X
print(X)
print(X.shape, y.shape)

tensor([[1.],
        [2.],
        [3.],
        [4.]])
torch.Size([4, 1]) torch.Size([4, 1])


In [None]:
n_samples, n_features = X.shape
print(n_samples, n_features)

# Here, we have 1 input and 1 output
model = nn.Linear(in_features=n_features, out_features=n_features)

4 1


Now input will be in the form of a tensor and not a float value

In [None]:
input_sample = torch.tensor([5], dtype=torch.float32)
print(f"Prediction before training:\nf(5) = {model(input_sample).item():.3f}")

Prediction before training:
f(5) = -2.845


**w** is a 2D matrix which typically represents the weight matrix for a fully connected layer (e.g., nn.Linear), where each entry w[i][j] corresponds to the weight connecting the j-th input feature to the i-th output feature.

In [None]:
lr = 0.01
n_iters = 100

loss = nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=lr)

for epoch in range(1, n_iters+1):
    y_pred = model(X)

    l = loss(y, y_pred)

    # Calculating gradient automatically
    # dl/dw
    l.backward()

    # Earlier
    # with torch.no_grad():
    #     w -= lr*w.grad
    # w.grad.zero_()
    optimizer.step()
    optimizer.zero_grad

    if epoch%10==0:
        [w, b] = model.parameters()
        print(f"Epoch {epoch}: w = {w[0][0].item():.3f}, loss={l:.5f}")

print(f"Prediction after training:\nf(5) = {model(input_sample).item():.3f}")

Epoch 10: w = 2.986, loss=29.55885
Epoch 20: w = 3.385, loss=1.66508
Epoch 30: w = -0.481, loss=47.42399
Epoch 40: w = 3.390, loss=41.60320
Epoch 50: w = 2.835, loss=0.13460
Epoch 60: w = -0.459, loss=36.51072
Epoch 70: w = 3.790, loss=51.17986
Epoch 80: w = 2.341, loss=3.99400
Epoch 90: w = -0.216, loss=24.08649
Epoch 100: w = 4.183, loss=56.46851
Prediction after training:
f(5) = 21.542


In the above case, we have only 1 weight

In [None]:
[w, b] = model.parameters()
print("For Weight")
print(w.shape)
print(w[0][0])
print("*"*100)
print("For Bias")
print(b.shape)
print(b[0])

For Weight
torch.Size([1, 1])
tensor(4.1829, grad_fn=<SelectBackward0>)
****************************************************************************************************
For Bias
torch.Size([1])
tensor(0.6273, grad_fn=<SelectBackward0>)


### Using custom model

In [None]:
class LinearRegression(nn.Module):
    def __init__(self, in_features, out_features):
        super(LinearRegression, self).__init__()

        # Define layers
        self.linear = nn.Linear(in_features=in_features, out_features=out_features)

    def forward(self, x):
        return self.linear(x)

In [None]:
# Here n_features=1
model = LinearRegression(in_features=n_features, out_features=n_features)

lr = 0.01
n_iters = 500

loss = nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=lr)

for epoch in range(1, n_iters+1):
    y_pred = model(X)

    l = loss(y, y_pred)

    # Calculating gradient automatically
    # dl/dw
    l.backward()

    # Earlier
    # with torch.no_grad():
    #     w -= lr*w.grad
    # w.grad.zero_()
    optimizer.step()
    optimizer.zero_grad

    if epoch%100==0:
        [w, b] = model.parameters()
        print(f"Epoch {epoch}: w = {w[0][0].item():.3f}, loss={l:.5f}")

print(f"Prediction after training:\nf(5) = {model(input_sample).item():.3f}")

Epoch 100: w = 3.715, loss=38.29234
Epoch 200: w = 0.636, loss=32.15721
Epoch 300: w = 2.885, loss=21.10789
Epoch 400: w = 1.866, loss=9.39189
Epoch 500: w = 1.618, loss=1.51661
Prediction after training:
f(5) = 7.801
