# BS6207 Assignment 1

Xinxin <br>
Mar/2021

----

Given a fully connected Neural Network as follows: <br>
1. Input (x1,x2): 2 nodes
2. First hidden layer: 10 nodes, with weights (w) and bias (b), sigmoid activation function
3. Second hidden layer: 10 nodes, with weights (w) and bias (b), sigmoid activation function
4. Output (predict): 1 node

__Requirements:__ <br>
1. Implement this neural network in pytorch
2. Generate the input data (x1,x2) $\in$ [0,1] drawn from a uniform random distribution
3. Generate the labels y = (x1*x1+x2*x2)/2
4. Implement a loss function L = (predict-y)^2
5. Use a batch size of 1, i.e. feed the data into the network one point at a time and compute the loss. Do a single forward propagation with one data point.
6. Compute the gradients using pytorch autograd:
    - dL/dw, dL/db
    - Print these values into a text file: torch_autograd.dat
7. Implement the forward propagation and backpropagation algorithm from scratch, without using pytorch autograd, compute the gradients using your implementation
    - dL/dw, dL/db
    - Print these values into a text file: my_autograd.dat
8. Compare the two files torch_autograd.dat and my_autograd.dat and show that they give
the same values up to numerical precision errors

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import random

In [4]:
# 1. implement the neural network in pytorch
class Net(nn.Module):
  """Fully connected network: input -> sigmoid hidden -> sigmoid hidden -> linear output.

  Args:
    n_in: number of input features.
    n_hidden1: width of the first hidden layer.
    n_hidden2: width of the second hidden layer.
    n_out: number of output nodes.

  The defaults reproduce the original hard-coded 2-10-5-1 architecture, so
  ``Net()`` behaves exactly as before.
  """

  # NOTE(review): the assignment text asks for 10 nodes in the second hidden
  # layer, but this notebook was run with 5. The default stays at 5 so existing
  # results are unchanged; use Net(n_hidden2=10) to follow the written spec.
  def __init__(self, n_in=2, n_hidden1=10, n_hidden2=5, n_out=1):
    super().__init__()
    self.fc1 = nn.Linear(n_in, n_hidden1)
    self.fc2 = nn.Linear(n_hidden1, n_hidden2)
    self.fc3 = nn.Linear(n_hidden2, n_out)

  def forward(self, x):
    """Return the prediction for ``x``; the output layer has no activation."""
    x = torch.sigmoid(self.fc1(x))
    x = torch.sigmoid(self.fc2(x))
    return self.fc3(x)

net = Net()
print(net)

Net(
  (fc1): Linear(in_features=2, out_features=10, bias=True)
  (fc2): Linear(in_features=10, out_features=5, bias=True)
  (fc3): Linear(in_features=5, out_features=1, bias=True)
)


In [5]:
# Seed for reproducibility. torch.rand draws from PyTorch's own generator,
# which random.seed() does not touch, so torch.manual_seed is required for the
# sampled input to be the same on every run.
# NOTE: the network weights are initialised earlier, when Net() is constructed,
# so they are not covered by this seed.
random.seed(127)
torch.manual_seed(127)
# 2. generate input data x1 x2 from uniform random distribution of [0,1]
# (a single data point stored as a 1x2 row; the name `input` shadows the
#  builtin but is kept because later cells refer to it)
input = torch.rand(1, 2, requires_grad = False)

# 3. generate the labels of input data: y = (x1^2 + x2^2) / 2
y = (input[0][0]**2 + input[0][1]**2)/2

In [6]:
# 4. define loss function
def my_loss(output, target):
  """Squared error L = (output - target)^2, computed element-wise."""
  residual = output - target
  return residual ** 2

In [7]:
# 5. One time forward propagation with one data point
# (batch size 1: `input` holds a single (x1, x2) row)
y_pred = net(input)

In [8]:
# 6. compute dL/dw, dL/db using pytorch autograd
loss = my_loss(y_pred, y)
# backward() adds into the existing .grad buffers, and retain_graph=True lets
# this cell be re-run without error, so clear old gradients first; otherwise a
# second run would silently double the values written to torch_autograd.dat.
net.zero_grad()
loss.backward(retain_graph = True)


In [9]:
# echo the autograd loss so the notebook displays it
loss

tensor([[0.9002]], grad_fn=<PowBackward0>)

In [10]:
# show the autograd weight gradients, output layer first, as a sanity check
for label, layer in (('dL/dw3 = ', net.fc3),
                     ('dL/dw2 = ', net.fc2),
                     ('dL/dw1 = ', net.fc1)):
  print(label, layer.weight.grad)

dL/dw3 =  tensor([[-1.1295, -0.5753, -1.1748, -1.0965, -1.0258]])
dL/dw2 =  tensor([[-0.0352, -0.0341, -0.0467, -0.0493, -0.0220, -0.0231, -0.0426, -0.0493,
         -0.0361, -0.0348],
        [ 0.0477,  0.0462,  0.0632,  0.0667,  0.0298,  0.0312,  0.0577,  0.0668,
          0.0489,  0.0472],
        [ 0.0536,  0.0519,  0.0710,  0.0749,  0.0335,  0.0351,  0.0648,  0.0750,
          0.0549,  0.0530],
        [ 0.0171,  0.0166,  0.0226,  0.0239,  0.0107,  0.0112,  0.0207,  0.0239,
          0.0175,  0.0169],
        [ 0.0212,  0.0206,  0.0281,  0.0297,  0.0133,  0.0139,  0.0257,  0.0297,
          0.0218,  0.0210]])
dL/dw1 =  tensor([[ 0.0026,  0.0046],
        [ 0.0015,  0.0026],
        [-0.0045, -0.0079],
        [-0.0021, -0.0038],
        [-0.0004, -0.0007],
        [-0.0042, -0.0074],
        [ 0.0023,  0.0041],
        [ 0.0033,  0.0059],
        [ 0.0017,  0.0031],
        [-0.0048, -0.0085]])


In [11]:
# show the autograd bias gradients, output layer first, as a sanity check
for label, layer in (('dL/db3 = ', net.fc3),
                     ('dL/db2 = ', net.fc2),
                     ('dL/db1 = ', net.fc1)):
  print(label, layer.bias.grad)

dL/db3 =  tensor([-1.8976])
dL/db2 =  tensor([-0.0764,  0.1035,  0.1162,  0.0371,  0.0461])
dL/db1 =  tensor([ 0.0065,  0.0037, -0.0113, -0.0054, -0.0011, -0.0105,  0.0058,  0.0083,
         0.0044, -0.0121])


In [12]:
# write the autograd gradients to torch_autograd.dat
# Layout: one "\n<name>\n" header per tensor followed by its values, in the
# order dLdw3, dLdw2, dLdw1, dLdb3, dLdb2, dLdb1 (my_autograd.dat uses the
# same layout so the two files can be compared directly).
with open('/content/drive/MyDrive/Colab Notebooks/BS6207/Assignment1/torch_autograd.dat',"w") as out:
  sections = (
    ('\ndLdw3\n', net.fc3.weight),
    ('\ndLdw2\n', net.fc2.weight),
    ('\ndLdw1\n', net.fc1.weight),
    ('\ndLdb3\n', net.fc3.bias),
    ('\ndLdb2\n', net.fc2.bias),
    ('\ndLdb1\n', net.fc1.bias),
  )
  for header, param in sections:
    out.write(header)
    out.write(np.array2string(param.grad.data.numpy(), precision = 7, separator = ','))


In [13]:
# 7. Implement forward propagation and backward propagation from scratch
# Reuse the parameters of the autograd model above (same tensor objects, not
# copies) so both gradient computations start from identical weights/biases.
w1, b1 = net.fc1.weight, net.fc1.bias
w2, b2 = net.fc2.weight, net.fc2.bias
w3, b3 = net.fc3.weight, net.fc3.bias

In [14]:
# define the sigmoid activation function from scratch
def sigmoid(input):
  """Logistic function 1 / (1 + e^(-input)), applied element-wise."""
  denominator = 1 + torch.exp(-input)
  return 1 / denominator

In [15]:
# define function for derivative of sigmoid
def sigmoid_der(input):
  """Derivative of the sigmoid at ``input``, returned transposed.

  Computes s * (1 - s) with s = sigmoid(input) and swaps dims 0 and 1, so a
  2-D row of pre-activations comes back as a column.
  """
  activation = sigmoid(input)
  local_grad = activation * (1 - activation)
  return torch.transpose(local_grad, 0, 1)

In [16]:
# forward propagation by hand (z = pre-activation, h = activation);
# each weight matrix is transposed before the product with the row input

# hidden layer 1
z1 = torch.mm(input, w1.t()) + b1
h1 = sigmoid(z1)
# hidden layer 2
z2 = torch.mm(h1, w2.t()) + b2
h2 = sigmoid(z2)
# output layer (linear, no activation)
h3 = torch.mm(h2, w3.t()) + b3

In [17]:
# backward propagation
# loss of the hand-written forward pass; the manual gradients below are the
# derivatives of this quantity
loss_2 = my_loss(h3, y)
loss_2 # same as the loss from the model above

tensor([[0.9002]], grad_fn=<PowBackward0>)

In [18]:
# check if the output is the same as the model above
# (h3 comes from the hand-written forward pass, y_pred from net(input))
print(h3, y_pred)

tensor([[-0.6227]], grad_fn=<AddBackward0>) tensor([[-0.6227]], grad_fn=<AddmmBackward>)


In [19]:
# calculate dL/dw3
# dL/dh3 = 2 * (h3 - y); the output layer is linear, so dh3/dw3 = h2
dL_dh3 = 2 * (h3 - y)
dL_dw3 = dL_dh3 * h2
dL_dw3

tensor([[-1.1295, -0.5753, -1.1748, -1.0965, -1.0258]], grad_fn=<MulBackward0>)

In [20]:
# calculate dL/dw2
# error signal reaching the second hidden layer's activations, as a column:
# 2 * (h3 - y) * w3^T
upstream_h2 = 2 * (h3 - y) * torch.transpose(w3, 0, 1)
# `*` and `@` share precedence and group left to right, so the product with
# h1 is formed first and each row is then scaled by sigmoid'(z2)
dL_dw2 = (upstream_h2 @ h1) * sigmoid_der(z2)
dL_dw2

tensor([[-0.0352, -0.0341, -0.0467, -0.0493, -0.0220, -0.0231, -0.0426, -0.0493,
         -0.0361, -0.0348],
        [ 0.0477,  0.0462,  0.0632,  0.0667,  0.0298,  0.0312,  0.0577,  0.0668,
          0.0489,  0.0472],
        [ 0.0536,  0.0519,  0.0710,  0.0749,  0.0335,  0.0351,  0.0648,  0.0750,
          0.0549,  0.0530],
        [ 0.0171,  0.0166,  0.0226,  0.0239,  0.0107,  0.0112,  0.0207,  0.0239,
          0.0175,  0.0169],
        [ 0.0212,  0.0206,  0.0281,  0.0297,  0.0133,  0.0139,  0.0257,  0.0297,
          0.0218,  0.0210]], grad_fn=<MulBackward0>)

In [21]:
# calculate dL/dw1
# delta at the second hidden layer (column): 2*(h3 - y) * w3^T * sigmoid'(z2)
delta_h2 = 2 * (h3 - y) * torch.transpose(w3, 0, 1) * sigmoid_der(z2)
# push it back through w2 to the first hidden layer's activations (column)
upstream_h1 = torch.transpose(torch.transpose(delta_h2, 0, 1) @ w2, 0, 1)
# product with the input row, then scale each row by sigmoid'(z1)
dL_dw1 = (upstream_h1 @ input) * sigmoid_der(z1)
dL_dw1

tensor([[ 0.0026,  0.0046],
        [ 0.0015,  0.0026],
        [-0.0045, -0.0079],
        [-0.0021, -0.0038],
        [-0.0004, -0.0007],
        [-0.0042, -0.0074],
        [ 0.0023,  0.0041],
        [ 0.0033,  0.0059],
        [ 0.0017,  0.0031],
        [-0.0048, -0.0085]], grad_fn=<MulBackward0>)

In [22]:
# calculate dL/db3
# the output bias enters h3 additively, so dL/db3 equals dL/dh3 = 2 * (h3 - y)
residual = h3 - y
dL_db3 = 2 * residual
dL_db3

tensor([[-1.8976]], grad_fn=<MulBackward0>)

In [23]:
# calculate dL/db2
# delta at the second hidden layer (column): 2*(h3 - y) * w3^T * sigmoid'(z2)
delta_h2 = 2 * (h3 - y) * torch.transpose(w3, 0, 1) * sigmoid_der(z2)
# the bias gradient is that delta, transposed back to a row
dL_db2 = torch.transpose(delta_h2, 0, 1)
dL_db2

tensor([[-0.0764,  0.1035,  0.1162,  0.0371,  0.0461]],
       grad_fn=<TransposeBackward0>)

In [24]:
# calculate dL/db1
# delta at the second hidden layer (column): 2*(h3 - y) * w3^T * sigmoid'(z2)
delta_h2 = 2 * (h3 - y) * torch.transpose(w3, 0, 1) * sigmoid_der(z2)
# push it back through w2 to the first hidden layer's activations (column)
upstream_h1 = torch.transpose(torch.transpose(delta_h2, 0, 1) @ w2, 0, 1)
# scale by sigmoid'(z1) and transpose back to a row
dL_db1 = torch.transpose(upstream_h1 * sigmoid_der(z1), 0, 1)
dL_db1

tensor([[ 0.0065,  0.0037, -0.0113, -0.0054, -0.0011, -0.0105,  0.0058,  0.0083,
          0.0044, -0.0121]], grad_fn=<TransposeBackward0>)

In [25]:
# output the hand-computed gradients to my_autograd.dat
# Same layout and order as torch_autograd.dat so the files can be compared.
with open('/content/drive/MyDrive/Colab Notebooks/BS6207/Assignment1/my_autograd.dat',"w") as out:
  weight_sections = (
    ('\ndLdw3\n', dL_dw3),
    ('\ndLdw2\n', dL_dw2),
    ('\ndLdw1\n', dL_dw1),
  )
  bias_sections = (
    ('\ndLdb3\n', dL_db3),
    ('\ndLdb2\n', dL_db2),
    ('\ndLdb1\n', dL_db1),
  )
  for header, grad in weight_sections:
    out.write(header)
    out.write(np.array2string(grad.data.numpy(), precision = 7, separator = ','))
  # the manual bias gradients are 2-D; [0] takes their first row so they are
  # written as 1-D arrays
  for header, grad in bias_sections:
    out.write(header)
    out.write(np.array2string(grad.data.numpy()[0], precision = 7, separator = ','))