In [0]:
# This notebook implements a neural net to apprximate XOR function using PyTorch
from __future__ import print_function
import numpy as np
import torch
from torch.autograd import Variable

In [2]:
X = torch.tensor([[1.0,0.0,0.0,1.0],[0.0,0.0,1.0,1.0]],dtype=torch.float32) 
X = torch.transpose(X,0,1) # 4x2 matrix
Y = torch.tensor([[1.0,0.0,1.0,0.0]],dtype=torch.float32)                   
Y = torch.transpose(Y,0,1) # 4x1 vector
print("input: ", X)
print("output: ", Y)


input:  tensor([[1., 0.],
        [0., 0.],
        [0., 1.],
        [1., 1.]])
output:  tensor([[1.],
        [0.],
        [1.],
        [0.]])


In [3]:
# parameters of neural net
W1 = Variable(torch.torch.FloatTensor(2, 8).uniform_(-1, 1), requires_grad=True) # 2x8 matrix
b1 = Variable(torch.zeros((1,8)), requires_grad=True)                            # 1x8 matrix
W2 = Variable(torch.torch.FloatTensor(8, 1).uniform_(-1, 1), requires_grad=True) # 8x1 matrix
b2 = Variable(torch.zeros([1]), requires_grad=True)                              # scalar

learning_rate = 0.5

for step in range(10000):

  # forward pass
  Z1 = torch.mm(X,W1)    # 4x8 matrix
  Z2 = Z1 + b1           # 4x8 matrix
  Z3 = torch.sigmoid(Z2) # 4x8 matrix
  Z4 = torch.mm(Z3,W2)   # 4x1 vector
  Z5 = Z4 + b2           # 4x1 vector
  Yp = torch.sigmoid(Z5) # 4x1 vector

  # backward pass
  dYp = Yp-Y # 4x1 vector
  dZ5 = torch.sigmoid(Z5)*(1.0-torch.sigmoid(Z5))*dYp # 4x1 vector
  dZ4 = dZ5  # 4x1 vector
  dZ3 = torch.mm(dZ4,torch.transpose(W2,0,1)) # 4x8 matrix
  dZ2 = torch.sigmoid(Z2)*(1.0-torch.sigmoid(Z2))*dZ3 # 4x8 matrix
  dZ1 = dZ2 # 4x8 matrix
  
  dW1 = torch.mm(torch.transpose(X,0,1),dZ1)
  db1 = torch.sum(dZ2,0,True)
  dW2 = torch.mm(torch.transpose(Z3,0,1),dZ4)
  db2 = torch.sum(dZ5)
  
  # adjust parameters by gradient descent
  W1 = W1 - learning_rate*dW1
  b1 = b1 - learning_rate*db1
  W2 = W2 - learning_rate*dW2
  b2 = b2 - learning_rate*db2
  
  if step%1000 == 0:
    loss = torch.sum((Yp-Y)**2)
    print("loss:",loss)

print(Yp)
print(Y)

loss: tensor(1.0287, grad_fn=<SumBackward0>)
loss: tensor(0.0325, grad_fn=<SumBackward0>)
loss: tensor(0.0072, grad_fn=<SumBackward0>)
loss: tensor(0.0037, grad_fn=<SumBackward0>)
loss: tensor(0.0024, grad_fn=<SumBackward0>)
loss: tensor(0.0018, grad_fn=<SumBackward0>)
loss: tensor(0.0014, grad_fn=<SumBackward0>)
loss: tensor(0.0011, grad_fn=<SumBackward0>)
loss: tensor(0.0009, grad_fn=<SumBackward0>)
loss: tensor(0.0008, grad_fn=<SumBackward0>)
tensor([[0.9867],
        [0.0114],
        [0.9867],
        [0.0152]], grad_fn=<SigmoidBackward>)
tensor([[1.],
        [0.],
        [1.],
        [0.]])
