# Building an MLP from scratch with Pytorch

In [0]:
import torch
from torch.autograd import Variable
import numpy as np

In [10]:
# loading the data
from sklearn.datasets import load_iris

data, label = load_iris(return_X_y = True)
ind = np.where(label < 2)
X = torch.tensor(data[ind]).float()
Y = torch.tensor(np.eye(2)[label[ind]]).float() # one-hotting the label
print(X.shape, Y.shape)

torch.Size([100, 4]) torch.Size([100, 2])


In [0]:
class MLP:

  def __init__(self, input_size=4, hidden_size=3, num_classes=2, lr=1e-3):
    self.W1 = Variable(torch.randn(input_size, hidden_size), requires_grad=True)
    self.b1 = Variable(torch.randn(hidden_size), requires_grad=True)
    #self.W1.data.fill_(1.)
    #self.b1.data.fill_(1.)

    self.W2 = Variable(torch.randn(hidden_size, num_classes), requires_grad=True)
    self.b2 = Variable(torch.randn(num_classes), requires_grad=True)
    #self.W2.data.fill_(1.)
    #self.b2.data.fill_(1.)

    self.lr = lr

  def relu(self, x):
    # torch max is elementwise, we can't pass a scalar 0 to it
    return torch.max(x, torch.zeros_like(x))

  def softmax(self, x):
    e = torch.exp(x - torch.max(x)) # the - is for more stability
    return e / e.sum()

  def cross_entropy(self, y, o):
    t = y * torch.log(o + 1e-10)
    return -torch.sum(t)

  def forward(self, x):
    h = self.relu(torch.matmul(x, self.W1) + self.b1)
    o = self.softmax(torch.matmul(h, self.W2) + self.b2)
    return o

  def backward(self, loss):
    loss.backward()

    self.W1.data -= self.lr * self.W1.grad.data
    self.b1.data -= self.lr * self.b1.grad.data

    self.W2.data -= self.lr * self.W2.grad.data
    self.b2.data -= self.lr * self.b2.grad.data

  def train(self, x, y, epochs):
    for epoch in range(epochs):
      losses = []
      for sample, label in zip(x, y):
        o = self.forward(sample)
        loss = self.cross_entropy(label, o)
        self.backward(loss)
        losses += [loss.item()]
      
      loss = np.mean(losses)
      if (epoch+1) % 10 == 0:
        print('epoch', epoch+1, ', loss', loss)    

In [0]:
mlp = MLP()

In [42]:
mlp.train(X, Y, epochs=100)

epoch 10 , loss 0.982461998462677
epoch 20 , loss 0.6689040878415108
epoch 30 , loss 0.9823818069696426
epoch 40 , loss 0.6696574705839157
epoch 50 , loss 0.9817364364862442
epoch 60 , loss 0.6715824556350708
epoch 70 , loss 0.9805195915699005
epoch 80 , loss 0.6746719020605088
epoch 90 , loss 0.9787228834629059
epoch 100 , loss 0.6789108946919441
